1.环境说明
安装目录:/var/coreseek/coreseek
配置文件:(./etc/sphinx.conf)
# searchd service configuration
searchd
{
    # SphinxAPI port; the PHP search client connects to 127.0.0.1:9312.
    port            = 9312
    log             = /var/coreseek/coreseek/var/log/searchd.log
    query_log       = /var/coreseek/coreseek/var/log/query.log
    read_timeout    = 5
    max_children    = 30
    pid_file        = /var/coreseek/coreseek/var/log/searchd.pid
    # Upper bound on matches kept per query; paging cannot go past this.
    max_matches     = 1000
    seamless_rotate = 1
    preopen_indexes = 0
    unlink_old      = 1
    # Additional listener speaking the MySQL 4.1 wire protocol (SphinxQL).
    listen          = localhost:9306:mysql41
}

# data source configuration
source ztm_sheet_xml_source
{
    # The generated stream uses <sphinx:docset>/<sphinx:document>, i.e. the
    # xmlpipe2 format, and the xmlpipe_field / xmlpipe_attr_* directives below
    # apply to xmlpipe2 sources only.
    type                    = xmlpipe2
    xmlpipe_command         = cat /var/www/htdocs/myweb/application/data/sphinxforsheet.pipe.xml

    # full-text fields
    xmlpipe_field           = sh_title
    xmlpipe_field           = sh_intro

    # attributes (usable for filtering and sorting)
    xmlpipe_attr_uint       = sh_id
    xmlpipe_attr_timestamp  = sh_time
    xmlpipe_attr_uint       = sh_uid
    xmlpipe_attr_uint       = sh_subject
    xmlpipe_attr_uint       = sh_question_count
    xmlpipe_attr_uint       = sh_note_count
    xmlpipe_attr_uint       = sh_view_count
    xmlpipe_attr_uint       = sh_share_count
    xmlpipe_attr_uint       = sh_praise_count
    xmlpipe_attr_uint       = sh_download_count
    xmlpipe_attr_uint       = sh_admin_note
    xmlpipe_attr_uint       = sh_year
    xmlpipe_attr_uint       = sh_type
    xmlpipe_attr_uint       = sh_grade
    xmlpipe_attr_uint       = sh_city
}

# index configuration
index ztm_sheet_index
{
    source              = ztm_sheet_xml_source
    path                = /var/coreseek/coreseek/var/data/sheet
    docinfo             = extern
    # Coreseek Chinese word segmentation (mmseg dictionary directory).
    charset_dictpath    = /var/coreseek/mmseg3/etc
    charset_type        = zh_cn.utf-8
    ngram_len           = 0
}
<?php
/**
 * Exports rows of the `sheet` table as an xmlpipe2 document stream that the
 * Sphinx/Coreseek indexer can consume.
 *
 * The stream is written to PIPE_FILE in three steps: the header with the
 * schema (init), one <sphinx:document> per row fetched in id-range batches
 * (getSheets), and the closing footer.
 *
 * NOTE(review): PIPE_FILE is relative to the current working directory, while
 * sphinx.conf reads /var/www/htdocs/myweb/application/data/sphinxforsheet.pipe.xml.
 * Run this script from that directory (or adjust one of the two paths).
 */
class script_sphinxindexforsheet
{
    /** Output file name, resolved against the current working directory. */
    const PIPE_FILE = 'sphinxforsheet.pipe.xml';

    /** Full-text fields: XML element name => `sheet` column. */
    private static $fieldColumns = array(
        'sh_title' => 'title',
        'sh_intro' => 'intro',
    );

    /** Attributes: XML element name => `sheet` column (schema order). */
    private static $attrColumns = array(
        'sh_id'             => 'id',
        'sh_uid'            => 'uid',
        'sh_time'           => 'create_time',
        'sh_subject'        => 'subject',
        'sh_question_count' => 'question_count',
        'sh_note_count'     => 'note_count',
        'sh_view_count'     => 'view_count',
        'sh_share_count'    => 'share_count',
        'sh_praise_count'   => 'praise_count',
        'sh_download_count' => 'download_count',
        'sh_admin_note'     => 'admin_note',
        'sh_year'           => 'year',
        'sh_type'           => 'type',
        'sh_grade'          => 'grade',
        'sh_city'           => 'city',
    );

    /**
     * Creates (or truncates) the pipe file and writes the XML header + schema.
     *
     * @throws RuntimeException when the file cannot be written
     */
    public static function init()
    {
        self::writePipe(self::getXmlPipeHead(), 'w');
    }

    /**
     * Generates the complete pipe file.
     *
     * @param int $min_id smallest id to export (ideally looked up at runtime)
     * @param int $max_id largest id to export (ideally looked up at runtime)
     * @param int $limit  number of ids covered by one batch
     *
     * @throws InvalidArgumentException when $limit is not positive
     * @throws RuntimeException         on file or database errors
     */
    public static function generateIndex($min_id = 1, $max_id = 100000, $limit = 3000)
    {
        $min_id = intval($min_id);
        $max_id = intval($max_id);
        $limit  = intval($limit);
        if ($limit < 1) {
            // A non-positive step would never advance the loop below.
            throw new InvalidArgumentException('limit must be a positive integer');
        }

        self::init();

        // Batches are half-open ranges [begin, begin + limit), so adjacent
        // batches never share an id and no document is emitted twice.
        for ($begin_id = $min_id; $begin_id <= $max_id; $begin_id += $limit) {
            self::getSheets($begin_id, $begin_id + $limit);
            usleep(300); // brief pause between batches
        }

        self::appendXmlPipe(self::getXmlPipeFooter());
    }

    /**
     * Appends content to the pipe file.
     *
     * @param string $content
     *
     * @throws RuntimeException when the file cannot be written
     */
    protected static function appendXmlPipe($content)
    {
        self::writePipe($content, 'a');
    }

    /**
     * Returns the closing tag of the document set.
     *
     * @return string
     */
    protected static function getXmlPipeFooter()
    {
        return "</sphinx:docset>\n";
    }

    /**
     * Exports every sheet with $begin_id <= id < $end_id to the pipe file.
     *
     * @param int $begin_id first id of the batch (inclusive)
     * @param int $end_id   end of the batch (exclusive)
     *
     * @throws RuntimeException on connection or query failure
     */
    private static function getSheets($begin_id, $end_id)
    {
        // NOTE(review): credentials are hard-coded; consider moving them to
        // configuration outside the source tree.
        $host   = 'localhost';
        $port   = 3306;
        $user   = 'root';
        $pwd    = 'abc123';
        $dbname = 'test';

        // ext/mysql (mysql_connect & co.) was removed in PHP 7; mysqli is the
        // bundled replacement and is available since PHP 5.
        $mysql = mysqli_connect($host, $user, $pwd, $dbname, $port);
        if (!$mysql) {
            // Throwing (instead of die) makes the process exit non-zero so the
            // calling shell script can detect the failure.
            throw new RuntimeException('Could not connect: ' . mysqli_connect_error());
        }
        // NOTE(review): the XML header declares utf-8; confirm the connection
        // charset matches (e.g. mysqli_set_charset) for the deployed database.

        $sql = sprintf(
            "select id,uid,create_time,title,intro,subject,question_count,note_count,"
            . "view_count,share_count,praise_count,download_count,admin_note,type,year,city,grade"
            . " from sheet where id>=%d and id<%d",
            $begin_id,
            $end_id
        );

        $ret = mysqli_query($mysql, $sql);
        if ($ret === false) {
            $error = mysqli_error($mysql);
            mysqli_close($mysql);
            throw new RuntimeException('Query failed: ' . $error);
        }

        $xml = '';
        while ($row = mysqli_fetch_assoc($ret)) {
            $xml .= self::buildDocument($row);
        }

        mysqli_free_result($ret);
        mysqli_close($mysql);

        if ($xml !== '') {
            self::appendXmlPipe($xml);
        }
    }

    /**
     * Renders one database row as a <sphinx:document> element.
     *
     * @param array $row associative row from the `sheet` table
     *
     * @return string
     */
    private static function buildDocument($row)
    {
        $xml = "<sphinx:document id=\"" . $row['id'] . "\">\n";
        foreach (self::$fieldColumns as $element => $column) {
            $xml .= "<" . $element . ">" . self::cdata($row[$column]) . "</" . $element . ">\n";
        }
        foreach (self::$attrColumns as $element => $column) {
            $xml .= "<" . $element . ">" . $row[$column] . "</" . $element . ">\n";
        }
        $xml .= "</sphinx:document>\n";

        return $xml;
    }

    /**
     * Wraps text in a CDATA section.
     *
     * A literal "]]>" inside the text would terminate the section early and
     * corrupt the stream, so it is split across two adjacent CDATA sections.
     *
     * @param string|null $text
     *
     * @return string
     */
    private static function cdata($text)
    {
        return "<![CDATA[" . str_replace("]]>", "]]]]><![CDATA[>", (string) $text) . "]]>";
    }

    /**
     * Opens the pipe file in the given mode, writes the content and closes it.
     *
     * @param string $content
     * @param string $mode    fopen() mode: "w" to truncate, "a" to append
     *
     * @throws RuntimeException when the file cannot be opened or written
     */
    private static function writePipe($content, $mode)
    {
        $f = fopen(self::PIPE_FILE, $mode);
        if ($f === false) {
            throw new RuntimeException('Could not open ' . self::PIPE_FILE . ' for writing');
        }

        $written = fwrite($f, $content);
        fclose($f);

        if ($written === false) {
            throw new RuntimeException('Could not write to ' . self::PIPE_FILE);
        }
    }

    /**
     * Returns the XML prolog, the opening docset tag and the schema.
     *
     * @return string
     */
    private static function getXmlPipeHead()
    {
        $xml  = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n";
        $xml .= "<sphinx:docset>\n";
        $xml .= "<sphinx:schema>\n";
        foreach (self::$fieldColumns as $element => $column) {
            $xml .= "<sphinx:field name=\"" . $element . "\"/>\n";
        }
        foreach (self::$attrColumns as $element => $column) {
            // sh_time is the only timestamp attribute; all others are integers.
            $type = ($element === 'sh_time') ? 'timestamp' : 'int';
            $xml .= "<sphinx:attr name=\"" . $element . "\" type=\"" . $type . "\"/>\n";
        }
        $xml .= "</sphinx:schema>\n";

        return $xml;
    }
}

script_sphinxindexforsheet::generateIndex();
?>
# Regenerate the xmlpipe2 document file.
# The exporter writes sphinxforsheet.pipe.xml into the current directory, and
# sphinx.conf reads it from application/data, so run the exporter from there.
cd /var/www/htdocs/myweb/application/data || exit 1
# Run in the foreground: indexing must not start before the file is complete
# (a background job plus a fixed sleep can leave the indexer a partial file).
php ../script/sphinxindex.php || exit 1
#index
cd /var/coreseek/coreseek || exit 1
./bin/indexer -c etc/sphinx.conf --rotate ztm_sheet_index
<?php
require_once '/var/www/htdocs/myweb/application/lib/sphinx/sphinxapi.php';

/**
 * Sheet search backed by the Sphinx index "ztm_sheet_index".
 *
 * query() reads the filters from the HTTP request, search() shapes the result
 * for the caller, and searchd() talks to the search daemon.
 */
class application_biz_searchsheet
{
    /** Page size used when none (or an invalid one) is supplied. */
    const DEFAULT_PAGENUM = 10;

    /** Index attributes a client may request as the descending sort key. */
    private static $sortable_attrs = array(
        'sh_id', 'sh_uid', 'sh_time', 'sh_subject', 'sh_question_count',
        'sh_note_count', 'sh_view_count', 'sh_share_count', 'sh_praise_count',
        'sh_download_count', 'sh_admin_note', 'sh_year', 'sh_type', 'sh_grade',
        'sh_city',
    );

    /**
     * Runs a search using the parameters of the current HTTP request.
     *
     * Request parameters: k (keywords), s (subject), ymin/ymax (year range,
     * applied only when both are present), c (city), t (sheet type: an array,
     * a single value or a comma-separated list), g (grade), p (page, 1-based),
     * desc (attribute to sort by, descending).
     *
     * @return array array("verify" => $_POST, "list" => result of search())
     */
    public function query()
    {
        $verify = $_POST;
        $key = isset($_REQUEST['k']) ? strval($_REQUEST['k']) : '';
        $subject = isset($_REQUEST['s']) ? strval($_REQUEST['s']) : '';

        // Year range
        $year_min = -1;
        $year_max = -1;
        if (isset($_REQUEST['ymin']) && isset($_REQUEST['ymax'])) {
            $year_min = intval($_REQUEST['ymin']);
            $year_max = intval($_REQUEST['ymax']);
        }

        // City
        $city = isset($_REQUEST['c']) ? intval($_REQUEST['c']) : 0;

        // Sheet types
        $arr_shtype = isset($_REQUEST['t']) ? $this->parseTypeFilter($_REQUEST['t']) : null;

        $grade = isset($_REQUEST['g']) ? intval($_REQUEST['g']) : -1; // grade
        $page = isset($_REQUEST['p']) ? intval($_REQUEST['p']) : 1;   // page number

        // Sort by download count unless the client names another known
        // attribute; unknown names are ignored rather than sent to searchd.
        $desc_order_by = "sh_download_count";
        if (isset($_REQUEST['desc'])
            && is_string($_REQUEST['desc'])
            && in_array($_REQUEST['desc'], self::$sortable_attrs, true)
        ) {
            $desc_order_by = $_REQUEST['desc'];
        }

        // NOTE(review): 'subject' is forwarded here, but searchd() only
        // filters on a numeric 'sid' entry, so the subject currently has no
        // effect on the results — confirm the intended mapping.
        $list = $this->search(array(
            'key' => $key,
            'subject' => $subject,
            'year_min' => $year_min,
            'year_max' => $year_max,
            'city' => $city,
            'arr_shtype' => $arr_shtype,
            'grade' => $grade,
            'page' => $page,
            'pagenum' => 20,
            'desc_order_by' => $desc_order_by,
        ));

        return array(
            "verify" => $verify,
            "list" => $list,
        );
    }

    /**
     * Runs a search and shapes the result.
     *
     * @param array $condition see searchd()
     * @param bool  $flag      unused; kept for compatibility with callers
     *
     * @return array empty when the query failed; otherwise may contain
     *               'sheet' (one entry per match) and 'other' (total,
     *               total_found, total_page)
     */
    public function search($condition, $flag = false)
    {
        $res = $this->searchd($condition);
        $data = array();
        if ($res) {
            if (isset($res['matches'])) {
                // Use the matches to load the detailed rows from the database.
                foreach ($res['matches'] as $row) {
                    $data['sheet'][] = "查询数据库获得的结果";
                }
            }

            $pagenum = $this->pageSize($condition);
            $total = $res['total'];
            $data['other']['total'] = $total;
            $data['other']['total_found'] = $res['total_found'];
            $data['other']['total_page'] = intval(ceil(intval($total) / $pagenum));
        }
        return $data;
    }

    /**
     * Sends the query to searchd.
     *
     * @param array $condition supported keys: key, sid, year_min, year_max,
     *                         city, arr_shtype, grade, page (1-based),
     *                         pagenum, desc_order_by
     *
     * @return array|false the SphinxClient::Query() result, false on failure
     */
    public function searchd($condition)
    {
        // Keywords
        $key = isset($condition['key']) ? strval($condition['key']) : '';
        // Lower / upper bound of the year range
        $year_min = isset($condition['year_min']) ? intval($condition['year_min']) : -1;
        $year_max = isset($condition['year_max']) ? intval($condition['year_max']) : -1;
        // City
        $city = isset($condition['city']) ? intval($condition['city']) : 0;
        // Sheet types
        $arr_shtype = isset($condition['arr_shtype']) ? $condition['arr_shtype'] : null;
        // Grade
        $grade = isset($condition['grade']) ? intval($condition['grade']) : -1;
        // Page number, starting at 1
        $page = isset($condition['page']) ? intval($condition['page']) : 1;
        // Results per page
        $pagenum = $this->pageSize($condition);
        // Attribute used for descending sort
        $desc_order_by = isset($condition['desc_order_by']) ? $condition['desc_order_by'] : null;

        try {
            $client = $this->connect();
            $client->SetMatchMode(SPH_MATCH_EXTENDED2);
            $client->SetRankingMode(SPH_RANK_WORDCOUNT);
            if ($desc_order_by) {
                $client->SetSortMode(SPH_SORT_ATTR_DESC, $desc_order_by);
            } else {
                $client->SetSortMode(SPH_SORT_RELEVANCE);
            }

            // Subject id
            $sid = isset($condition['sid']) ? intval($condition['sid']) : 0;
            if ($sid > 0) {
                $client->SetFilter('sh_subject', array($sid));
            }

            // Year range
            if ($year_min > -1 && $year_max > -1) {
                // SetFilterRange() requires min <= max; tolerate swapped input.
                $client->SetFilterRange('sh_year', min($year_min, $year_max), max($year_min, $year_max));
            }

            // City
            if ($city > 0) {
                $client->SetFilter('sh_city', array($city));
            }

            // Sheet types
            if ($arr_shtype) {
                // SetFilter() needs a non-empty array of numeric values.
                $client->SetFilter('sh_type', array_map('intval', array_values((array) $arr_shtype)));
            }

            // Grade
            if ($grade > -1) {
                $client->SetFilter('sh_grade', array($grade));
            }

            // Paging
            $page = ($page < 1) ? 1 : $page;
            $offset = ($page - 1) * $pagenum;
            $client->SetLimits($offset, $pagenum);

            $res = $client->Query("$key", "ztm_sheet_index");
            if ($res === false) {
                // Keep returning false, but leave a trace of the reason.
                error_log('Sphinx query failed: ' . $client->GetLastError());
            }
            return $res;
        } catch (Exception $e) {
            return false;
        }
    }

    /**
     * Creates a client pointing at the local searchd.
     *
     * @return SphinxClient|false
     */
    protected function connect()
    {
        try {
            $host = '127.0.0.1';
            $port = 9312;
            $client = new SphinxClient();
            $client->SetServer($host, intval($port));
            return $client;
        } catch (Exception $e) {
            return false;
        }
    }

    /**
     * Returns the page size of a condition, falling back to the default when
     * it is missing or not positive (a zero page size would otherwise cause a
     * division by zero and an invalid limit).
     *
     * @param array $condition
     *
     * @return int
     */
    private function pageSize($condition)
    {
        $pagenum = isset($condition['pagenum']) ? intval($condition['pagenum']) : self::DEFAULT_PAGENUM;
        return ($pagenum > 0) ? $pagenum : self::DEFAULT_PAGENUM;
    }

    /**
     * Turns the raw "t" request value into a list of integer type ids.
     *
     * @param mixed $raw an array (t[]=1&t[]=2), a single value, or a
     *                   comma-separated string
     *
     * @return array|null list of ints, or null when no usable value was given
     */
    private function parseTypeFilter($raw)
    {
        if (!is_array($raw)) {
            $raw = explode(',', strval($raw));
        }

        $types = array();
        foreach ($raw as $type) {
            if (is_scalar($type) && trim(strval($type)) !== '') {
                $types[] = intval($type);
            }
        }

        return $types ? $types : null;
    }
}
?>$obj = new application_biz_searchsheet();