高速索引 (在新款CPU上,近10 MB/秒); 高速搜索 (2-4G的文本量中平均查询速度不到0.1秒); 高可扩展性 (单CPU上最大可支持100 GB的文本,100M文档); 提供良好的相关性排名; 支持分布式搜索; 提供文档摘要生成; 提供从MySQL内部的插件式存储引擎上搜索; 支持布尔、短语和词邻近度(proximity)查询; 支持每个文档多个全文检索域(默认最大32个); 支持每个文档多属性; 支持停用词(stopwords); 支持单字节编码与UTF-8编码;
[root@test3 ~]# tar xf coreseek-4.1-beta.tar.gz -C /usr/src [root@test3 ~]# cd /usr/src/coreseek-4.1-beta [root@test3 coreseek-4.1-beta]# ls csft-4.1 mmseg-3.2.14 README.txt testpack # 其中csft-4.1为sphinx源码,mmseg为中文分词安装包 [root@test3 coreseek-4.1-beta]# cd mmseg-3.2.14 [root@test3 mmseg-3.2.14]# ./bootstrap # 创建生成的文件 [root@test3 mmseg-3.2.14]# ./configure --prefix=/usr/local/mmseg3 [root@test3 mmseg-3.2.14]# make && make install
[root@test3 mmseg-3.2.14]# cd ../csft-4.1/ [root@test3 csft-4.1]# ./buildconf.sh # 检测并创建安装文件 [root@test3 csft-4.1]# ./configure --prefix=/usr/local/coreseek --without-unixodbc --with-mmseg --with-mmseg-includes=/usr/local/mmseg3/include/mmseg/ --with-mmseg-libs=/usr/local/mmseg3/lib/ --with-mysql [root@test3 csft-4.1]# make && make install
[root@test3 csft-4.1]# cd /usr/local/coreseek/ [root@test3 coreseek]# ls bin etc share var [root@test3 coreseek]# cd etc/ && ll -rw-r--r--. 1 root root 903 Jul 28 10:08 example.sql # 示例的数据 -rw-r--r--. 1 root root 31081 Jul 28 09:25 sphinx.conf.dist # 完整配置文件 -rw-r--r--. 1 root root 1163 Jun 12 00:40 sphinx-min.conf.dist # 最小化配置文件 # 在mysql中创建一个test数据库,将example.sql导入 [root@test3 etc]# mysql -uroot -ppasswd -e 'create database test;' [root@test3 etc]# mysql -uroot -ppasswd < example.sql # 生成配置文件 [root@test3 etc]# cp sphinx-min.conf.dist csft.conf [root@test3 etc]# vim csft.conf # # Minimal Sphinx configuration sample (clean, simple, functional) # source src1 # 数据源 { type = mysql sql_host = localhost sql_user = root sql_pass = passwd sql_db = test sql_port = 3306 # optional, default is 3306 sql_query = \ SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \ FROM documents sql_attr_uint = group_id sql_attr_timestamp = date_added sql_query_info = SELECT * FROM documents WHERE id=$id } index test1 # 索引 { source = src1 path = /usr/local/coreseek/var/data/test1 docinfo = extern charset_dictpath = /usr/local/mmseg3/etc/ charset_type = zh_cn.utf-8 } index testrt { type = rt rt_mem_limit = 32M path = /usr/local/coreseek/var/data/testrt charset_type = utf-8 rt_field = title rt_field = content rt_attr_uint = gid } indexer # 构建索引服务 { mem_limit = 32M } searchd # 搜索查询服务 { listen = 9312 listen = 9306:mysql41 log = /usr/local/coreseek/var/log/searchd.log query_log = /usr/local/coreseek/var/log/query.log read_timeout = 5 max_children = 30 pid_file = /usr/local/coreseek/var/log/searchd.pid max_matches = 1000 seamless_rotate = 1 preopen_indexes = 1 unlink_old = 1 workers = threads # for RT to work }
[root@test3 etc]# /usr/local/coreseek/bin/searchd -c csft.conf # 启动服务 Coreseek Fulltext 4.1 [ Sphinx 2.0.2-dev (r2922)] Copyright (c) 2007-2011, Beijing Choice Software Technologies Inc (http://www.coreseek.com) using config file 'csft.conf'... WARNING: compat_sphinxql_magics=1 is deprecated; please update your application and config listening on all interfaces, port=9312 listening on all interfaces, port=9306 precaching index 'test1' precaching index 'testrt' precached 2 indexes in 0.001 sec [root@test3 etc]# ss -tnl | grep -e 9306 -e 9312 # 端口已开启 LISTEN 0 5 *:9306 *:* LISTEN 0 5 *:9312 *:* # 测试 [root@test3 etc]# /usr/local/coreseek/bin/indexer --all --rotate # 创建索引 [root@test3 etc]# /usr/local/coreseek/bin/search test # 查找test关键字 Coreseek Fulltext 4.1 [ Sphinx 2.0.2-dev (r2922)] Copyright (c) 2007-2011, Beijing Choice Software Technologies Inc (http://www.coreseek.com) using config file '/usr/local/coreseek/etc/csft.conf'... index 'test1': query 'test ': returned 3 matches of 3 total in 0.000 sec displaying matches: 1. document=1, weight=2421, group_id=1, date_added=Thu Jul 28 13:39:05 2016 id=1 group_id=1 group_id2=5 date_added=2016-07-28 13:39:05 title=test one content=this is my test document number one. also checking search within phrases. 2. document=2, weight=2421, group_id=1, date_added=Thu Jul 28 13:39:05 2016 id=2 group_id=1 group_id2=6 date_added=2016-07-28 13:39:05 title=test two content=this is my test document number two 3. document=4, weight=1442, group_id=2, date_added=Thu Jul 28 13:39:05 2016 id=4 group_id=2 group_id2=8 date_added=2016-07-28 13:39:05 title=doc number four content=this is to test groups words: 1. 'test': 3 documents, 5 hits index 'testrt': search error: failed to open /usr/local/coreseek/var/data/testrt.sph: No such file or directory.
[root@test3 etc]# vim csft.conf # # Minimal Sphinx configuration sample (clean, simple, functional) # source host { type = mysql sql_host = localhost sql_user = root sql_pass = passwd sql_db = test sql_port = 3306 # optional, default is 3306 } source src1:host { sql_query = \ SELECT id, group_id, UNIX_TIMESTAMP(date_added) AS date_added, title, content \ FROM documents sql_joined_field = tags from query; SELECT id, content FROM tags ORDER BY id ASC sql_attr_uint = group_id sql_attr_timestamp = date_added sql_query_info = SELECT * FROM documents WHERE id=$id } source src2:host { sql_query = SELECT id, content FROM tags ORDER BY id ASC sql_attr_uint = id sql_query_info = SELECT * FROM tags WHERE id=$id } index test1 { source = src1 path = /usr/local/coreseek/var/data/test1 docinfo = extern charset_dictpath = /usr/local/mmseg3/etc/ charset_type = zh_cn.utf-8 } index test2 { source = src2 path = /usr/local/coreseek/var/data/test2 docinfo = extern charset_dictpath = /usr/local/mmseg3/etc/ charset_type = zh_cn.utf-8 } index testrt { type = rt rt_mem_limit = 32M path = /usr/local/coreseek/var/data/testrt charset_type = utf-8 rt_field = title rt_field = content rt_attr_uint = gid } indexer { mem_limit = 32M } searchd { listen = 9312 listen = 9306:mysql41 log = /usr/local/coreseek/var/log/searchd.log query_log = /usr/local/coreseek/var/log/query.log read_timeout = 5 max_children = 30 pid_file = /usr/local/coreseek/var/log/searchd.pid max_matches = 1000 seamless_rotate = 1 preopen_indexes = 1 unlink_old = 1 workers = threads # for RT to work }
mysql> select * from tags; +----+------------------+ | id | content | +----+------------------+ | 1 | test one time | | 2 | test two times | | 3 | test three times | | 4 | test four times | +----+------------------+ 4 rows in set (0.00 sec) mysql> select * from documents; +----+----------+-----------+---------------------+-----------------+-----------------------+ | id | group_id | group_id2 | date_added | title | content | +----+----------+-----------+---------------------+-----------------+-----------------------+ | 1 | 1 | 5 | 2016-07-28 13:57:00 | test one | 第一个测试文档 | | 2 | 1 | 6 | 2016-07-28 13:57:00 | test two | 第二个测试文档 | | 3 | 2 | 7 | 2016-07-28 13:57:00 | another doc | 另一个文档 | | 4 | 2 | 8 | 2016-07-28 13:57:00 | doc number four | 测试组 | +----+----------+-----------+---------------------+-----------------+-----------------------+ 4 rows in set (0.00 sec)
[root@test3 etc]# /usr/local/coreseek/bin/search test Coreseek Fulltext 4.1 [ Sphinx 2.0.2-dev (r2922)] Copyright (c) 2007-2011, Beijing Choice Software Technologies Inc (http://www.coreseek.com) using config file '/usr/local/coreseek/etc/csft.conf'... index 'test1': query 'test ': returned 4 matches of 4 total in 0.000 sec displaying matches: 1. document=1, weight=2230, group_id=1, date_added=Thu Jul 28 13:57:00 2016 id=1 group_id=1 group_id2=5 date_added=2016-07-28 13:57:00 title=test one content=第一个测试文档 2. document=2, weight=2230, group_id=1, date_added=Thu Jul 28 13:57:00 2016 id=2 group_id=1 group_id2=6 date_added=2016-07-28 13:57:00 title=test two content=第二个测试文档 3. document=3, weight=1304, group_id=2, date_added=Thu Jul 28 13:57:00 2016 id=3 group_id=2 group_id2=7 date_added=2016-07-28 13:57:00 title=another doc content=另一个文档 4. document=4, weight=1304, group_id=2, date_added=Thu Jul 28 13:57:00 2016 id=4 group_id=2 group_id2=8 date_added=2016-07-28 13:57:00 title=doc number four content=测试组 words: 1. 'test': 4 documents, 6 hits index 'testrt': search error: failed to open /usr/local/coreseek/var/data/testrt.sph: No such file or directory.