sphinx/coreseek 常见的错误处理

==以下均是本人踩过的坑2333==

安装

wget http://files.opstool.com/man/coreseek-4.1-beta.tar.gz
tar -xzvf coreseek-4.1-beta.tar.gz
cd coreseek-4.1-beta

分词工具安装

cd mmseg-3.2.14
./configure --prefix=/usr/local/coreseek-4.1
make
make install

此时可能会报错：
config.status: error: cannot find input file: src/Makefile.in
如果出现该错误，就在执行 ./configure 之前先执行以下命令（重新生成 autotools 构建文件），然后再重新执行 ./configure、make、make install

aclocal
libtoolize --force
automake --add-missing
autoconf
autoheader
make clean

搜索引擎安装

cd csft-4.1/
./buildconf.sh 
./configure --prefix=/usr/local/coreseek-4.1 --without-unixodbc \
--with-mmseg --with-mmseg-includes=/usr/local/coreseek-4.1/include/mmseg/ \
--with-mmseg-libs=/usr/local/coreseek-4.1/lib/ --with-mysql
make
make install

远程连接

当pdo连接sphinx时
由于==php版本==问题，可能出现以下警告
`Warning: PDO::__construct(): Server sent charset (0) unknown to the client.
Please, report to the developers`
==这是由于php无法识别sphinx网络通讯时的编码造成的==
我的处理方法是==修改源码==：searchd.cpp
找到并打开searchd.cpp（在src目录下，也可以find一下）后搜索server language
将以下代码
"\x00" // server language
变更为
"\x21" // server language
==\x21（十进制 33）是 MySQL 的 collation 编号，表示字符集 utf8、排序规则 utf8_general_ci==
修改后重新编译并安装（make && make install），然后重启 searchd 即可
重新尝试pdo连接，一切顺利2333

附上一份配置文件样板（注意：样板中的路径写的是 /usr/local/coreseek，而上文安装时的 --prefix 是 /usr/local/coreseek-4.1，使用时请改成实际安装目录，或为其建立软链接）

#
# Sphinx configuration file sample
#
# WARNING! While this sample file mentions all available options,
# it contains (very) short helper descriptions only. Please refer to
# doc/sphinx.html for details.
#

#############################################################################
## data source definition
#############################################################################

# Main ("full") data source: fetches every row of the MySQL table myself_goods
# in id ranges, and records the newest update_time in sphinx_counter afterwards.
source myself_goods
{
    # data source type. mandatory, no default value
    # known types are mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc
    type            = mysql

    #####################################################################
    ## SQL settings (for 'mysql' and 'pgsql' types)
    #####################################################################

    # some straightforward parameters for SQL source types
    # NOTE(review): this sample connects as root with an empty password;
    # a dedicated least-privileged account is advisable outside of a sandbox
    sql_host        = 192.168.0.110
    sql_user        = root
    sql_pass        =
    sql_db            = myself
    sql_port        = 3306    # optional, default is 3306

    # UNIX socket name
    # optional, default is empty (reuse client library defaults)
    # usually '/var/lib/mysql/mysql.sock' on Linux
    # usually '/tmp/mysql.sock' on FreeBSD
    #
    # sql_sock        = /tmp/mysql.sock


    # MySQL specific client connection flags
    # optional, default is 0
    #
    # mysql_connect_flags    = 32 # enable compression

    # MySQL specific SSL certificate settings
    # optional, defaults are empty
    #
    # mysql_ssl_cert        = /etc/ssl/client-cert.pem
    # mysql_ssl_key        = /etc/ssl/client-key.pem
    # mysql_ssl_ca        = /etc/ssl/cacert.pem

    # MS SQL specific Windows authentication mode flag
    # optional, default is 0
    #
    # mssql_winauth        = 1 # use currently logged on user credentials


    # MS SQL specific Unicode indexing flag
    # MUST be in sync with charset_type index-level setting
    # optional, default is 0 (request SBCS data)
    #
    # mssql_unicode        = 1 # request Unicode data from server


    # ODBC specific DSN (data source name)
    # mandatory for odbc source type, no default value
    #
    # odbc_dsn        = DBQ=C:\data;DefaultDir=C:\data;Driver={Microsoft Text Driver (*.txt; *.csv)};
    # sql_query        = SELECT id, data FROM documents.csv


    # ODBC and MS SQL specific, per-column buffer sizes
    # optional, default is auto-detect
    #
    # sql_column_buffers    = content=12M, comments=1M


    # pre-query, executed before the main fetch query
    # multi-value, optional, default is empty list of queries
    #
    # here: sets the connection charset to utf8 and captures the current
    # MAX(update_time) of myself_goods in the session variable
    # @last_update_time, which sql_query_post below writes to sphinx_counter
    sql_query_pre = SET NAMES utf8,@last_update_time = (SELECT MAX(update_time) FROM myself_goods);
    # sql_query_pre = REPLACE INTO sphinx_counter SELECT 1, MAX(id) FROM myself_goods
    # ranged fetch: sql_query below is run once per step of 10000 ids
    # between MIN(id) and MAX(id)
    sql_query_range = SELECT MIN(id),MAX(id) FROM myself_goods
    sql_range_step = 10000
    # sql_query_pre        = SET SESSION query_cache_type=OFF


    # main document fetch query
    # mandatory, integer document ID field MUST be the first selected column
    # NOTE(review): 'tag' is selected but not declared as an attribute below,
    # so it is presumably indexed as a plain full-text field -- confirm
    sql_query = \
        SELECT id, `name`, category, brand, `column`, price, bulk_price, bulk_base, \
bulk_time_limit, bulk_switch, wholesale_price_lv1, wholesale_price_lv2, \
wholesale_price_lv3, wholesale_price_lv4, wholesale_price_lv5, freight, \
weight, unit, volume, unit_volume, inventory, inventory_warn, abstract, \
details, tag, shelves, comments_method, comments_audit, UNIX_TIMESTAMP(create_time) as create_time, \
UNIX_TIMESTAMP(update_time) as update_time, is_delete, sales, profit, browse, collect, open_the_user_class \
FROM myself_goods \
WHERE id>=$start AND id<=$end

    # once the fetch query has completed, store the captured max update_time
    # in sphinx_counter (row counter_id=1); the delta source reads that row
    # as the lower bound for its own query
    sql_query_post  = UPDATE sphinx_counter SET last_update_time=@last_update_time where counter_id=1


    # joined/payload field fetch query
    # joined fields let you avoid (slow) JOIN and GROUP_CONCAT
    # payload fields let you attach custom per-keyword values (eg. for ranking)
    #
    # syntax is FIELD-NAME 'from'  ( 'query' | 'payload-query' ); QUERY
    # joined field QUERY should return 2 columns (docid, text)
    # payload field QUERY should return 3 columns (docid, keyword, weight)
    #
    # REQUIRES that query results are in ascending document ID order!
    # multi-value, optional, default is empty list of queries
    #
    # sql_joined_field    = tags from query; SELECT docid, CONCAT('tag',tagid) FROM tags ORDER BY docid ASC
    # sql_joined_field    = wtags from payload-query; SELECT docid, tag, tagweight FROM tags ORDER BY docid ASC


    # file based field declaration
    #
    # content of this field is treated as a file name
    # and the file gets loaded and indexed in place of a field
    #
    # max file size is limited by max_file_field_buffer indexer setting
    # file IO errors are non-fatal and get reported as warnings
    #
    # sql_file_field        = content_file_path


    # range query setup, query that must return min and max ID values
    # optional, default is empty
    #
    # sql_query will need to reference $start and $end boundaries
    # if using ranged query:
    #
    # sql_query        = \
    #    SELECT doc.id, doc.id AS group, doc.title, doc.data \
    #    FROM documents doc \
    #    WHERE id>=$start AND id<=$end
    #
    # sql_query_range        = SELECT MIN(id),MAX(id) FROM documents


    # range query step
    # optional, default is 1024
    #
    # sql_range_step        = 1000


    # unsigned integer attribute declaration
    # multi-value (an arbitrary number of attributes is allowed), optional
    # optional bit size can be specified, default is 32
    #
    # sql_attr_uint        = author_id
    # sql_attr_uint        = forum_id:9 # 9 bits for forum_id
    sql_attr_uint = category
    sql_attr_uint = brand
    sql_attr_uint = column
    sql_attr_uint = bulk_base
    sql_attr_uint = bulk_time_limit
    sql_attr_uint = bulk_switch
    sql_attr_uint = inventory
    sql_attr_uint = inventory_warn    
    sql_attr_uint = shelves
    sql_attr_uint = comments_method
    sql_attr_uint = comments_audit    
    sql_attr_uint = is_delete
    sql_attr_uint = sales
    sql_attr_uint = browse
    sql_attr_uint = collect
    sql_attr_uint = open_the_user_class

    # boolean attribute declaration
    # multi-value (an arbitrary number of attributes is allowed), optional
    # equivalent to sql_attr_uint with 1-bit size
    #
    # sql_attr_bool        = is_deleted


    # bigint attribute declaration
    # multi-value (an arbitrary number of attributes is allowed), optional
    # declares a signed (unlike uint!) 64-bit attribute
    #
    # sql_attr_bigint        = my_bigint_id


    # UNIX timestamp attribute declaration
    # multi-value (an arbitrary number of attributes is allowed), optional
    # similar to integer, but can also be used in date functions
    #
    # sql_attr_timestamp    = posted_ts
    # sql_attr_timestamp    = last_edited_ts
    # both columns are converted with UNIX_TIMESTAMP() in sql_query above
    sql_attr_timestamp = create_time
    sql_attr_timestamp = update_time

    # string ordinal attribute declaration
    # multi-value (an arbitrary number of attributes is allowed), optional
    # sorts strings (bytewise), and stores their indexes in the sorted list
    # sorting by this attr is equivalent to sorting by the original strings
    #
    # sql_attr_str2ordinal    = author_name


    # floating point attribute declaration
    # multi-value (an arbitrary number of attributes is allowed), optional
    # values are stored in single precision, 32-bit IEEE 754 format
    #
    # sql_attr_float        = lat_radians
    # sql_attr_float        = long_radians
    sql_attr_float = price
    sql_attr_float = bulk_price
    sql_attr_float = wholesale_price_lv1
    sql_attr_float = wholesale_price_lv2
    sql_attr_float = wholesale_price_lv3
    sql_attr_float = wholesale_price_lv4
    sql_attr_float = wholesale_price_lv5
    sql_attr_float = freight
    sql_attr_float = weight
    sql_attr_float = volume
    sql_attr_float = profit

    # multi-valued attribute (MVA) attribute declaration
    # multi-value (an arbitrary number of attributes is allowed), optional
    # MVA values are variable length lists of unsigned 32-bit integers
    #
    # syntax is ATTR-TYPE ATTR-NAME 'from' SOURCE-TYPE [;QUERY] [;RANGE-QUERY]
    # ATTR-TYPE is 'uint' or 'timestamp'
    # SOURCE-TYPE is 'field', 'query', or 'ranged-query'
    # QUERY is SQL query used to fetch all ( docid, attrvalue ) pairs
    # RANGE-QUERY is SQL query used to fetch min and max ID values, similar to 'sql_query_range'
    #
    # sql_attr_multi        = uint tag from query; SELECT docid, tagid FROM tags
    # sql_attr_multi        = uint tag from ranged-query; \
    #    SELECT docid, tagid FROM tags WHERE id>=$start AND id<=$end; \
    #    SELECT MIN(docid), MAX(docid) FROM tags
    # sql_attr_multi = string tag from query
    # sql_attr_multi                = string tag from query;

    # string attribute declaration
    # multi-value (an arbitrary number of these is allowed), optional
    # lets you store and retrieve strings
    #
    # sql_attr_string        = stitle

    # wordcount attribute declaration
    # multi-value (an arbitrary number of these is allowed), optional
    # lets you count the words at indexing time
    #
    # sql_attr_str2wordcount    = stitle


    # combined field plus attribute declaration (from a single column)
    # stores column as an attribute, but also indexes it as a full-text field
    #
    # sql_field_string    = author
    # sql_field_str2wordcount    = title
    sql_field_string = name
        sql_field_string = unit
        sql_field_string = unit_volume
        sql_field_string = abstract
        sql_field_string = details

    
    # post-query, executed on sql_query completion
    # optional, default is empty
    # (the active sql_query_post for this source is declared right after
    # sql_query, further up in this section)
    #
    # sql_query_post        =

    
    # post-index-query, executed on successful indexing completion
    # optional, default is empty
    # $maxid expands to max document ID actually fetched from DB
    #
    # sql_query_post_index    = REPLACE INTO counters ( id, val ) \
    #    VALUES ( 'max_indexed_id', $maxid )


    # ranged query throttling, in milliseconds
    # optional, default is 0 which means no delay
    # enforces given delay before each query step
    sql_ranged_throttle    = 0

    # document info query, ONLY for CLI search (ie. testing and debugging)
    # optional, default is empty
    # must contain $id macro and must fetch the document by that id
    # sql_query_info        = SELECT * FROM documents WHERE id=$id

    # kill-list query, fetches the document IDs for kill-list
    # k-list will suppress matches from preceding indexes in the same query
    # optional, default is empty
    #
    # sql_query_killlist    = SELECT id FROM documents WHERE edited>=@last_reindex


    # columns to unpack on indexer side when indexing
    # multi-value, optional, default is empty list
    #
    # unpack_zlib        = zlib_column
    # unpack_mysqlcompress    = compressed_column
    # unpack_mysqlcompress    = compressed_column_2


    # maximum unpacked length allowed in MySQL COMPRESS() unpacker
    # optional, default is 16M
    #
    # unpack_mysqlcompress_maxsize    = 16M


    #####################################################################
    ## xmlpipe2 settings
    #####################################################################

    # type            = xmlpipe

    # shell command to invoke xmlpipe stream producer
    # mandatory
    #
    # xmlpipe_command        = cat /usr/local/coreseek/var/test.xml

    # xmlpipe2 field declaration
    # multi-value, optional, default is empty
    #
    # xmlpipe_field        = subject
    # xmlpipe_field        = content


    # xmlpipe2 attribute declaration
    # multi-value, optional, default is empty
    # all xmlpipe_attr_XXX options are fully similar to sql_attr_XXX
    #
    # xmlpipe_attr_timestamp    = published
    # xmlpipe_attr_uint    = author_id


    # perform UTF-8 validation, and filter out incorrect codes
    # avoids XML parser choking on non-UTF-8 documents
    # optional, default is 0
    #
    # xmlpipe_fixup_utf8    = 1
}

# Delta (incremental) data source: inherits every setting from myself_goods
# and overrides only the queries below, so that just the rows whose
# update_time is newer than the value stored in sphinx_counter are fetched.
#
# NOTE(review): rows returned here also exist, with their older content, in
# the main index; consider declaring sql_query_killlist so that stale matches
# from the main index are suppressed -- confirm against how the two indexes
# are searched/merged.
source myself_goods_delta:myself_goods  
{
    # set the connection charset and capture the current MAX(update_time)
    # in the session variable @last_update_time (upper bound of this run)
    sql_query_pre = SET NAMES utf8,@last_update_time = (SELECT MAX(update_time) FROM myself_goods);
        # empty value overrides the inherited range query; the delta query
        # below does not reference $start/$end
        sql_query_range = 
        # sql_range_step = 10000
        # fetch only rows changed after the stored last_update_time and up to
        # the @last_update_time captured by the pre-query
        sql_query = \
                SELECT id, `name`, category, brand, `column`, price, bulk_price, bulk_base, \
bulk_time_limit, bulk_switch, wholesale_price_lv1, wholesale_price_lv2, \
wholesale_price_lv3, wholesale_price_lv4, wholesale_price_lv5, freight, \
weight, unit, volume, unit_volume, inventory, inventory_warn, abstract, \
details, tag, shelves, comments_method, comments_audit, UNIX_TIMESTAMP(create_time) as create_time, \
UNIX_TIMESTAMP(update_time) as update_time, is_delete, sales, profit, browse, collect, open_the_user_class \
FROM myself_goods WHERE update_time>(SELECT last_update_time FROM sphinx_counter WHERE counter_id = 1) AND update_time <= @last_update_time ####### SQL that fetches the data

    # after the delta rows have been fetched, store the new upper bound in
    # sphinx_counter so the next delta run starts from it
        sql_query_post  = UPDATE sphinx_counter SET last_update_time=@last_update_time where counter_id=1
}


#############################################################################
## index definition
#############################################################################

# local index example
#
# this is an index which is stored locally in the filesystem
#
# all indexing-time options (such as morphology and charsets)
# are configured per local index
# Main index built from the 'myself_goods' source; also serves as the parent
# of the myself_goods_delta index, which inherits all settings declared here.
index myself_goods
{
    # index type
    # optional, default is 'plain'
    # known values are 'plain', 'distributed', and 'rt' (see samples below)
    # type            = plain

    # document source(s) to index
    # multi-value, mandatory
    # document IDs must be globally unique across all sources
    source            = myself_goods

    # index files path and file name, without extension
    # mandatory, path must be writable, extensions will be auto-appended
    path            = /usr/local/coreseek/var/data/myself_goods

    # document attribute values (docinfo) storage mode
    # optional, default is 'extern'
    # known values are 'none', 'extern' and 'inline'
    docinfo            = extern

    # memory locking for cached data (.spa and .spi), to prevent swapping
    # optional, default is 0 (do not mlock)
    # requires searchd to be run from root
    mlock            = 0

    # a list of morphology preprocessors to apply
    # optional, default is empty
    #
    # builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru',
    # 'soundex', and 'metaphone'; additional preprocessors available from
    # libstemmer are 'libstemmer_XXX', where XXX is algorithm code
    # (see libstemmer_c/libstemmer/modules.txt)
    #
    # morphology        = stem_en, stem_ru, soundex
    # morphology        = libstemmer_german
    # morphology        = libstemmer_sv
    morphology        = none

    # minimum word length at which to enable stemming
    # optional, default is 1 (stem everything)
    #
    # min_stemming_len    = 1


    # stopword files list (space separated)
    # optional, default is empty
    # contents are plain text, charset_table and stemming are both applied
    #
    # stopwords        = /usr/local/coreseek/var/data/stopwords.txt


    # wordforms file, in "mapfrom > mapto" plain text format
    # optional, default is empty
    #
    # wordforms        = /usr/local/coreseek/var/data/wordforms.txt


    # tokenizing exceptions file
    # optional, default is empty
    #
    # plain text, case sensitive, space insensitive in map-from part
    # one "Map Several Words => ToASingleOne" entry per line
    #
    # exceptions        = /usr/local/coreseek/var/data/exceptions.txt


    # minimum indexed word length
    # default is 1 (index everything)
    min_word_len        = 1

    # directory containing the Chinese word-segmentation dictionary file uni.lib
    charset_dictpath    = /usr/local/coreseek/etc/    
    # charset encoding type
    # optional, default is 'sbcs'
    # known types are 'sbcs' (Single Byte CharSet) and 'utf-8'
    # NOTE(review): 'zh_cn.utf-8' is not one of the types listed above;
    # presumably a Coreseek-specific value used together with
    # charset_dictpath -- confirm against the Coreseek documentation
    charset_type        = zh_cn.utf-8

    # charset definition and case folding rules "table"
    # optional, default value depends on charset_type
    #
    # defaults are configured to include English and Russian characters only
    # you need to change the table to include additional ones
    # this behavior MAY change in future versions
    #
    # 'sbcs' default value is
    # charset_table        = 0..9, A..Z->a..z, _, a..z, U+A8->U+B8, U+B8, U+C0..U+DF->U+E0..U+FF, U+E0..U+FF
    #
    # 'utf-8' default value is
    # charset_table        = 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F


    # ignored characters list
    # optional, default value is empty
    #
    # ignore_chars        = U+00AD


    # minimum word prefix length to index
    # optional, default is 0 (do not index prefixes)
    #
    # min_prefix_len        = 0


    # minimum word infix length to index
    # optional, default is 0 (do not index infixes)
    #
    # min_infix_len        = 0


    # list of fields to limit prefix/infix indexing to
    # optional, default value is empty (index all fields in prefix/infix mode)
    #
    # prefix_fields        = filename
    # infix_fields        = url, domain


    # enable star-syntax (wildcards) when searching prefix/infix indexes
    # search-time only, does not affect indexing, can be 0 or 1
    # optional, default is 0 (do not use wildcard syntax)
    #
    # enable_star        = 1


    # expand keywords with exact forms and/or stars when searching fit indexes
    # search-time only, does not affect indexing, can be 0 or 1
    # optional, default is 0 (do not expand keywords)
    #
    # expand_keywords        = 1

    
    # n-gram length to index, for CJK indexing
    # only supports 0 and 1 for now, other lengths to be implemented
    # optional, default is 0 (disable n-grams)
    #
    # ngram_len        = 1


    # n-gram characters list, for CJK indexing
    # optional, default is empty
    #
    # ngram_chars        = U+3000..U+2FA1F


    # phrase boundary characters list
    # optional, default is empty
    #
    # phrase_boundary        = ., ?, !, U+2026 # horizontal ellipsis


    # phrase boundary word position increment
    # optional, default is 0
    #
    # phrase_boundary_step    = 100


    # blended characters list
    # blended chars are indexed both as separators and valid characters
    # for instance, AT&T will result in 3 tokens ("at", "t", and "at&t")
    # optional, default is empty
    #
    # blend_chars        = +, &, U+23


    # blended token indexing mode
    # a comma separated list of blended token indexing variants
    # known variants are trim_none, trim_head, trim_tail, trim_both, skip_pure
    # optional, default is trim_none
    #
    # blend_mode        = trim_tail, skip_pure


    # whether to strip HTML tags from incoming documents
    # known values are 0 (do not strip) and 1 (do strip)
    # optional, default is 0
    html_strip = 1

    # what HTML attributes to index if stripping HTML
    # optional, default is empty (do not index anything)
    #
    # html_index_attrs    = img=alt,title; a=title;


    # what HTML elements contents to strip
    # optional, default is empty (do not strip element contents)
    #
    # html_remove_elements    = style, script


    # whether to preopen index data files on startup
    # optional, default is 0 (do not preopen), searchd-only
    #
    # preopen            = 1


    # whether to keep dictionary (.spi) on disk, or cache it in RAM
    # optional, default is 0 (cache in RAM), searchd-only
    #
    # ondisk_dict        = 1


    # whether to enable in-place inversion (2x less disk, 90-95% speed)
    # optional, default is 0 (use separate temporary files), indexer-only
    #
    # inplace_enable        = 1


    # in-place fine-tuning options
    # optional, defaults are listed below
    #
    # inplace_hit_gap        = 0 # preallocated hitlist gap size
    # inplace_docinfo_gap    = 0 # preallocated docinfo gap size
    # inplace_reloc_factor    = 0.1 # relocation buffer size within arena
    # inplace_write_factor    = 0.1 # write buffer size within arena


    # whether to index original keywords along with stemmed versions
    # enables "=exactform" operator to work
    # optional, default is 0
    #
    # index_exact_words    = 1


    # position increment on overshort (less than min_word_len) words
    # optional, allowed values are 0 and 1, default is 1
    #
    # overshort_step        = 1


    # position increment on stopword
    # optional, allowed values are 0 and 1, default is 1
    #
    # stopword_step        = 1


    # hitless words list
    # positions for these keywords will not be stored in the index
    # optional, allowed values are 'all', or a list file name
    #
    # hitless_words        = all
    # hitless_words        = hitless.txt


    # detect and index sentence and paragraph boundaries
    # required for the SENTENCE and PARAGRAPH operators to work
    # optional, allowed values are 0 and 1, default is 0
    #
    # index_sp            = 1


    # index zones, delimited by HTML/XML tags
    # a comma separated list of tags and wildcards
    # required for the ZONE operator to work
    # optional, default is empty string (do not index zones)
    #
    # index_zones        = title, h*, th
}


# inherited index example
#
# all the parameters are copied from the parent index,
# and may then be overridden in this index definition
# Delta index: inherits every setting from the myself_goods index and
# overrides only the data source and the on-disk path.
index myself_goods_delta : myself_goods
{
    # rows come from the delta source instead of the full source
    source = myself_goods_delta
    # separate file name so the delta files do not overwrite the main index
    path = /usr/local/coreseek/var/data/myself_goods_delta
}


#############################################################################
## indexer settings
#############################################################################

# Settings for the indexer program (memory and I/O limits used while
# building indexes).
indexer
{
    # memory limit, in bytes, kilobytes (16384K) or megabytes (256M)
    # optional, default is 32M, max is 2047M, recommended is 256M to 1024M
    # NOTE(review): left at the 32M default, which is below the recommended
    # range quoted above -- consider raising it for large tables
    mem_limit        = 32M

    # maximum IO calls per second (for I/O throttling)
    # optional, default is 0 (unlimited)
    #
    # max_iops        = 40


    # maximum IO call size, bytes (for I/O throttling)
    # optional, default is 0 (unlimited)
    #
    # max_iosize        = 1048576


    # maximum xmlpipe2 field length, bytes
    # optional, default is 2M
    #
    # max_xmlpipe2_field    = 4M


    # write buffer size, bytes
    # several (currently up to 4) buffers will be allocated
    # write buffers are allocated in addition to mem_limit
    # optional, default is 1M
    #
    # write_buffer        = 1M


    # maximum file field adaptive buffer size
    # optional, default is 8M, minimum is 1M
    #
    # max_file_field_buffer    = 32M
}

#############################################################################
## searchd settings
#############################################################################

# Settings for the searchd daemon: listeners, log/pid files, limits, and the
# MySQL-protocol (SphinxQL) compatibility options.
searchd
{
    # [hostname:]port[:protocol], or /unix/socket/path to listen on
    # known protocols are 'sphinx' (SphinxAPI) and 'mysql41' (SphinxQL)
    #
    # multi-value, multiple listen points are allowed
    # optional, defaults are 9312:sphinx and 9306:mysql41, as below
    #
    # listen            = 127.0.0.1
    # listen            = 192.168.0.1:9312
    # listen            = 9312
    # listen            = /var/run/searchd.sock
    listen            = 9312:sphinx
    listen            = 9306:mysql41

    # log file, searchd run info is logged here
    # optional, default is 'searchd.log'
    log            = /usr/local/coreseek/var/log/searchd.log

    # query log file, all search queries are logged here
    # optional, default is empty (do not log queries)
    query_log        = /usr/local/coreseek/var/log/query.log

    # client read timeout, seconds
    # optional, default is 5
    read_timeout        = 5

    # request timeout, seconds
    # optional, default is 5 minutes
    client_timeout        = 300

    # maximum amount of children to fork (concurrent searches to run)
    # optional, default is 0 (unlimited)
    max_children        = 30

    # PID file, searchd process ID file name
    # mandatory
    pid_file        = /usr/local/coreseek/var/log/searchd.pid

    # max amount of matches the daemon ever keeps in RAM, per-index
    # WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL
    # default is 1000 (just like Google)
    max_matches        = 1000

    # seamless rotate, prevents rotate stalls if precaching huge datasets
    # optional, default is 1
    seamless_rotate        = 1

    # whether to forcibly preopen all indexes on startup
    # optional, default is 1 (preopen everything)
    preopen_indexes        = 1

    # whether to unlink .old index copies on successful rotation.
    # optional, default is 1 (do unlink)
    unlink_old        = 1

    # attribute updates periodic flush timeout, seconds
    # updates will be automatically dumped to disk this frequently
    # optional, default is 0 (disable periodic flush)
    #
    # attr_flush_period    = 900


    # instance-wide ondisk_dict defaults (per-index value take precedence)
    # optional, default is 0 (precache all dictionaries in RAM)
    #
    # ondisk_dict_default    = 1


    # MVA updates pool size
    # shared between all instances of searchd, disables attr flushes!
    # optional, default size is 1M
    mva_updates_pool    = 1M

    # max allowed network packet size
    # limits both query packets from clients, and responses from agents
    # optional, default size is 8M
    max_packet_size        = 8M

    # crash log path
    # searchd will (try to) log crashed query to 'crash_log_path.PID' file
    # optional, default is empty (do not create crash logs)
    #
    # crash_log_path        = /usr/local/coreseek/var/log/crash


    # max allowed per-query filter count
    # optional, default is 256
    max_filters        = 256

    # max allowed per-filter values count
    # optional, default is 4096
    max_filter_values    = 4096


    # socket listen queue length
    # optional, default is 5
    #
    # listen_backlog        = 5


    # per-keyword read buffer size
    # optional, default is 256K
    #
    # read_buffer        = 256K


    # unhinted read size (currently used when reading hits)
    # optional, default is 32K
    #
    # read_unhinted        = 32K


    # max allowed per-batch query count (aka multi-query count)
    # optional, default is 32
    max_batch_queries    = 32


    # max common subtree document cache size, per-query
    # optional, default is 0 (disable subtree optimization)
    #
    # subtree_docs_cache    = 4M


    # max common subtree hit cache size, per-query
    # optional, default is 0 (disable subtree optimization)
    #
    # subtree_hits_cache    = 8M


    # multi-processing mode (MPM)
    # known values are none, fork, prefork, and threads
    # optional, default is fork
    #
    workers            = threads # for RT to work


    # max threads to create for searching local parts of a distributed index
    # optional, default is 0, which means disable multi-threaded searching
    # should work with all MPMs (ie. does NOT require workers=threads)
    #
    # dist_threads        = 4


    # binlog files path; use empty string to disable binlog
    # optional, default is build-time configured data directory
    #
    # binlog_path        = # disable logging
    # binlog_path        = /usr/local/coreseek/var/data # binlog.001 etc will be created there


    # binlog flush/sync mode
    # 0 means flush and sync every second
    # 1 means flush and sync every transaction
    # 2 means flush every transaction, sync every second
    # optional, default is 2
    #
    # binlog_flush        = 2


    # binlog per-file size limit
    # optional, default is 128M, 0 means no limit
    #
    # binlog_max_log_size    = 256M


    # per-thread stack size, only affects workers=threads mode
    # optional, default is 64K
    #
    # thread_stack            = 128K


    # per-keyword expansion limit (for dict=keywords prefix searches)
    # optional, default is 0 (no limit)
    #
    # expansion_limit        = 1000


    # RT RAM chunks flush period
    # optional, default is 0 (no periodic flush)
    #
    # rt_flush_period        = 900


    # query log file format
    # optional, known values are plain and sphinxql, default is plain
    #
    # query_log_format        = sphinxql


    # version string returned to MySQL network protocol clients
    # optional, default is empty (use Sphinx version)
    #
    # mysql_version_string    = 5.0.37
    mysql_version_string = 5.6.16

    # trusted plugin directory
    # optional, default is empty (disable UDFs)
    #
    # plugin_dir            = /usr/local/sphinx/lib


    # default server-wide collation
    # optional, default is libc_ci
    #
    # collation_server        = utf8_general_ci
    collation_server = utf8_general_ci

    # server-wide locale for libc based collations
    # optional, default is C
    #
    # collation_libc_locale    = ru_RU.UTF-8
    # locale names are case-sensitive in the language_TERRITORY part on
    # glibc systems, so the territory must be upper case (zh_CN, not zh_cn);
    # the locale must also be installed on the host (check with `locale -a`)
    collation_libc_locale = zh_CN.UTF-8

    # threaded server watchdog (only used in workers=threads mode)
    # optional, values are 0 and 1, default is 1 (watchdog on)
    #
    # watchdog                = 1

    
    # SphinxQL compatibility mode (legacy columns and their names)
    # optional, default is 0 (SQL compliant syntax and result sets)
    #
    # compat_sphinxql_magics    = 1
}

# --eof--

sphinx/coreseek 常见的错误处理

安装

远程连接

附上一份配置文件样板

你可能感兴趣的:(sphinx,coreseek)