hongtoushizi

coreseek的csft.conf配置文件（原创）

#

# Sphinx configuration file sample

#

# WARNING! While this sample file mentions all available options,

# it contains (very) short helper descriptions only. Please refer to

# doc/sphinx.html for details.

#

#############################################################################

# 以下配置经本人亲测并已成功部署；关于coreseek的安装总结，请参见前面的文章。

## data source definition

#############################################################################

source main

{

# data source type. mandatory, no default value

# known types are mysql, pgsql, mssql, xmlpipe, xmlpipe2, odbc

type= mysql

#####################################################################

## SQL settings (for 'mysql' and 'pgsql' types)

#####################################################################

# some straightforward parameters for SQL source types

sql_host= localhost

sql_user= root

sql_pass= kuulabu123

sql_db= test

sql_port= 3306 # optional, default is 3306

# UNIX socket name

# optional, default is empty (reuse client library defaults)

# usually '/var/lib/mysql/mysql.sock' on Linux

# usually '/tmp/mysql.sock' on FreeBSD

#

sql_sock= /tmp/mysql.sock

# MySQL specific client connection flags

# optional, default is 0

#

# mysql_connect_flags= 32 # enable compression

# MySQL specific SSL certificate settings

# optional, defaults are empty

#

# mysql_ssl_cert= /etc/ssl/client-cert.pem

# mysql_ssl_key= /etc/ssl/client-key.pem

# mysql_ssl_ca= /etc/ssl/cacert.pem

# MS SQL specific Windows authentication mode flag

# (NOTE: the "MUST be in sync with charset_type index-level setting" requirement applies to mssql_unicode below, not to this flag)

# optional, default is 0

#

# mssql_winauth= 1 # use currently logged on user credentials

# MS SQL specific Unicode indexing flag

# optional, default is 0 (request SBCS data)

#

# mssql_unicode= 1 # request Unicode data from server

# ODBC specific DSN (data source name)

# mandatory for odbc source type, no default value

#

# odbc_dsn= DBQ=C:\data;DefaultDir=C:\data;Driver={Microsoft Text Driver (*.txt; *.csv)};

# sql_query= SELECT id, data FROM documents.csv

# pre-query, executed before the main fetch query

# multi-value, optional, default is empty list of queries

#

sql_query_pre= SET NAMES utf8

sql_query_pre= SET SESSION query_cache_type=OFF

# main document fetch query

# mandatory, integer document ID field MUST be the first selected column

sql_query = select id ,title ,content from post

# range query setup, query that must return min and max ID values

# optional, default is empty

#

# sql_query will need to reference $start and $end boundaries

# if using ranged query:

#

# sql_query= \

#SELECT doc.id, doc.id AS group, doc.title, doc.data \

#FROM documents doc \

#WHERE id>=$start AND id<=$end

#

# sql_query_range= SELECT MIN(id),MAX(id) FROM documents

# range query step

# optional, default is 1024

#

# sql_range_step= 1000

# unsigned integer attribute declaration

# multi-value (an arbitrary number of attributes is allowed), optional

# optional bit size can be specified, default is 32

#

# sql_attr_uint= author_id

# sql_attr_uint= forum_id:9 # 9 bits for forum_id

#sql_attr_uint= group_id

# boolean attribute declaration

# multi-value (an arbitrary number of attributes is allowed), optional

# equivalent to sql_attr_uint with 1-bit size

#

# sql_attr_bool= is_deleted

# bigint attribute declaration

# multi-value (an arbitrary number of attributes is allowed), optional

# declares a signed (unlike uint!) 64-bit attribute

#

# sql_attr_bigint= my_bigint_id

# UNIX timestamp attribute declaration

# multi-value (an arbitrary number of attributes is allowed), optional

# similar to integer, but can also be used in date functions

#

# sql_attr_timestamp= posted_ts

# sql_attr_timestamp= last_edited_ts

# sql_attr_timestamp= date_added

# string ordinal attribute declaration

# multi-value (an arbitrary number of attributes is allowed), optional

# sorts strings (bytewise), and stores their indexes in the sorted list

# sorting by this attr is equivalent to sorting by the original strings

#

# sql_attr_str2ordinal= author_name

# floating point attribute declaration

# multi-value (an arbitrary number of attributes is allowed), optional

# values are stored in single precision, 32-bit IEEE 754 format

#

# sql_attr_float = lat_radians

# sql_attr_float = long_radians

# multi-valued attribute (MVA) attribute declaration

# multi-value (an arbitrary number of attributes is allowed), optional

# MVA values are variable length lists of unsigned 32-bit integers

#

# syntax is ATTR-TYPE ATTR-NAME 'from' SOURCE-TYPE [;QUERY] [;RANGE-QUERY]

# ATTR-TYPE is 'uint' or 'timestamp'

# SOURCE-TYPE is 'field', 'query', or 'ranged-query'

# QUERY is SQL query used to fetch all ( docid, attrvalue ) pairs

# RANGE-QUERY is SQL query used to fetch min and max ID values, similar to 'sql_query_range'

#

# sql_attr_multi= uint tag from query; SELECT id, tag FROM tags

# sql_attr_multi= uint tag from ranged-query; \

#SELECT id, tag FROM tags WHERE id>=$start AND id<=$end; \

#SELECT MIN(id), MAX(id) FROM tags

# post-query, executed on sql_query completion

# optional, default is empty

#

# sql_query_post=

# post-index-query, executed on successful indexing completion

# optional, default is empty

# $maxid expands to max document ID actually fetched from DB

#

# sql_query_post_index = REPLACE INTO counters ( id, val ) \

#VALUES ( 'max_indexed_id', $maxid )

# ranged query throttling, in milliseconds

# optional, default is 0 which means no delay

# enforces given delay before each query step

sql_ranged_throttle= 0

# document info query, ONLY for CLI search (ie. testing and debugging)

# optional, default is empty

# must contain $id macro and must fetch the document by that id

sql_query_info= SELECT * FROM post WHERE id=$id

# kill-list query, fetches the document IDs for kill-list

# k-list will suppress matches from preceding indexes in the same query

# optional, default is empty

#

# sql_query_killlist= SELECT id FROM documents WHERE edited>=@last_reindex

# columns to unpack on indexer side when indexing

# multi-value, optional, default is empty list

#

# unpack_zlib = zlib_column

# unpack_mysqlcompress = compressed_column

# unpack_mysqlcompress = compressed_column_2

# maximum unpacked length allowed in MySQL COMPRESS() unpacker

# optional, default is 16M

#

# unpack_mysqlcompress_maxsize = 16M

#####################################################################

## xmlpipe settings

#####################################################################

# type= xmlpipe

# shell command to invoke xmlpipe stream producer

# mandatory

#

# xmlpipe_command= cat /usr/local/coreseek/var/test.xml

#####################################################################

## xmlpipe2 settings

#####################################################################

# type= xmlpipe2

# xmlpipe_command= cat /usr/local/coreseek/var/test2.xml

# xmlpipe2 field declaration

# multi-value, optional, default is empty

#

# xmlpipe_field= subject

# xmlpipe_field= content

# xmlpipe2 attribute declaration

# multi-value, optional, default is empty

# all xmlpipe_attr_XXX options are fully similar to sql_attr_XXX

#

# xmlpipe_attr_timestamp= published

# xmlpipe_attr_uint= author_id

# perform UTF-8 validation, and filter out incorrect codes

# avoids XML parser choking on non-UTF-8 documents

# optional, default is 0

#

# xmlpipe_fixup_utf8= 1

}

# inherited source example

#

# all the parameters are copied from the parent source,

# and may then be overridden in this source definition

#source src1throttled : src1

#{

#sql_ranged_throttle= 100

#}

#############################################################################

## index definition

#############################################################################

# local index example

#

# this is an index which is stored locally in the filesystem

#

# all indexing-time options (such as morphology and charsets)

# are configured per local index

index main

{

# document source(s) to index

# multi-value, mandatory

# document IDs must be globally unique across all sources

source= main

# index files path and file name, without extension

# mandatory, path must be writable, extensions will be auto-appended

path= /usr/local/coreseek/var/data/test1

# document attribute values (docinfo) storage mode

# optional, default is 'extern'

# known values are 'none', 'extern' and 'inline'

docinfo= extern

# memory locking for cached data (.spa and .spi), to prevent swapping

# optional, default is 0 (do not mlock)

# requires searchd to be run from root

mlock= 0

# a list of morphology preprocessors to apply

# optional, default is empty

#

# builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru',

# 'soundex', and 'metaphone'; additional preprocessors available from

# libstemmer are 'libstemmer_XXX', where XXX is algorithm code

# (see libstemmer_c/libstemmer/modules.txt)

#

# morphology = stem_en, stem_ru, soundex

# morphology= libstemmer_german

# morphology= libstemmer_sv

morphology= none

# minimum word length at which to enable stemming

# optional, default is 1 (stem everything)

#

# min_stemming_len= 1

# stopword files list (space separated)

# optional, default is empty

# contents are plain text, charset_table and stemming are both applied

#

#stopwords= G:\data\stopwords.txt

# wordforms file, in "mapfrom > mapto" plain text format

# optional, default is empty

#

#wordforms= G:\data\wordforms.txt

# tokenizing exceptions file

# optional, default is empty

#

# plain text, case sensitive, space insensitive in map-from part

# one "Map Several Words => ToASingleOne" entry per line

#

# exceptions= /data/exceptions.txt

# minimum indexed word length

# default is 1 (index everything)

min_word_len= 1

# charset encoding type

# optional, default is 'sbcs'

# known types are 'sbcs' (Single Byte CharSet) and 'utf-8'

#charset_type= sbcs

charset_type = zh_cn.utf-8

charset_dictpath = /usr/local/mmseg3/etc/

# charset definition and case folding rules "table"

# optional, default value depends on charset_type

#

# defaults are configured to include English and Russian characters only

# you need to change the table to include additional ones

# this behavior MAY change in future versions

#

# 'sbcs' default value is

# charset_table= 0..9, A..Z->a..z, _, a..z, U+A8->U+B8, U+B8, U+C0..U+DF->U+E0..U+FF, U+E0..U+FF

#

# 'utf-8' default value is

# charset_table= 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F

# ignored characters list

# optional, default value is empty

#

# ignore_chars= U+00AD

# minimum word prefix length to index

# optional, default is 0 (do not index prefixes)

#

# min_prefix_len= 0

# minimum word infix length to index

# optional, default is 0 (do not index infixes)

#

# min_infix_len= 0

# list of fields to limit prefix/infix indexing to

# optional, default value is empty (index all fields in prefix/infix mode)

#

# prefix_fields= filename

# infix_fields= url, domain

# enable star-syntax (wildcards) when searching prefix/infix indexes

# known values are 0 and 1

# optional, default is 0 (do not use wildcard syntax)

#

# enable_star= 1

# n-gram length to index, for CJK indexing

# only supports 0 and 1 for now, other lengths to be implemented

# optional, default is 0 (disable n-grams)

#

# ngram_len= 1

# n-gram characters list, for CJK indexing

# optional, default is empty

#

# ngram_chars= U+3000..U+2FA1F

# phrase boundary characters list

# optional, default is empty

#

# phrase_boundary= ., ?, !, U+2026 # horizontal ellipsis

# phrase boundary word position increment

# optional, default is 0

#

# phrase_boundary_step= 100

# whether to strip HTML tags from incoming documents

# known values are 0 (do not strip) and 1 (do strip)

# optional, default is 0

html_strip= 0

# what HTML attributes to index if stripping HTML

# optional, default is empty (do not index anything)

#

# html_index_attrs= img=alt,title; a=title;

# what HTML elements contents to strip

# optional, default is empty (do not strip element contents)

#

# html_remove_elements= style, script

# whether to preopen index data files on startup

# optional, default is 0 (do not preopen), searchd-only

#

# preopen= 1

# whether to keep dictionary (.spi) on disk, or cache it in RAM

# optional, default is 0 (cache in RAM), searchd-only

#

# ondisk_dict= 1

# whether to enable in-place inversion (2x less disk, 90-95% speed)

# optional, default is 0 (use separate temporary files), indexer-only

#

# inplace_enable= 1

# in-place fine-tuning options

# optional, defaults are listed below

#

# inplace_hit_gap= 0 # preallocated hitlist gap size

# inplace_docinfo_gap= 0 # preallocated docinfo gap size

# inplace_reloc_factor= 0.1 # relocation buffer size within arena

# inplace_write_factor= 0.1 # write buffer size within arena

# whether to index original keywords along with stemmed versions

# enables "=exactform" operator to work

# optional, default is 0

#

# index_exact_words= 1

# position increment on overshort (less than min_word_len) words

# optional, allowed values are 0 and 1, default is 1

#

# overshort_step= 1

# position increment on stopword

# optional, allowed values are 0 and 1, default is 1

#

# stopword_step= 1

}

# inherited index example

#

# all the parameters are copied from the parent index,

# and may then be overridden in this index definition

#index test1stemmed : test1

#{

#path= /usr/local/coreseek/var/data/test1stemmed

#morphology= stem_en

#}

# distributed index example

#

# this is a virtual index which can NOT be directly indexed,

# and only contains references to other local and/or remote indexes

#index dist1

#{

## 'distributed' index type MUST be specified

#type= distributed

#

## local index to be searched

## there can be many local indexes configured

#local= test1

#local= test1stemmed

#

## remote agent

## multiple remote agents may be specified

## syntax for TCP connections is 'hostname:port:index1,[index2[,...]]'

## syntax for local UNIX connections is '/path/to/socket:index1,[index2[,...]]'

#agent= localhost:9313:remote1

#agent= localhost:9314:remote2,remote3

## agent= /var/run/searchd.sock:remote4

#

## blackhole remote agent, for debugging/testing

## network errors and search results will be ignored

##

## agent_blackhole= testbox:9312:testindex1,testindex2

#

#

## remote agent connection timeout, milliseconds

## optional, default is 1000 ms, ie. 1 sec

#agent_connect_timeout= 1000

#

## remote agent query timeout, milliseconds

## optional, default is 3000 ms, ie. 3 sec

#agent_query_timeout= 3000

#}

#############################################################################

## indexer settings

#############################################################################

indexer

{

# memory limit, in bytes, kilobytes (16384K) or megabytes (256M)

# optional, default is 32M, max is 2047M, recommended is 256M to 1024M

mem_limit= 128M

# maximum IO calls per second (for I/O throttling)

# optional, default is 0 (unlimited)

#

# max_iops= 40

# maximum IO call size, bytes (for I/O throttling)

# optional, default is 0 (unlimited)

#

# max_iosize= 1048576

# maximum xmlpipe2 field length, bytes

# optional, default is 2M

#

# max_xmlpipe2_field= 4M

# write buffer size, bytes

# several (currently up to 4) buffers will be allocated

# write buffers are allocated in addition to mem_limit

# optional, default is 1M

#

# write_buffer= 1M

}

#############################################################################

## searchd settings

#############################################################################

searchd

{

# hostname, port, or hostname:port, or /unix/socket/path to listen on

# multi-value, multiple listen points are allowed

# optional, default is 0.0.0.0:9312 (listen on all interfaces, port 9312)

#

# listen= 127.0.0.1

# listen= 192.168.0.1:9312

# listen= 9312

# listen= /var/run/searchd.sock

# log file, searchd run info is logged here

# optional, default is 'searchd.log'

log= /usr/local/coreseek/var/log/searchd.log

# query log file, all search queries are logged here

# optional, default is empty (do not log queries)

query_log= /usr/local/coreseek/var/log/query.log

# client read timeout, seconds

# optional, default is 5

read_timeout= 5

# request timeout, seconds

# optional, default is 5 minutes

client_timeout= 300

# maximum amount of children to fork (concurrent searches to run)

# optional, default is 0 (unlimited)

max_children= 30

# PID file, searchd process ID file name

# mandatory

pid_file= /usr/local/coreseek/var/log/searchd.pid

# max amount of matches the daemon ever keeps in RAM, per-index

# WARNING, THERE'S ALSO PER-QUERY LIMIT, SEE SetLimits() API CALL

# default is 1000 (just like Google)

max_matches= 1000

# seamless rotate, prevents rotate stalls if precaching huge datasets

# optional, default is 1

seamless_rotate= 1

# whether to forcibly preopen all indexes on startup

# optional, default is 0 (do not preopen)

preopen_indexes= 0

# whether to unlink .old index copies on successful rotation.

# optional, default is 1 (do unlink)

unlink_old= 1

# attribute updates periodic flush timeout, seconds

# updates will be automatically dumped to disk this frequently

# optional, default is 0 (disable periodic flush)

#

# attr_flush_period= 900

# instance-wide ondisk_dict defaults (per-index value take precedence)

# optional, default is 0 (precache all dictionaries in RAM)

#

# ondisk_dict_default= 1

# MVA updates pool size

# shared between all instances of searchd, disables attr flushes!

# optional, default size is 1M

mva_updates_pool= 1M

# max allowed network packet size

# limits both query packets from clients, and responses from agents

# optional, default size is 8M

max_packet_size= 8M

# crash log path

# searchd will (try to) log crashed query to 'crash_log_path.PID' file

# optional, default is empty (do not create crash logs)

#

# crash_log_path= /usr/local/coreseek/var/log/crash

# max allowed per-query filter count

# optional, default is 256

max_filters= 256

# max allowed per-filter values count

# optional, default is 4096

max_filter_values= 4096

# socket listen queue length

# optional, default is 5

#

# listen_backlog= 5

# per-keyword read buffer size

# optional, default is 256K

#

# read_buffer= 256K

# unhinted read size (currently used when reading hits)

# optional, default is 32K

#

# read_unhinted= 32K

}

# --eof--

你可能感兴趣的:(coreseek)

Sphinx到Coreseek安装全解云天河Blog MYSQL PHP Linux shpinx coreseek 中文分词全文索引
此次采用的是CentOS6.532位如有出入，请参阅相关配置手册说明——@Author云天河BlogSphinx部分简介Sphinx是一个基于SQL的全文检索引擎，可以结合MySQL,PostgreSQL做全文搜索，它可以提供比数据库本身更专业的搜索功能，使得应用程序更容易实现专业化的全文检索。Sphinx特别为一些脚本语言设计搜索API接口，如PHP,Python,Perl,Ruby等，同时为M
给全文搜索引擎Manticore (Sphinx) search 增加中文分词冰糖葫芦加冰
Sphinxsearch是一款非常棒的开源全文搜索引擎，它使用C++开发，索引和搜索的速度非常快，我使用sphinx的时间也有好多年了。最初使用的是coreseek，一个国人在sphinxsearch基础上添加了mmseg分词的搜索引擎，可惜后来不再更新，sphinxsearch的版本太低，bug也会出现；后来也使用最新的sphinxsearch，它可以支持几乎所有语言，通过其内置的ngramto
全文搜索引擎-sphinx及xunsearch 云窗96
使用全文搜索引擎站内搜索类型1、sphinx(斯芬克斯)sphinx不支持中文，coreseek(基于sphinx，并且支持中文)2、lucenejava支持好，为php也提供方法3、xunsearch(迅搜)国产、仅支持phpsphinx的使用优点：高速的建立索引可达到10M/s高性能的搜索(在2-4G的文本数据上，平均每次检索响应的时间小于0.1秒)可处理海量数据(目前已知可以处理100G的文
Sphinx搜索引擎 Coreseek 中文分词搜索引擎安装使用,站内搜索一件小毛衣
Sphinx搜索引擎Coreseek中文分词搜索引擎安装使用，站内搜索，php简单使用；一.简介Coreseek是一款中文全文检索开源软件，基于Sphinx研发并独立发布，专攻中文搜索和信息处理领域，适用于行业/垂直搜索、论坛/站内搜索、数据库搜索、文档/文献检索、信息检索、数据挖掘等应用场景。相比Sphinx，Coreseek增加了一个带有中文分司的词库。二.Coreseek下载wgethttp
coreseek安装使用 school_1087
本文引自:http://www.phperz.com/article/14/0615/95.htmlsphinx的安装使用，其实都是大同小异，以下以coreseek安装为例Coreseek是一款中文全文检索/搜索软件，基于Sphinx研发并独立发布，专攻中文搜索和信息处理领域，因为sphinx不支持分词，所以用coreseek特别适合，现在用的是Coreseek3.2.14稳定版,目前暂时没有新的
Sphinx实时搜索设计探讨 JobinLi
背景Sphinx是一个全文搜索引擎，虽然官方没对中文分词检索做直接支持，但是配合coreseek还是能很好地实现中文全文检索的。至于分词结果，不本文讨论范围内，本文主要针对Sphinx的实时搜索能力进行探讨。用过Sphinx的应该都知道，虽然提供了更新属性的接口(php中是UpdateAttributes函数)，但是却无法对文本类型字段进行更新。本文主要以PHP来进行实际操作示范。题外话：其实El
使用python测试sphinx(coreseek)做全文索引 lpj24
coreseek是一个基础sphinx的中文索引工具，我今天写一下关于使用python做测试的方式。1.coreseek安装，连接mysql的配置此处不赘述，很多人写过这方面的博客，单纯讲python连接sphinx.当然我们先启动sphinx进程，./searched2.python要连接sphinx肯定是需要第三方库的，你可以去pypi找，当然sphinx已经告诉我们了，我们去/home/lp
coreseek+php之sphinx扩展安装+php调用示例张清柏
[参考资料](https://blog.csdn.net/moqiang02/article/details/42027243)[关于排序和分而已,及sphinxapi.php的调用](https://blog.csdn.net/joyatonce/article/details/52059564)首先了解sphinx是什么?他是一个服务,用于搜索的服务,也可以说是一个搜索引擎,如果使用搜索引擎,
如何在thinkphp框架下使用coreseek？ LiChangBao
1.进入到coreseek的api路径（D:\coreseek-3.2.14-win32\api）复制sphinxapi.php文件；2.进入到thinkphp框架的Vendor文件夹下，把上一步复制的sphinxapi.php文件粘贴到此处，并重命名为Sphinx.class.php;3.在需要使用sphinx的PHP文件中导入sphinx类库（即：import('Vendor.Sphinx')
coreseek windows下服务 FATAL: Tokenizer initialization failure 解决办法 ksr12333 coreseek
coreseek命令行模式一切正常在windows下安装成服务启动正常，但是一使用，就会出现没有结果的错误，再去服务里一看，服务自动停止了（出错了）去日志看下FATAL:Tokenizerinitializationfailure发现这个百度google后，发现都是说windows下要用绝对路径，并且在路径里要用“/"代替"\"这些都实验了，无效正准备用源码试试看到底错误是什么原因忽然想到一点我在
mysql + sphinx 安装过程详解 zhsj0110 sphinx
参考：http://www.cnblogs.com/chenzehe/archive/2010/11/04/1868354.htmlhttp://www.coreseek.cn/news/7/99/http://klinmy.blog.163.com/blog/static/5680802008428445716/http://love3400wind.blog.163.com/blog/stat
Sphinx 安装记录阳光梦搜索引擎
前言如果你想支持中文全文检索的话，请参考coreseek安装记录。如果你已经安装sphinx或者coreseek,只是想查找怎么配置和使用sphinx和coreseek的话，请参考coreeek和sphinx的配置与使用下面的表纯属文章虚构，由于配置内容较多，部分省略，具体可以参考官方文档。想吐槽一句：编译安装真浪费时间，configure&&make&&makeinstall一个软件就需要几十分
如何给mysql innodb表添加Sphinx中文快速搜索支持 xie156005934
由于innodb表不能做全文索引，因此模糊查询的效率很低，几十万行的数据like查询一般是分钟级的，而sphinx则很好的解决了这个问题，可以在0.几秒内完成这个查询，下面看下如何给innodb表添加sphinx支持一、安装sphinx官方下载地址：http://sphinxsearch.com/downloads/archive/由于coreseek4.1要求2.0.2的版本，我的操作系统是ce
mysql安装sphinx引擎 weixin_34143774 数据库
一般情况下，我们在使用数据库的时候使用的引擎是InnoDB或者是MYISAM默认安装会把一些其它的引擎给安装上，但是不会安装sphinx.(我测试的数据库是5.6使用的cmake)使用showengines命令可以查看mysql数据库的引擎如果你是用的是coreseek，直接拷贝的mysqlse那么可能不会编译成功，可能会出现一些版本不兼容的错误。一般到sphinx官方网站上下载sphinx，解压
php + MongoDB + Sphinx 实现全文检索 (一) _Lyux php
现状:Sphinx目前的稳定版本为2.2.11.Sphinx目前对英文等字母语言采用空格分词,故其对中文分词支持不好,目前官方中文分词方案仅支持按单字分词.在Sphinx基础上,目前国内有两个中文分词解决方案,一个是sphinx-for-chinese,一个是coreseek.sphinx-for-chinese没有官网,文档较少,可查到的最新版本可支持sphinx1.10.coreseek官方还
安装coreseek全文检索服务器 wangjun_1218 mysql linux综合全文检索 mysql 测试服务器 opera gcc
==安装coreseek全文检索服务器==root:groupaddxmpuseradd-gxmp-d/home/coreseek-m-s/bin/bashcoreseekpasswdcoreseekcoreseek:cd~mkdirlocalfile;cdlocalfilewgethttp://www.coreseek.cn/uploads/csft/3.2/coreseek-3.2.14.ta
sphinx索引工具的使用 yt_php 工具的使用
介绍mysql全文索引有个缺点是经常使用的词汇不会建立索引而且不支持中文，sphinx提供了比数据库本身更专业的搜索功能。1高速的建立索引（创建100万条索引只需3~4分钟）2高性能搜索（一千万条查询速度为毫秒级）3处理海量数据（单一索引最大可包含一亿条记录）4优秀的相关度算法但是sphinx只支持英文与俄文，这里我们需要另一款软件，coreseek，基于sphinx，添加了中文词库，专攻中文搜索
sphinx原理以及索引流程 wuliZs_
在使用mysql数据库过程中，如果想实现全文检索的优化，可以使用mysql自带全文索引，但是不支持中文。。关于sphinx的安装网上很多教程写的都不错比如：http://www.coreseek.cn/products-install/。这里就不再说明安装方法了。有兴趣的可以自己参考。MySQL在高并发连接、数据库记录数较多的情况下，SELECT...WHERE...LIKE'%...%'的全文搜
sphinx 原理及实现钻石王小二吼吼吼 mysql优化
在使用mysql数据库过程中，如果想实现全文检索的优化，可以使用mysql自带全文索引，但是不支持中文。。关于sphinx的安装网上很多教程写的都不错比如：http://www.coreseek.cn/products-install/。这里就不再说明安装方法了。有兴趣的可以自己参考。MySQL在高并发连接、数据库记录数较多的情况下，SELECT...WHERE...LIKE'%...%'的全文搜
sphinx应用项目实践踩坑记 Winner-雪花飘技术 php 服务器
之前写的关于sphinx环境及使用的文章只是简单的做个小demo的测试，真正应用到项目中会遇到各种问题袭来，没有leader指导，只能自己摸索，应用到项目上线也是花了好几天的时间，这次记录只要是指出各个坑点，避免日后再踩坑。环境搭建docker如何安装在这里就不详细说了，有了docker环境后使用dockersearchsunfjun/coreseek，笔者使用的docker源是阿里源vi/etc
coreseek下相关知识（匹配模式、排序模式、指定字段查询、多字段查询、指定字段内容高亮、增量索引实时更新，其他技巧）总结篇 qiuyu6958334 php 搜索引擎Coreseek
从标题看，这几个都是重点知识，把这些都学会了，coreseek基本上使用无障碍了，所以很重要！！匹配模式1、SPH_MATCH_ALL,匹配所有查询词(默认模式);2、SPH_MATCH_ANY,匹配查询词中的任意一个;3、SPH_MATCH_PHRASE,将整个查询看作一个词组，要求按顺序完整匹配;4、SPH_MATCH_BOOLEAN,将查询看作一个布尔表达式(参见第5.2节“布尔查询语法”)
sphinx全等匹配（等于）查询的语法 gianttj
sphinx全等匹配的语法，以下用sphinxSE语法作为示例：SELECT*FROM`attribute_content_search`WHEREquery='"^感冒$"|"^便秘$";mode=extended2;limit=1000'limit1000;字段开始和字段结束修饰符(在版本Coreseek3.1/Sphinx0.9.9-rc2中引入)，其中“^”为开始符，“$”为结尾符，有点类
sphinx多条件搜索阿瑟·柯南·道尔 Sphinx
1、sphinx多条件搜索创建索引、开启搜索服务，以下是我创建的表单，option标签的value微数据库字段request->csrfToken?>"/>搜索字段：全部标题作者作者：2、然后将coreseek中的csft_mysql.conf类方法yii框架的yii\vendor\composer\下在yii\vendor\autoload.php中添加require_once__DIR__.'
Sphinx + Coreseek 实现中文分词搜索 Sponge_CMZ php
Sphinx+Coreseek实现中文分词搜索SphinxCoreseek实现中文分词搜索全文检索1全文检索vs数据库2中文检索vs汉化检索3自建全文搜索与使用Google等第三方网站提供的站内全文搜索的区别SphinxCoreseek介绍Coreseek安装使用1.全文检索1.1全文检索vs.数据库全文检索是数据库的有力补充，全文检索并不能替代数据库在应用系统中的作用。当应用系统的数据以大量的文
浅谈Coreseek、Sphinx-for-chinaese、Sphinx+Scws的区别 Huangwenting1990
Sphinx是一个基于SQL的全文检索引擎；普遍使用于很多网站Sphinx的特性如下：a)高速的建立索引(在当代CPU上，峰值性能可达到10MB/秒);b)高性能的搜索(在2–4GB的文本数据上，平均每次检索响应时间小于0.1秒);c)可处理海量数据(目前已知可以处理超过100GB的文本数据,在单一CPU的系统上可处理100M文档);Sphinx本身对中文的支持并不好。主要体现在对一段话断词；英文
Coreseek算法分析内核中的洋葱搜索
Coreseek算法分析本文对coreseek代码中涉及到的一部分算法进行说明，以便在阅读代码的时候，能更容易理解相关的代码。本文所整理的只是其中的部分算法，后面将在逐渐深入理解的基础上，进一步添加。一．Soundex算法1.算法原理Soundex是一种语音算法，利用英文字的读音计算近似值，值由四个字符构成，第一个字符为英文字母，后三个为数字。在拼音文字中有时会有会念但不能拼出正确字的情形，可用S
手把手搭建sphinx环境内核中的洋葱搜索
手把手搭建sphinx环境1一．概述1二．虚拟机的搭建21.安装virtualbox22.准备suse的安装环境23.安装suse44.配置共享文件夹55.设置网络7三．Mysql的安装81.获取mysql源代码82.获取sphinx源码93.增加sphinxSE引擎94.配置编译95.启动和测试96.系统自动启动mysql10四．Sphinx的搭建101.解压coreseek102.首先安装MM
研究了coreseek下的sphinx 配置及api调用，收获颇多。任亚军系统架构 api sql query float lucene unix
前言：之前一直使用lucene，有很多优点及缺点，最大的缺点就是要维护一个索引的成本很高，需要牵扯到很多方面，其中也包含业务方面；优点呢，不用多说了，速度快，支持查询的模式多，各种条件下的查询都能实现，所以想找一个更加符合现有应用状况的搜索引擎，故想到了coreseek=(sphinx+中文分词+框架)Sphinx最大的好处是业务层面不需要你去关心索引的建立、更新等，后台定时去维护主索引和增量索引
nginx开启关闭shell diwan2439
#!/bin/sh#file:/usr/local/bin/sphinx./etc/rc.d/init.d/functionsappName="Sphinx"stop(){/usr/local/bin/searchd-c/usr/local/coreseek/etc/sphinx.conf--stop>/dev/null2>&1ret=$?if[$ret-eq0];thenaction$"Stop
Centos 编译安装sphinx-0.9.9全文检索 YPHP coreseek sphinx php centos
Sphinx简介Sphinx是开源的搜索引擎，它支持英文的全文检索。所以如果单独搭建Sphinx，你就已经可以使用全文索引了。但是往往我们要求的是中文索引，怎么做呢？国人提供了一个可供企业使用的，基于Sphinx的中文全文检索引擎。也就是说Coreseek实际上的内核还是Sphinx。那么他们的版本对应呢？sphinx可以通过设置为“一元切分模式”来支持搜索中文在实际使用中，搜索非中文的话，sph
怎么样才能成为专业的程序员？ cocos2d-x小菜编程 PHP
如何要想成为一名专业的程序员？仅仅会写代码是不够的。从团队合作去解决问题到版本控制，你还得具备其他关键技能的工具包。当我们询问相关的专业开发人员，那些必备的关键技能都是什么的时候，下面是我们了解到的情况。关于如何学习代码，各种声音很多，然后很多人就被误导为成为专业开发人员懂得一门编程语言就够了？！呵呵，就像其他工作一样，光会一个技能那是远远不够的。如果你想要成为
java web开发高并发处理 BreakingBad java Web 并发开发处理高
java处理高并发高负载类网站中数据库的设计方法（java教程,java处理大量数据，java高负载数据）一：高并发高负载类网站关注点之数据库没错,首先是数据库,这是大多数应用所面临的首个SPOF。尤其是Web2.0的应用，数据库的响应是首先要解决的。一般来说MySQL是最常用的，可能最初是一个mysql主机，当数据增加到100万以上，那么，MySQL的效能急剧下降。常用的优化措施是M-S（
mysql批量更新 ekian mysql
mysql更新优化：一版的更新的话都是采用update set的方式，但是如果需要批量更新的话，只能for循环的执行更新。或者采用executeBatch的方式，执行更新。无论哪种方式，性能都不见得多好。三千多条的更新，需要3分多钟。查询了批量更新的优化，有说replace into的方式，即： replace into tableName(id,status) values
微软BI（3） 18289753290 微软BI SSIS
1) Q：该列违反了完整性约束错误；已获得 OLE DB 记录。源:“Microsoft SQL Server Native Client 11.0” Hresult: 0x80004005 说明:“不能将值 NULL 插入列 'FZCHID'，表 'JRB_EnterpriseCredit.dbo.QYFZCH'；列不允许有 Null 值。INSERT 失败。”。 A：一般这类问题的存在是
Java中的List g21121 java
List是一个有序的 collection（也称为序列）。此接口的用户可以对列表中每个元素的插入位置进行精确地控制。用户可以根据元素的整数索引（在列表中的位置）访问元素，并搜索列表中的元素。与 set 不同，列表通常允许重复
读书笔记永夜-极光读书笔记
1. K是一家加工厂,需要采购原材料,有A,B,C,D 4家供应商,其中A给出的价格最低,性价比最高,那么假如你是这家企业的采购经理,你会如何决策? 传统决策: A:100%订单 B,C,D:0% &nbs
centos 安装 Codeblocks 随便小屋 codeblocks
1.安装gcc,需要c和c++两部分,默认安装下,CentOS不安装编译器的,在终端输入以下命令即可yum install gccyum install gcc-c++ 2.安装gtk2-devel,因为默认已经安装了正式产品需要的支持库,但是没有安装开发所需要的文档.yum install gtk2* 3. 安装wxGTK yum search w
23种设计模式的形象比喻 aijuans 设计模式
1、ABSTRACT FACTORY—追MM少不了请吃饭了，麦当劳的鸡翅和肯德基的鸡翅都是MM爱吃的东西，虽然口味有所不同，但不管你带MM去麦当劳或肯德基，只管向服务员说“来四个鸡翅”就行了。麦当劳和肯德基就是生产鸡翅的Factory 　　工厂模式：客户类和工厂类分开。消费者任何时候需要某种产品，只需向工厂请求即可。消费者无须修改就可以接纳新产品。缺点是当产品修改时，工厂类也要做相应的修改。如：
开发管理 CheckLists aoyouzi 开发管理 CheckLists
开发管理 CheckLists(23) -使项目组度过完整的生命周期开发管理 CheckLists(22) -组织项目资源开发管理 CheckLists(21) -控制项目的范围开发管理 CheckLists(20) -项目利益相关者责任开发管理 CheckLists(19) -选择合适的团队成员开发管理 CheckLists(18) -敏捷开发 Scrum Master 工作开发管理 C
js实现切换百合不是茶 JavaScript 栏目切换
js主要功能之一就是实现页面的特效,窗体的切换可以减少页面的大小,被门户网站大量应用思路: 1,先将要显示的设置为display:bisible 否则设为none 2,设置栏目的id ,js获取栏目的id,如果id为Null就设置为显示 3,判断js获取的id名字;再设置是否显示代码实现: html代码: <di
周鸿祎在360新员工入职培训上的讲话 bijian1013 感悟项目管理人生职场
这篇文章也是最近偶尔看到的，考虑到原博客发布者可能将其删除等原因，也更方便个人查找，特将原文拷贝再发布的。“学东西是为自己的，不要整天以混的姿态来跟公司博弈，就算是混，我觉得你要是能在混的时间里，收获一些别的有利于人生发展的东西，也是不错的，看你怎么把握了”，看了之后，对这句话记忆犹新。 &
前端Web开发的页面效果 Bill_chen html Web Microsoft
1.IE6下png图片的透明显示： <img src="图片地址" border="0" style="Filter.Alpha(Opacity)=数值(100),style=数值(3)"/> 或在<head></head>间加一段JS代码让透明png图片正常显示。 2.<li>标
【JVM五】老年代垃圾回收：并发标记清理GC(CMS GC) bit1129 垃圾回收
CMS概述并发标记清理垃圾回收(Concurrent Mark and Sweep GC）算法的主要目标是在GC过程中，减少暂停用户线程的次数以及在不得不暂停用户线程的请夸功能，尽可能短的暂停用户线程的时间。这对于交互式应用，比如web应用来说，是非常重要的。 CMS垃圾回收针对新生代和老年代采用不同的策略。相比同吞吐量垃圾回收，它要复杂的多。吞吐量垃圾回收在执
Struts2技术总结白糖_ struts2
必备jar文件早在struts2.0.*的时候，struts2的必备jar包需要如下几个： commons-logging-*.jar Apache旗下commons项目的log日志包 freemarker-*.jar
Jquery easyui layout应用注意事项 bozch jquery 浏览器 easyui layout
在jquery easyui中提供了easyui-layout布局，他的布局比较局限，类似java中GUI的border布局。下面对其使用注意事项作简要介绍：如果在现有的工程中前台界面均应用了jquery easyui，那么在布局的时候最好应用jquery eaysui的layout布局，否则在表单页面（编辑、查看、添加等等）在不同的浏览器会出
java-拷贝特殊链表：有一个特殊的链表，其中每个节点不但有指向下一个节点的指针pNext，还有一个指向链表中任意节点的指针pRand，如何拷贝这个特殊链表？ bylijinnan java
public class CopySpecialLinkedList { /** * 题目：有一个特殊的链表，其中每个节点不但有指向下一个节点的指针pNext，还有一个指向链表中任意节点的指针pRand，如何拷贝这个特殊链表？拷贝pNext指针非常容易，所以题目的难点是如何拷贝pRand指针。假设原来链表为A1 -> A2 ->... -> An，新拷贝
color Chen.H JavaScript html css
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> <HTML> <HEAD>&nbs
[信息与战争]移动通讯与网络 comsci 网络
两个坚持:手机的电池必须可以取下来光纤不能够入户,只能够到楼宇建议大家找这本书看看:<&
oracle flashback query(闪回查询) daizj oracle flashback query flashback table
在Oracle 10g中，Flash back家族分为以下成员： Flashback Database Flashback Drop Flashback Table Flashback Query(分Flashback Query,Flashback Version Query，Flashback Transaction Query) 下面介绍一下Flashback Drop 和Flas
zeus持久层DAO单元测试 deng520159 单元测试
zeus代码测试正紧张进行中,但由于工作比较忙,但速度比较慢.现在已经完成读写分离单元测试了,现在把几种情况单元测试的例子发出来,希望有人能进出意见,让它走下去. 本文是zeus的dao单元测试: 1.单元测试直接上代码 package com.dengliang.zeus.webdemo.test; import org.junit.Test; import o
C语言学习三printf函数和scanf函数学习 dcj3sjt126com c printf scanf language
printf函数 /* 2013年3月10日20:42:32 地点：北京潘家园功能：目的：测试%x %X %#x %#X的用法 */ # include <stdio.h> int main(void) { printf("哈哈！\n"); // \n表示换行 int i = 10; printf
那你为什么小时候不好好读书? dcj3sjt126com life
dady, 我今天捡到了十块钱, 不过我还给那个人了 good girl! 那个人有没有和你讲thank you啊没有啦....他拉我的耳朵我才把钱还给他的, 他哪里会和我讲thank you 爸爸, 如果地上有一张5块一张10块你拿哪一张呢.... 当然是拿十块的咯... 爸爸你很笨的, 你不会两张都拿爸爸为什么上个月那个人来跟你讨钱, 你告诉他没
iptables开放端口 Fanyucai linux iptables 端口
1，找到配置文件 vi /etc/sysconfig/iptables 2，添加端口开放，增加一行，开放18081端口 -A INPUT -m state --state NEW -m tcp -p tcp --dport 18081 -j ACCEPT 3，保存 ESC :wq! 4，重启服务 service iptables
Ehcache（05）——缓存的查询 234390216 排序 ehcache 统计 query
缓存的查询目录 1. 使Cache可查询 1.1 基于Xml配置 1.2 基于代码的配置 2 指定可搜索的属性 2.1 可查询属性类型 2.2 &
通过hashset找到数组中重复的元素 jackyrong hashset
如何在hashset中快速找到重复的元素呢?方法很多，下面是其中一个办法： int[] array = {1,1,2,3,4,5,6,7,8,8}; Set<Integer> set = new HashSet<Integer>(); for(int i = 0
使用ajax和window.history.pushState无刷新改变页面内容和地址栏URL lanrikey history
后退时关闭当前页面 <script type="text/javascript"> jQuery(document).ready(function ($) { if (window.history && window.history.pushState) {
应用程序的通信成本 netkiller.github.com 虚拟机应用服务器陈景峰 netkiller neo
应用程序的通信成本什么是通信一个程序中两个以上功能相互传递信号或数据叫做通信。什么是成本这是是指时间成本与空间成本。时间就是传递数据所花费的时间。空间是指传递过程耗费容量大小。都有哪些通信方式全局变量线程间通信共享内存共享文件管道 Socket 硬件（串口，USB）等等全局变量全局变量是成本最低通信方法，通过设置
一维数组与二维数组的声明与定义恋洁e生二维数组一维数组定义声明初始化
/** * */ package test20111005; /** * @author FlyingFire * @date:2011-11-18 上午04:33:36 * @author ：代码整理 * @introduce :一维数组与二维数组的初始化 *summary： */ public c
Spring Mybatis独立事务配置 toknowme mybatis
在项目中有很多地方会使用到独立事务，下面以获取主键为例（1）修改配置文件spring-mybatis.xml  <tx:annotation-driven transaction-manager="transactionManager" /> &n
更新Anadroid SDK Tooks之后，Eclipse提示No update were found xp9802 eclipse
使用Android SDK Manager 更新了Anadroid SDK Tooks 之后，打开eclipse提示 This Android SDK requires Android Developer Toolkit version 23.0.0 or above, 点击Check for Updates 检测一会后提示 No update were found

按字母分类： A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 其他