The Road to Python: Sphinx, a Spyder Module

This post covers the Sphinx search engine, a module that the Spyder environment depends on, and walks through setting it up.


  • Sphinx
  • Download and install

  • In the Sphinx root directory, create two folders:
    /data and /log
    Then create and edit sphinx.conf under /bin:

source src1
{
    type                = mysql
    sql_host            = ip
    sql_user            = root
    sql_pass            = root
    sql_db              = test
    sql_port            = 3306

    sql_query           = SELECT id, name, description, UNIX_TIMESTAMP(created_at) AS created_at, group_id, rating FROM links
    #sql_attr_uint       = group_id
    #sql_attr_timestamp  = created_at
    #sql_attr_float      = rating
    #sql_attr_multi      = uint tags from query; SELECT link_id, tag_id FROM links_tags
    #sql_query_info      = SELECT * FROM links WHERE id=$id
    sql_query_pre       = SET NAMES utf8 
}

index test1
{
    source              = src1
    path                = D:/data/sphinx-3.0.3/data/test1   # index file name prefix, not a bare directory
    #docinfo             = extern
    morphology          = none
    stopwords           =
    #charset_type        = utf-8
    # UTF-8 character mapping table
    charset_table   = 0..9, A..Z->a..z, _, a..z, U+410..U+42F->U+430..U+44F, U+430..U+44F
    # simple N-gram tokenizing; must be set to 1 to search Chinese (only 0 and 1 are valid)
    ngram_len = 1
    # character ranges to be N-gram tokenized; required when searching Chinese
    ngram_chars = U+3000..U+2FA1F 
}

searchd
{
    # [hostname:]port[:protocol], or /unix/socket/path to listen on
    # known protocols are 'sphinx' (SphinxAPI) and 'mysql41' (SphinxQL)
    #
    # multi-value, multiple listen points are allowed
    # optional, defaults are 9312:sphinx and 9306:mysql41, as below
    #
    # listen            = 127.0.0.1
    # listen            = 192.168.0.1:9312
    # listen            = 9312
    # listen            = /var/run/searchd.sock
    listen          = 9312
    listen          = 9306:mysql41

    # log file, searchd run info is logged here
    # optional, default is 'searchd.log'
    log         = D:/data/sphinx-3.0.3/log/searchd.log

    # query log file, all search queries are logged here
    # optional, default is empty (do not log queries)
    query_log       = D:/data/sphinx-3.0.3/log/query.log

    # client read timeout, seconds
    # optional, default is 5
    read_timeout        = 5

    # request timeout, seconds
    # optional, default is 5 minutes
    client_timeout      = 300

    # maximum amount of children to fork (concurrent searches to run)
    # optional, default is 0 (unlimited)
    max_children        = 30

    # maximum amount of persistent connections from this master to each agent host
    # optional, but necessary if you use agent_persistent. It is reasonable to set the value
    # as max_children, or less on the agent's hosts.
    persistent_connections_limit    = 30

    # PID file, searchd process ID file name
    # mandatory
    pid_file        = D:/data/sphinx-3.0.3/log/searchd.pid

    # seamless rotate, prevents rotate stalls if precaching huge datasets
    # optional, default is 1
    seamless_rotate     = 1

    # whether to forcibly preopen all indexes on startup
    # optional, default is 1 (preopen everything)
    preopen_indexes     = 1

    # whether to unlink .old index copies on successful rotation.
    # optional, default is 1 (do unlink)
    unlink_old      = 1

    # MVA updates pool size
    # shared between all instances of searchd, disables attr flushes!
    # optional, default size is 1M
    mva_updates_pool    = 1M

    # max allowed network packet size
    # limits both query packets from clients, and responses from agents
    # optional, default size is 8M
    max_packet_size     = 8M

    # max allowed per-query filter count
    # optional, default is 256
    max_filters     = 256

    # max allowed per-filter values count
    # optional, default is 4096
    max_filter_values   = 4096

    # max allowed per-batch query count (aka multi-query count)
    # optional, default is 32
    max_batch_queries   = 32

    # multi-processing mode (MPM)
    # known values are none, fork, prefork, and threads
    # threads is required for RT backend to work
    # optional, default is threads
    workers         = threads # for RT to work
}

  • Prepare MySQL by running the following script (a quick connectivity check in Python follows the script):

SET FOREIGN_KEY_CHECKS=0;

-- ----------------------------
-- Table structure for documents
-- ----------------------------
DROP TABLE IF EXISTS `documents`;
CREATE TABLE `documents` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `group_id` int(11) NOT NULL,
  `group_id2` int(11) NOT NULL,
  `date_added` datetime NOT NULL,
  `title` varchar(255) NOT NULL,
  `content` text NOT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=5 DEFAULT CHARSET=utf8;

-- ----------------------------
-- Records of documents
-- ----------------------------
INSERT INTO `documents` VALUES ('1', '1', '5', '2018-06-29 11:41:38', 'test one', 'this is my test document number one. also checking search within phrases.');
INSERT INTO `documents` VALUES ('2', '1', '6', '2018-06-29 11:41:38', 'test two', 'this is my test document number two');
INSERT INTO `documents` VALUES ('3', '2', '7', '2018-06-29 11:41:38', 'another doc', 'this is another group');
INSERT INTO `documents` VALUES ('4', '2', '8', '2018-06-29 11:41:38', 'doc number four', 'this is to test groups');

-- ----------------------------
-- Table structure for links
-- ----------------------------
DROP TABLE IF EXISTS `links`;
CREATE TABLE `links` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `name` varchar(255) NOT NULL,
  `created_at` datetime NOT NULL,
  `description` text,
  `group_id` int(11) NOT NULL,
  `rating` float NOT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=4 DEFAULT CHARSET=utf8;

-- ----------------------------
-- Records of links
-- ----------------------------
INSERT INTO `links` VALUES ('1', 'Paint Protects WiFi Network from Hackers', '2007-04-04 06:48:10', 'A company known as SEC Technologies has created a special type of paint that blocks Wi-Fi signals so that you can be sure hackers can ', '1', '13.32');
INSERT INTO `links` VALUES ('2', 'Airplanes To Become WiFi Hotspots', '2007-04-04 06:49:15', 'Airlines will start turning their airplanes into WiFi hotspots beginning early next year, WSJ reports. Here\'s what you need to know...', '2', '54.85');
INSERT INTO `links` VALUES ('3', 'Planet VIP-195 GSM/WiFi Phone With Windows Messanger', '2007-04-04 06:50:47', 'The phone does comply with IEEE 802.11b and IEEE 802.11g to provide phone capability via WiFi. As GSM phone the VIP-195 support 900/1800/1900 band and GPRS too. It comes with simple button to switch between WiFi or GSM mod', '1', '16.25');

-- ----------------------------
-- Table structure for links64
-- ----------------------------
DROP TABLE IF EXISTS `links64`;
CREATE TABLE `links64` (
  `id` bigint(11) NOT NULL AUTO_INCREMENT,
  `name` varchar(255) NOT NULL,
  `created_at` datetime NOT NULL,
  `description` text,
  `group_id` int(11) NOT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=4294967300 DEFAULT CHARSET=utf8;

-- ----------------------------
-- Records of links64
-- ----------------------------
INSERT INTO `links64` VALUES ('4294967297', 'Paint Protects WiFi Network from Hackers', '2007-04-04 06:48:10', 'A company known as SEC Technologies has created a special type of paint that blocks Wi-Fi signals so that you can be sure hackers can ', '1');
INSERT INTO `links64` VALUES ('4294967298', 'Airplanes To Become WiFi Hotspots', '2007-04-04 06:49:15', 'Airlines will start turning their airplanes into WiFi hotspots beginning early next year, WSJ reports. Here\'s what you need to know...', '2');
INSERT INTO `links64` VALUES ('4294967299', 'Planet VIP-195 GSM/WiFi Phone With Windows Messanger', '2007-04-04 06:50:47', 'The phone does comply with IEEE 802.11b and IEEE 802.11g to provide phone capability via WiFi. As GSM phone the VIP-195 support 900/1800/1900 band and GPRS too. It comes with simple button to switch between WiFi or GSM mod', '1');

-- ----------------------------
-- Table structure for links_tags
-- ----------------------------
DROP TABLE IF EXISTS `links_tags`;
CREATE TABLE `links_tags` (
  `link_id` int(11) NOT NULL,
  `tag_id` int(11) NOT NULL,
  PRIMARY KEY (`link_id`,`tag_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

-- ----------------------------
-- Records of links_tags
-- ----------------------------
INSERT INTO `links_tags` VALUES ('1', '1');
INSERT INTO `links_tags` VALUES ('1', '2');
INSERT INTO `links_tags` VALUES ('1', '3');
INSERT INTO `links_tags` VALUES ('1', '4');
INSERT INTO `links_tags` VALUES ('2', '5');
INSERT INTO `links_tags` VALUES ('2', '6');
INSERT INTO `links_tags` VALUES ('2', '7');
INSERT INTO `links_tags` VALUES ('2', '8');
INSERT INTO `links_tags` VALUES ('3', '1');
INSERT INTO `links_tags` VALUES ('3', '7');
INSERT INTO `links_tags` VALUES ('3', '9');
INSERT INTO `links_tags` VALUES ('3', '10');

-- ----------------------------
-- Table structure for tags
-- ----------------------------
DROP TABLE IF EXISTS `tags`;
CREATE TABLE `tags` (
  `docid` int(11) NOT NULL,
  `tagid` int(11) NOT NULL,
  UNIQUE KEY `docid` (`docid`,`tagid`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

-- ----------------------------
-- Records of tags
-- ----------------------------
INSERT INTO `tags` VALUES ('1', '1');
INSERT INTO `tags` VALUES ('1', '3');
INSERT INTO `tags` VALUES ('1', '5');
INSERT INTO `tags` VALUES ('1', '7');
INSERT INTO `tags` VALUES ('2', '2');
INSERT INTO `tags` VALUES ('2', '4');
INSERT INTO `tags` VALUES ('2', '6');
INSERT INTO `tags` VALUES ('3', '15');
INSERT INTO `tags` VALUES ('4', '7');
INSERT INTO `tags` VALUES ('4', '40');
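
Before building the index, it can be worth checking that the sql_query from the src1 block above actually runs against this data. Here is a minimal sketch in Python, assuming the pymysql package is installed; replace the placeholder host "ip" with the real MySQL host, matching sql_host/sql_user/sql_pass in sphinx.conf.

import pymysql

# Connection settings mirror the src1 block in sphinx.conf; "ip" is a placeholder host.
conn = pymysql.connect(host="ip", port=3306, user="root", password="root",
                       database="test", charset="utf8")
try:
    with conn.cursor() as cur:
        # The same main query that indexer will run when building the test1 index.
        cur.execute(
            "SELECT id, name, description, UNIX_TIMESTAMP(created_at) AS created_at, "
            "group_id, rating FROM links"
        )
        print("sql_query returned", len(cur.fetchall()), "rows")  # 3 rows with the sample data
finally:
    conn.close()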
  • Press Win + R, type cmd, and change into the directory that contains indexer.exe and sphinx.conf (bin in this setup).
Build the index (test1 is the index name defined in sphinx.conf, backed by the data loaded above):
 1. indexer.exe test1

Then start the search daemon; a quick query test from Python is sketched below the command:
 2. searchd.exe --pidfile
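
Once searchd is running, the listen = 9306:mysql41 line above exposes the test1 index over the SphinxQL (MySQL wire protocol) port, so an ordinary MySQL client can query it. Below is a minimal sketch in Python; it assumes the pymysql package is installed and searchd is listening on the local machine, so adjust host and port if yours differ.

import pymysql

# SphinxQL speaks the MySQL wire protocol, so a regular MySQL client library can connect.
# Port 9306 matches the "listen = 9306:mysql41" line in sphinx.conf.
conn = pymysql.connect(host="127.0.0.1", port=9306, user="", password="")
try:
    with conn.cursor() as cur:
        # Full-text search against the test1 index; WEIGHT() is the relevance score.
        cur.execute("SELECT id, WEIGHT() AS w FROM test1 WHERE MATCH('wifi')")
        for doc_id, weight in cur.fetchall():
            print(doc_id, weight)
finally:
    conn.close()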
