是将存储于数据库中的整本书或整篇文章中的任意内容信息查找出来的技术。
//创建测试表
CREATE TABLE fts_a(
FTS_DOC_ID BIGINT UNSIGNED AUTO_INCREMENT NOT NULL,
body TEXT,
PRIMARY KEY(FTS_DOC_ID)
);
//插入测试数据
INSERT INTO fts_a SELECT NULL,'Pease porridge in the pot';
INSERT INTO fts_a SELECT NULL,'Pease porridge hot,pease porridge cold';
INSERT INTO fts_a SELECT NULL,'Nine days old';
INSERT INTO fts_a SELECT NULL,'Some like it hot,some like it cold';
INSERT INTO fts_a SELECT NULL,'Some like it in the pot';
INSERT INTO fts_a SELECT NULL,'Nine days old';
INSERT INTO fts_a SELECT NULL,'I like code days';
//创建倒排索引
CREATE FULLTEXT INDEX idx_fts ON fts_a(body);
//查看分词对应的信息:
mysql> show variables like 'innodb_ft_aux_table';
+---------------------+-------+
| Variable_name | Value |
+---------------------+-------+
| innodb_ft_aux_table | |
+---------------------+-------+
mysql> SET GLOBAL innodb_ft_aux_table='learn/fts_a'; //learn为数据库名
mysql> show variables like 'innodb_ft_aux_table';
+---------------------+-------------+
| Variable_name | Value |
+---------------------+-------------+
| innodb_ft_aux_table | learn/fts_a |
+---------------------+-------------+
//可以看到每个word都对应了一个DOC_ID和POSITION。此外,还记录了FIRST_DOC_ID、LAST_DOC_ID以及DOC_COUNT,分别代表了该word第一次出现的文档ID,最后一次出现的文档ID,以及该word在多少个文档中存在。
mysql> SELECT*FROM information_schema.INNODB_FT_INDEX_TABLE;
+----------+--------------+-------------+-----------+--------+----------+
| WORD | FIRST_DOC_ID | LAST_DOC_ID | DOC_COUNT | DOC_ID | POSITION |
+----------+--------------+-------------+-----------+--------+----------+
| code | 7 | 7 | 1 | 7 | 7 |
| cold | 2 | 4 | 2 | 2 | 34 |
| cold | 2 | 4 | 2 | 4 | 30 |
| days | 3 | 7 | 3 | 3 | 5 |
| days | 3 | 7 | 3 | 6 | 5 |
| days | 3 | 7 | 3 | 7 | 12 |
| hot | 2 | 4 | 2 | 2 | 15 |
| hot | 2 | 4 | 2 | 4 | 13 |
| like | 4 | 7 | 3 | 4 | 5 |
| like | 4 | 7 | 3 | 4 | 17 |
| like | 4 | 7 | 3 | 5 | 5 |
| like | 4 | 7 | 3 | 7 | 2 |
| nine | 3 | 6 | 2 | 3 | 0 |
| nine | 3 | 6 | 2 | 6 | 0 |
| old | 3 | 6 | 2 | 3 | 10 |
| old | 3 | 6 | 2 | 6 | 10 |
| pease | 1 | 2 | 2 | 1 | 0 |
| pease | 1 | 2 | 2 | 2 | 0 |
| pease | 1 | 2 | 2 | 2 | 19 |
| porridge | 1 | 2 | 2 | 1 | 6 |
| porridge | 1 | 2 | 2 | 2 | 6 |
| porridge | 1 | 2 | 2 | 2 | 19 |
| pot | 1 | 5 | 2 | 1 | 22 |
| pot | 1 | 5 | 2 | 5 | 20 |
| some | 4 | 5 | 2 | 4 | 0 |
| some | 4 | 5 | 2 | 4 | 17 |
| some | 4 | 5 | 2 | 5 | 0 |
+----------+--------------+-------------+-----------+--------+----------+
//全文检索:stopword列表:表示该列表中的word不需要对其进行索引分词操作
//创建用户表
CREATE TABLE `user_stopword` (
`value` varchar(30) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
//设置路径
mysql> show variables like 'innodb_ft_server_stopword_table';
+---------------------------------+-------+
| Variable_name | Value |
+---------------------------------+-------+
| innodb_ft_server_stopword_table | |
+---------------------------------+-------+
mysql> SET GLOBAL innodb_ft_server_stopword_table="learn/user_stopword";
mysql> show variables like 'innodb_ft_server_stopword_table';
+---------------------------------+---------------------+
| Variable_name | Value |
+---------------------------------+---------------------+
| innodb_ft_server_stopword_table | learn/user_stopword |
+---------------------------------+---------------------+
对于InnoDB存储引擎的全文检索,还需要考虑以下的因素:
mysql> show variables like 'innodb_ft_min_token_size';
+--------------------------+-------+
| Variable_name | Value |
+--------------------------+-------+
| innodb_ft_min_token_size | 3 |
+--------------------------+-------+
mysql> show variables like 'innodb_ft_max_token_size';
+--------------------------+-------+
| Variable_name | Value |
+--------------------------+-------+
| innodb_ft_max_token_size | 84 |
+--------------------------+-------+
当前InnoDB存储引擎的全文检索还存在以下的限制:
//(1) 不使用全文搜素技术
mysql> SELECT*FROM fts_a WHERE body LIKE'%Pease%';
+------------+----------------------------------------+
| FTS_DOC_ID | body |
+------------+----------------------------------------+
| 1 | Pease porridge in the pot |
| 2 | Pease porridge hot,pease porridge cold |
+------------+----------------------------------------+
mysql> explain SELECT*FROM fts_a WHERE body LIKE'%Pease%';
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+-------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+-------------+
| 1 | SIMPLE | fts_a | NULL | ALL | NULL | NULL | NULL | NULL | 7 | 14.29 | Using where |
+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+-------------+
//(2) 使用全文检索技术
mysql> SELECT * FROM fts_a WHERE MATCH(body) AGAINST('Porridge' IN NATURAL LANGUAGE MODE);
//mysql> SELECT * FROM fts_a WHERE MATCH(body) AGAINST('Porridge');
+------------+----------------------------------------+
| FTS_DOC_ID | body |
+------------+----------------------------------------+
| 2 | Pease porridge hot,pease porridge cold |
| 1 | Pease porridge in the pot |
+------------+----------------------------------------+
mysql> explain SELECT * FROM fts_a WHERE MATCH(body) AGAINST('Porridge' IN NATURAL LANGUAGE MODE);
+----+-------------+-------+------------+----------+---------------+---------+---------+-------+------+----------+-------------------------------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+----------+---------------+---------+---------+-------+------+----------+-------------------------------+
| 1 | SIMPLE | fts_a | NULL | fulltext | idx_fts | idx_fts | 0 | const | 1 | 100.00 | Using where; Ft_hints: sorted |
+----+-------------+-------+------------+----------+---------------+---------+---------+-------+------+----------+-------------------------------+
//统计次数
//慢:需要进行相关性的排序统计
mysql> SELECT count(*) FROM fts_a WHERE MATCH(body) AGAINST('Porridge');
+----------+
| count(*) |
+----------+
| 2 |
+----------+
//更快:不需要进行相关性的排序统计
mysql> SELECT COUNT(IF(MATCH(body) AGAINST('Porridge'),1,NULL)) AS count FROM fts_a;
+-------+
| count |
+-------+
| 2 |
+-------+
//查看相关性
mysql> SELECT fts_doc_id,body, MATCH(body)AGAINST('Porridge') AS Relevance FROM fts_a;
+------------+----------------------------------------+--------------------+
| fts_doc_id | body | Relevance |
+------------+----------------------------------------+--------------------+
| 1 | Pease porridge in the pot | 0.2960100471973419 |
| 2 | Pease porridge hot,pease porridge cold | 0.5920200943946838 |
| 3 | Nine days old | 0 |
| 4 | Some like it hot,some like it cold | 0 |
| 5 | Some like it in the pot | 0 |
| 6 | Nine days old | 0 |
| 7 | I like code days | 0 |
+------------+----------------------------------------+--------------------+
//the 在stopword中 故相关性为0
mysql> insert into user_stopword values('the');
mysql> select * from user_stopword;
+-------+
| value |
+-------+
| the |
+-------+
mysql> SELECT fts_doc_id AS id,body, MATCH(body)AGAINST('the') AS rl FROM fts_a;
+----+----------------------------------------+------+
| id | body | rl |
+----+----------------------------------------+------+
| 1 | Pease porridge in the pot | 0 |
| 2 | Pease porridge hot,pease porridge cold | 0 |
| 3 | Nine days old | 0 |
| 4 | Some like it hot,some like it cold | 0 |
| 5 | Some like it in the pot | 0 |
| 6 | Nine days old | 0 |
| 7 | I like code days | 0 |
+----+----------------------------------------+------+
Boolean全文检索支持以下几种操作符:
//要求查询有字符串Pease但没有hot的文档
mysql> SELECT * FROM fts_a WHERE MATCH(body) AGAINST('+Pease-hot' IN BOOLEAN MODE);
+------------+---------------------------+
| FTS_DOC_ID | body |
+------------+---------------------------+
| 1 | Pease porridge in the pot |
+------------+---------------------------+
//要求查询有字符串Pease但有hot的文档
mysql> SELECT * FROM fts_a WHERE MATCH(body) AGAINST('+Pease+hot' IN BOOLEAN MODE);
+------------+----------------------------------------+
| FTS_DOC_ID | body |
+------------+----------------------------------------+
| 2 | Pease porridge hot,pease porridge cold |
+------------+----------------------------------------+
//要求查询有字符串Pease但有hot的文档
mysql> SELECT * FROM fts_a WHERE MATCH(body) AGAINST('Pease hot' IN BOOLEAN MODE);
+------------+----------------------------------------+
| FTS_DOC_ID | body |
+------------+----------------------------------------+
| 2 | Pease porridge hot,pease porridge cold |
| 1 | Pease porridge in the pot |
| 4 | Some like it hot,some like it cold |
+------------+----------------------------------------+
//邻近搜索:查询Pease和hot相差多少字节
mysql> SELECT * FROM fts_a WHERE MATCH(body) AGAINST('"Pease hot"@30'IN BOOLEAN MODE);
+------------+----------------------------------------+
| FTS_DOC_ID | body |
+------------+----------------------------------------+
| 2 | Pease porridge hot,pease porridge cold |
+------------+----------------------------------------+
1 row in set (0.01 sec)
mysql> SELECT * FROM fts_a WHERE MATCH(body) AGAINST('"Pease hot"@1'IN BOOLEAN MODE);
Empty set (0.00 sec)
//根据是否有单词like或pot进行相关性统计,并且出现单词pot后相关性需要增加,出现some单词后相关性需要减少
mysql> SELECT * From fts_a WHERE MATCH(body) AGAINST('like>hot' IN BOOLEAN MODE);
+------------+----------------------------------------+
| FTS_DOC_ID | body |
+------------+----------------------------------------+
| 4 | Some like it hot,some like it cold |
| 2 | Pease porridge hot,pease porridge cold |
| 5 | Some like it in the pot |
| 7 | I like code days |
+------------+----------------------------------------+
mysql> SELECT fts_doc_id,body,MATCH(body)AGAINST('like>pot' IN BOOLEAN MODE) AS Relevance FROM fts_a;
+------------+----------------------------------------+---------------------+
| fts_doc_id | body | Relevance |
+------------+----------------------------------------+---------------------+
| 1 | Pease porridge in the pot | 1.2960100173950195 |
| 2 | Pease porridge hot,pease porridge cold | 0 |
| 3 | Nine days old | 0 |
| 4 | Some like it hot,some like it cold | 0.27081382274627686 |
| 5 | Some like it in the pot | 1.4314169883728027 |
| 6 | Nine days old | 0 |
| 7 | I like code days | 0.13540691137313843 |
+------------+----------------------------------------+---------------------+
mysql> SELECT * From fts_a WHERE MATCH(body) AGAINST('like>hot SELECT fts_doc_id,body,MATCH(body)AGAINST('like>pot SELECT * From fts_a WHERE MATCH(body) AGAINST('po*'IN BOOLEAN MODE);
+------------+----------------------------------------+
| FTS_DOC_ID | body |
+------------+----------------------------------------+
| 2 | Pease porridge hot,pease porridge cold |
| 1 | Pease porridge in the pot |
| 5 | Some like it in the pot |
+------------+----------------------------------------+
//查看是否包含like hot 短语(而不是2个单词)
mysql> SELECT * From fts_a WHERE MATCH(body) AGAINST('"like hot"'IN BOOLEAN MODE);
Empty set (0.00 sec)
mysql> SELECT * From fts_a WHERE MATCH(body) AGAINST('like hot'IN BOOLEAN MODE);
+------------+----------------------------------------+
| FTS_DOC_ID | body |
+------------+----------------------------------------+
| 4 | Some like it hot,some like it cold |
| 2 | Pease porridge hot,pease porridge cold |
| 5 | Some like it in the pot |
| 7 | I like code days |
+------------+----------------------------------------+
全文检索的扩展查询。这种查询通常在查询的关键词太短,用户需要implied knowledge(隐含知识)时进行。
由于Query Expansion的全文检索可能带来许多非相关性的查询,因此在使用时,用户可能需要非常谨慎。
//创建表
CREATE TABLE articles(
id INT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
title VARCHAR(200),
body TEXT,
FULLTEXT(title,body)
)ENGINE=InnoDB;
//插入数据
INSERT INTO articles(title,body)VALUES
('MySQL Tutorial','DBMS stands for DataBase...'),
('How To Use MySQL Well','After you went through a...'),
('Optimizing MySQL','In this tutorial we will show...'),
('1001 MySQL Tricks','1.Never run mysqld as root.2....'),
('MySQL vs.YourSQL','In the following database comparison...'),
('MySQL Security','When configured properly,MySQL...'),
('Tuning DB2','For IBM database...'),
('IBM History','DB2 hitory for IBM...');
//普通检索
mysql> SELECT * FROM articles WHERE MATCH(title,body) AGAINST ('database' IN NATURAL LANGUAGE MODE);
+----+------------------+-----------------------------------------+
| id | title | body |
+----+------------------+-----------------------------------------+
| 1 | MySQL Tutorial | DBMS stands for DataBase... |
| 5 | MySQL vs.YourSQL | In the following database comparison... |
| 7 | Tuning DB2 | For IBM database... |
+----+------------------+-----------------------------------------+
//Query Expansion
mysql> SELECT * FROM articles WHERE MATCH(title,body) AGAINST('database' WITH QUERY EXPANSION);
+----+-----------------------+-----------------------------------------+
| id | title | body |
+----+-----------------------+-----------------------------------------+
| 5 | MySQL vs.YourSQL | In the following database comparison... |
| 1 | MySQL Tutorial | DBMS stands for DataBase... |
| 7 | Tuning DB2 | For IBM database... |
| 8 | IBM History | DB2 hitory for IBM... |
| 3 | Optimizing MySQL | In this tutorial we will show... |
| 6 | MySQL Security | When configured properly,MySQL... |
| 2 | How To Use MySQL Well | After you went through a... |
| 4 | 1001 MySQL Tricks | 1.Never run mysqld as root.2.... |
+----+-----------------------+-----------------------------------------+