#mysql全文索引与停止词

#mysql全文索引与停止词

/*

1.全文索引(FULLTEXT INDEX) 在MySQL 5.6之前,FULLTEXT索引仅可用于MyISAM表;自MySQL 5.6起,InnoDB表也支持FULLTEXT索引
2.全文索引在mysql的默认情况下,对于中文意义不大
因为英文有空格、标点符号来拆成单词,进而对单词进行索引
而对于中文,没有空格来隔开单词,mysql默认的解析器无法识别每个中文词
(自MySQL 5.7.6起,可以在创建索引时通过 WITH PARSER ngram 指定内置的ngram解析器来支持中文)

中文分词并不是一件很简单的事情,真正能把中文分词这件事情做好的公司并不多
中文分词其实是一门挺大的学问,千万不要小看这件事情


经常听到一些人说,这很简单,那很简单,真正叫他去做的时候,
他才发现并非表面上的那么简单。大家以后少说"这很简单,那很简单"
这类话,因为这会让人觉得你的头脑太过于简单。

*/

/*

全文索引用法:
MATCH(建有全文索引的列名) AGAINST ('你要搜索的字符串')

*/


/*

全文索引的停止词:
全文索引不针对非常频繁的词做索引
如:this,is,you,my,me等等,就像我们汉语中的
我,你,他,的,是等等,这些字和词语使用得非常频繁,
不会对这些使用频繁的词语做索引

*/

# Demo table used by every full-text example in this script.
# NOTE: FULLTEXT indexes accept CHAR, VARCHAR and TEXT columns, so `introduce`
# could equally be declared as TEXT.
CREATE TABLE account2 (
    id        INT,
    email     VARCHAR(30),
    telephone CHAR(11),
    introduce VARCHAR(300)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;

# Add the full-text index exactly once, under an explicit name.
# Previously the index was declared inline in CREATE TABLE *and* added again here,
# which left two identical FULLTEXT indexes (`introduce` and `introduce_2`) on the
# same column. The inline equivalent inside CREATE TABLE would be:
#     FULLTEXT KEY introduce (introduce)
ALTER TABLE account2 ADD FULLTEXT INDEX introduce (introduce);

# Synonyms: when followed by a table name, EXPLAIN behaves as a synonym of
# DESC (DESCRIBE), so the three statements below have the same effect.
EXPLAIN account2;
DESC account2;
DESCRIBE account2;

# List the indexes defined on the table (two spellings of the same request).
SHOW INDEX FROM account2;
SHOW KEYS FROM account2;

# Show the current contents of the table.
SELECT
    id,
    email,
    telephone,
    introduce
FROM account2;

# Print the CREATE TABLE statement for the table.
SHOW CREATE TABLE account2;
# Output recorded from the statement above:
/*
CREATE TABLE `account2` (
  `id` int(11) DEFAULT NULL,
  `email` varchar(30) DEFAULT NULL,
  `telephone` char(11) DEFAULT NULL,
  `introduce` varchar(300) DEFAULT NULL,
  FULLTEXT KEY `introduce` (`introduce`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8
*/

# Sample data for the full-text searches.
# The column list is spelled out so the inserts do not depend on the table's
# physical column order.
# NOTE(review): id 4 is used twice in each batch and ids repeat across the two
# batches. The table has no key on id, so this loads fine, but the last row of
# each batch may have been meant to be id 5 -- confirm.
# NOTE(review): the '[email protected]' values look like placeholders left by
# e-mail obfuscation in the original post; the real addresses are not recoverable here.

# Rows whose `introduce` text is Chinese.
INSERT INTO account2 (id, email, telephone, introduce) VALUES
    (1, '[email protected]', '123', '喜欢看水浒传,喜欢中国四大名著,酷爱文学'),
    (2, '[email protected]', '456', '天若有情天亦老,人间正道是沧桑'),
    (3, '[email protected]', '789', '乘风破浪会有时,直挂云帆济沧海'),
    (4, '[email protected]', '012', '一万年太久,只争朝夕'),
    (4, '[email protected]', '012', '江山如此多娇,引无数英雄竞折腰');

# Rows whose `introduce` text is (mostly) English.
# The apostrophe in "You'll" is escaped by doubling the quote, which works
# whether or not the NO_BACKSLASH_ESCAPES SQL mode is enabled.
INSERT INTO account2 (id, email, telephone, introduce) VALUES
    (1, '[email protected]', '123', 'welcome to china, welcome to my hometown 江西省赣州市于都县'),
    (2, '[email protected]', '456', 'Education is not the filling of a pail, but the lighting of a fire.'),
    (3, '[email protected]', '789', 'The young always have the same problem: how to rebel and conform at the same time. They have now solved this by defying their parents and copying one another. '),
    (4, '[email protected]', '012', 'A man is not old until regrets take the place of dreams.'),
    (4, '[email protected]', '012', 'If you are an introvert, force yourself to be an extravert. You''ll be happier.');

# Check what was loaded.
SELECT
    id,
    email,
    telephone,
    introduce
FROM account2;


# Relevance scores for every row: MATCH ... AGAINST in the select list returns a
# score per row, shown in the `匹配度` column next to the search term itself.

# Chinese search terms. With MySQL's default settings the full-text index is of
# little use for Chinese: the text has no spaces between words, so the default
# parser cannot pick out individual Chinese words.
SELECT
    id, email, telephone, introduce,
    MATCH(introduce) AGAINST ('水浒') AS `匹配度`,
    '水浒'
FROM account2;

SELECT
    id, email, telephone, introduce,
    MATCH(introduce) AGAINST ('朝夕') AS `匹配度`,
    '朝夕'
FROM account2;

SELECT
    id, email, telephone, introduce,
    MATCH(introduce) AGAINST ('于都县') AS `匹配度`,
    '于都县'
FROM account2;

# English search terms, one query per word.
SELECT
    id, email, telephone, introduce,
    MATCH(introduce) AGAINST ('filling') AS `匹配度`,
    'filling'
FROM account2;

SELECT
    id, email, telephone, introduce,
    MATCH(introduce) AGAINST ('welcome') AS `匹配度`,
    'welcome'
FROM account2;

SELECT
    id, email, telephone, introduce,
    MATCH(introduce) AGAINST ('hometown') AS `匹配度`,
    'hometown'
FROM account2;

SELECT
    id, email, telephone, introduce,
    MATCH(introduce) AGAINST ('conform') AS `匹配度`,
    'conform'
FROM account2;

SELECT
    id, email, telephone, introduce,
    MATCH(introduce) AGAINST ('regrets') AS `匹配度`,
    'regrets'
FROM account2;

SELECT
    id, email, telephone, introduce,
    MATCH(introduce) AGAINST ('introvert') AS `匹配度`,
    'introvert'
FROM account2;

SELECT
    id, email, telephone, introduce,
    MATCH(introduce) AGAINST ('yourself') AS `匹配度`,
    'yourself'
FROM account2;

# Filtering with MATCH ... AGAINST in the WHERE clause.

# The full-text index does not help with Chinese: no rows are returned.
SELECT id, email, telephone, introduce
FROM account2
WHERE MATCH(introduce) AGAINST ('水浒');

# No rows are returned.
SELECT id, email, telephone, introduce
FROM account2
WHERE MATCH(introduce) AGAINST ('于都县');

# No rows are returned: 'welcome' is a stopword, so it is not indexed.
SELECT id, email, telephone, introduce
FROM account2
WHERE MATCH(introduce) AGAINST ('welcome');

# Returns data: 'hometown' is indexed.
SELECT id, email, telephone, introduce
FROM account2
WHERE MATCH(introduce) AGAINST ('hometown');

# Show the execution plan for the full-text lookup above.
EXPLAIN
SELECT id, email, telephone, introduce
FROM account2
WHERE MATCH(introduce) AGAINST ('hometown');

 

#你可能感兴趣的:(mysql,全文索引,停止词,查询,数据结构)