本篇文章主要是对MySQL学习时的一些总结,作为学习笔记记录。
CREATE TABLE staffs (
id INT PRIMARY KEY AUTO_INCREMENT,
NAME VARCHAR (24) NULL DEFAULT '' COMMENT '姓名',
age INT NOT NULL DEFAULT 0 COMMENT '年龄',
pos VARCHAR (20) NOT NULL DEFAULT '' COMMENT '职位',
add_time TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '入职时间'
) CHARSET utf8 COMMENT '员工记录表' ;
INSERT INTO staffs(NAME,age,pos,add_time) VALUES('z3',22,'manager',NOW());
INSERT INTO staffs(NAME,age,pos,add_time) VALUES('July',23,'dev',NOW());
INSERT INTO staffs(NAME,age,pos,add_time) VALUES('2000',23,'dev',NOW());
INSERT INTO staffs(NAME,age,pos,add_time) VALUES(NULL,23,'dev',NOW());
SELECT * FROM staffs;
ALTER TABLE staffs ADD INDEX idx_staffs_nameAgePos(NAME, age, pos);
最好进行全值匹配
索引idx_staffs_nameAgePos建立索引时以name、age、pos的顺序建立的。全值匹配表示按顺序匹配的
EXPLAIN SELECT * FROM staffs WHERE NAME = 'July';
EXPLAIN SELECT * FROM staffs WHERE NAME = 'July' AND age = 25;
EXPLAIN SELECT * FROM staffs WHERE NAME = 'July' AND age = 25 AND pos = 'dev';
最佳左前缀法则
但还存在一种特殊情况:
不在索引列上做任何操作(计算、函数、(自动or手动)类型转换),否则会导致索引失效而转向全表扫描
EXPLAIN SELECT * FROM staffs WHERE left(NAME,4) = 'July';
存储引擎不能使用索引中范围条件右边的列
若有索引则能使用到索引,范围条件右边的索引会失效(范围条件右边与范围条件使用的同一个组合索引,右边的才会失效。若是不同索引则不会失效)
尽量使用覆盖索引(只访问索引的查询(索引列和查询列一致)),减少select *
mysql在使用不等于(!= 或者<>)的时候无法使用索引会导致全表扫描
is not null 也无法使用索引,但是is null是可以使用索引的
like以通配符开头('%abc...'),mysql索引失效会变成全表扫描的操作
问题:解决like '%string%'时索引不被使用的方法?
此时可以借用覆盖索引的方法解决该问题
建表
CREATE TABLE `tbl_user` ( `id` INT (11) NOT NULL AUTO_INCREMENT, `NAME` VARCHAR (20) DEFAULT NULL, `age` INT (11) DEFAULT NULL, email VARCHAR (20) DEFAULT NULL, PRIMARY KEY (`id`) ) ENGINE = INNODB AUTO_INCREMENT = 1 DEFAULT CHARSET = utf8 ; #drop table tbl_user INSERT INTO tbl_user(NAME,age,email) VALUES('1aa1',21,'[email protected]'); INSERT INTO tbl_user(NAME,age,email) VALUES('2aa2',222,'[email protected]'); INSERT INTO tbl_user(NAME,age,email) VALUES('3aa3',265,'[email protected]'); INSERT INTO tbl_user(NAME,age,email) VALUES('4aa4',21,'[email protected]'); INSERT INTO tbl_user(NAME,age,email) VALUES('aa',121,'[email protected]');
对下列语句进行explain分析:
EXPLAIN SELECT NAME,age FROM tbl_user WHERE NAME LIKE '%aa%'; EXPLAIN SELECT id FROM tbl_user WHERE NAME LIKE '%aa%'; EXPLAIN SELECT NAME FROM tbl_user WHERE NAME LIKE '%aa%'; EXPLAIN SELECT age FROM tbl_user WHERE NAME LIKE '%aa%'; EXPLAIN SELECT id,NAME FROM tbl_user WHERE NAME LIKE '%aa%'; EXPLAIN SELECT id,NAME,age FROM tbl_user WHERE NAME LIKE '%aa%'; EXPLAIN SELECT NAME,age FROM tbl_user WHERE NAME LIKE '%aa%'; EXPLAIN SELECT * FROM tbl_user WHERE NAME LIKE '%aa%'; EXPLAIN SELECT id,NAME,age,email FROM tbl_user WHERE NAME LIKE '%aa%';
建立索引
CREATE INDEX idx_user_nameAge ON tbl_user(NAME,age);
对上述语句同样进行explain分析:
从上面可以看出,若非要将like查询的匹配符号%写在左侧,则可以借用覆盖索引的形式来提供查询效率。
字符串不加单引号索引失效
底层进行转换使索引失效,使用了函数造成索引失效
少用or,用它来连接时会索引失效
总结
假设index(a,b,c)
where语句 | 索引是否被使用 |
where a = 3 | Y,使用到a |
where a = 3 and b = 5 | Y,使用到a,b |
where a = 3 and b = 5 and c = 4 | Y,使用到a,b,c |
where b = 3 或者 where b = 3 and c = 4 或者 where c = 4 | N |
where a = 3 and c = 5 | 使用到a, 但是c不可以,b中间断了 |
where a = 3 and b > 4 and c = 5 | 使用到a和b, c不能用在范围之后,b后断了 |
where a = 3 and b like 'kk%' and c = 4 | Y,使用到a,b,c |
where a = 3 and b like '%kk' and c = 4 | Y,只用到a |
where a = 3 and b like '%kk%' and c = 4 | Y,只用到a |
where a = 3 and b like 'k%kk%' and c = 4 | Y,使用到a,b,c |
建表
CREATE TABLE test03 (
id INT PRIMARY KEY NOT NULL AUTO_INCREMENT,
c1 CHAR(10),
c2 CHAR(10),
c3 CHAR(10),
c4 CHAR(10),
c5 CHAR(10)
) ;
INSERT INTO test03(c1,c2,c3,c4,c5) VALUES('a1','a2','a3','a4','a5');
INSERT INTO test03(c1,c2,c3,c4,c5) VALUES('b1','b2','b3','b4','b5');
INSERT INTO test03(c1,c2,c3,c4,c5) VALUES('c1','c2','c3','c4','c5');
INSERT INTO test03(c1,c2,c3,c4,c5) VALUES('d1','d2','d3','d4','d5');
INSERT INTO test03(c1,c2,c3,c4,c5) VALUES('e1','e2','e3','e4','e5');
CREATE INDEX idx_test03_c1234 ON test03(c1,c2,c3,c4);
案例1
EXPLAIN SELECT * FROM test03 WHERE c1='a1';
EXPLAIN SELECT * FROM test03 WHERE c1='a1' AND c2='a2';
EXPLAIN SELECT * FROM test03 WHERE c1='a1' AND c2='a2' AND c3='a3';
EXPLAIN SELECT * FROM test03 WHERE c1='a1' AND c2='a2' AND c3='a3' AND c4='a4';
案例2
EXPLAIN SELECT * FROM test03 WHERE c1='a1' AND c2='a2' AND c4='a4' AND c3='a3';
案例3
EXPLAIN SELECT * FROM test03 WHERE c1='a1' AND c2='a2' AND c3>'a3' AND c4='a4';
案例4
EXPLAIN SELECT * FROM test03 WHERE c1='a1' AND c2='a2' AND c4>'a4' AND c3='a3';
案例5
EXPLAIN SELECT * FROM test03 WHERE c1='a1' AND c2='a2' AND c4='a4' ORDER BY c3;
此时c3作用在排序而不是查找。
案例6
EXPLAIN SELECT * FROM test03 WHERE c1='a1' AND c2='a2' ORDER BY c3;
案例7
EXPLAIN SELECT * FROM test03 WHERE c1='a1' AND c2='a2' ORDER BY c4;
此时跳过了c3,直接order by c4,产生了filesort。
案例8
EXPLAIN SELECT * FROM test03 WHERE c1='a1' AND c5='a5' ORDER BY c2,c3;
只用c1一个字段索引,但是c2、c3用于排序,无filesort。
EXPLAIN SELECT * FROM test03 WHERE c1='a1' AND c5='a5' ORDER BY c3,c2;
建的索引是1234,没有按照顺序进行order by,产生了filesort。
案例9
EXPLAIN SELECT * FROM test03 WHERE c1='a1' AND c2='a2' ORDER BY c2,c3;
案例10
EXPLAIN SELECT * FROM test03 WHERE c1='a1' AND c2='a2' AND c5='a5' ORDER BY c2,c3;
用c1、c2两个字段索引,但是c2、c3用于排序,无filesort。
案例11
EXPLAIN SELECT * FROM test03 WHERE c1='a1' AND c2='a2' AND c5='a5' ORDER BY c3,c2;
此时有常量c2的情况,尽管order by c3,c2不符合索引次序,但仍不会有filesort。
案例12
EXPLAIN SELECT * FROM test03 WHERE c1='a1' AND c5='a5' ORDER BY c3,c2;
案例13
EXPLAIN SELECT * FROM test03 WHERE c1='a1' AND c4='a4' GROUP BY c2,c3;
案例14
EXPLAIN SELECT * FROM test03 WHERE c1='a1' AND c4='a4' GROUP BY c3,c2;
CREATE TABLE IF NOT EXISTS `article` (
`id` INT (10) UNSIGNED NOT NULL PRIMARY KEY AUTO_INCREMENT,
`author_id` INT (10) UNSIGNED NOT NULL,
`category_id` INT (10) UNSIGNED NOT NULL,
`views` INT (10) UNSIGNED NOT NULL,
`comments` INT (10) UNSIGNED NOT NULL,
`title` VARBINARY (255) NOT NULL,
`content` TEXT NOT NULL
) ;
INSERT INTO `article` (
`author_id`,
`category_id`,
`views`,
`comments`,
`title`,
`content`
)
VALUES
(1, 1, 1, 1, '1', '1'),
(2, 2, 2, 2, '2', '2'),
(1, 1, 3, 3, '3', '3') ;
查询 category_id为1且comments大于1的情况下,views最多的article_id:
EXPLAIN SELECT id,author_id FROM article WHERE category_id = 1 AND comments > 1 ORDER BY views DESC LIMIT 1;
上边的explain中type为all,而extra中出现了filesort,因此需要优化
添加索引
CREATE INDEX idx_article_ccv ON article(category_id,comments,views);
再次进行explain
从上边的结果可以看出,type变为了range,这是符合要求的,但是extra中出现了filesort,这一点是不能接受的,这是为什么呢?
删除旧索引并建立新的索引
DROP INDEX idx_article_ccv ON article;
CREATE INDEX idx_article_cv ON article(category_id,views);
再次进行explain
此时可以看到,type变为了ref,Extra 中的 Using filesort 也消失了,达到了优化的目的。
CREATE TABLE IF NOT EXISTS `class` (
`id` INT (10) UNSIGNED NOT NULL AUTO_INCREMENT,
`card` INT (10) UNSIGNED NOT NULL,
PRIMARY KEY (`id`)
) ;
CREATE TABLE IF NOT EXISTS `book` (
`bookid` INT (10) UNSIGNED NOT NULL AUTO_INCREMENT,
`card` INT (10) UNSIGNED NOT NULL,
PRIMARY KEY (`bookid`)
) ;
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO class(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
INSERT INTO book(card) VALUES(FLOOR(1 + (RAND() * 20)));
进行关联查询explain分析
EXPLAIN SELECT * FROM class LEFT JOIN book ON class.card = book.card;
从上面的结果可以看出,分析的结果中type为all,需要进行优化。
添加索引
ALTER TABLE `book` ADD INDEX Y ( `card`);
再次进行关联查询explain分析
从上面的结果可以看出,对book表添加了索引之后,table为book的一行type变为了ref,rows也变小了,优化效果比较明显。这是由于左外连接的特性决定的,左边一定都有,LEFT JOIN条件用于确定如何从右表进行搜索,,所以对于左外连接来说,右侧才是应该关注的重点,一定要建立索引。
删除旧索引建立新的索引
DROP INDEX Y ON book;
ALTER TABLE class ADD INDEX X (card);
再次进行关联查询explain分析
从上图中可以看出,对于左外连接如果在左表建立索引,对于查找的效率提升并不明显。
有索引,大表驱动小表
SELECT SQL_NO_CACHE SUM(sal) FROM emp WHERE deptno IN (SELECT deptno FROM dept);
# 用 exists 是否存在,存在返回一条记录,exists 是作为一个查询判断用,select后返回什么不重要
SELECT SQL_NO_CACHE SUM(sal) FROM emp WHERE EXISTS (SELECT 1 FROM dept WHERE emp.deptno=dept.deptno);
SELECT SQL_NO_CACHE SUM(sal) FROM emp INNER JOIN dept ON emp.deptno=dept.deptno;
有索引,小表驱动大表
SELECT SQL_NO_CACHE SUM(e.sal) FROM (SELECT * FROM emp WHERE id<10000) e WHERE EXISTS (SELECT 1 FROM emp WHERE e.deptno=emp.deptno);
SELECT SQL_NO_CACHE SUM(e.sal) FROM (SELECT * FROM emp WHERE id<10000) e INNER JOIN (SELECT DISTINCT deptno FROM emp) m ON m.deptno=e.deptno;
SELECT SQL_NO_CACHE SUM(sal) FROM emp WHERE deptno IN (SELECT deptno FROM dept);
从上边的结果可以看出,有索引小驱动大表性能优于大表驱动小表。
无索引,小表驱动大表
SELECT SQL_NO_CACHE SUM(sal) FROM emp WHERE deptno IN (SELECT deptno FROM dept);
SELECT SQL_NO_CACHE SUM(sal) FROM emp WHERE EXISTS (SELECT 1 FROM dept WHERE emp.deptno=dept.deptno);
SELECT SQL_NO_CACHE SUM(sal) FROM emp INNER JOIN dept ON emp.deptno=dept.deptno;
无索引,小表驱动大表
SELECT SQL_NO_CACHE SUM(e.sal) FROM (SELECT * FROM emp WHERE id<10000) e WHERE EXISTS (SELECT 1 FROM emp WHERE e.deptno=emp.deptno);
SELECT SQL_NO_CACHE SUM(e.sal) FROM (SELECT * FROM emp WHERE id<10000) e INNER JOIN (SELECT DISTINCT deptno FROM emp) m ON m.deptno=e.deptno;
SELECT SQL_NO_CACHE SUM(sal) FROM emp WHERE deptno IN (SELECT deptno FROM dept);
有索引的情况下用 inner join是最好的,其次是in,exists最糟糕
无索引的情况下用小表驱动大表 因为join方式需要distinct ,没有索引distinct消耗性能较大,所以exists性能最佳in其次 join性能最差
无索引的情况下大表驱动小表,in和exists的性能应该是接近的 都比较糟糕,exists稍微好一点(不超过5%),但是inner join由于使用了join buffer所以快很多,如果为left join则最慢
order by子句,要尽量使用index方式排序,避免使用filesort方式排序
建表
CREATE TABLE tblA (
id INT PRIMARY KEY NOT NULL AUTO_INCREMENT,
age INT,
birth TIMESTAMP NOT NULL,
NAME VARCHAR (200)
) ;
INSERT INTO tblA(age,birth,NAME) VALUES(22,NOW(),'abc');
INSERT INTO tblA(age,birth,NAME) VALUES(23,NOW(),'bcd');
INSERT INTO tblA(age,birth,NAME) VALUES(24,NOW(),'def');
CREATE INDEX idx_A_ageBirth ON tblA(age,birth,NAME);
案例1
案例2
结论
MySQL支持二种方式的排序,FileSort和Index,Index效率高。对于index排序来说,MySQL扫描索引本身完成排序,FileSort方式效率较低。
ORDER BY满足两情况,会使用Index方式排序:
尽可能在索引列上完成排序操作,遵照索引建的最佳左前缀。
如果不在索引列上,filesort有两种算法:双路排序和单路排序。
双路排序
取一批数据,要对磁盘进行了两次扫描,众所周知,I\O是很耗时的,所以在mysql4.1之后,出现了第二种改进的算法,就是单路排序。
单路排序
从磁盘读取查询需要的所有列,按照order by列在buffer对它们进行排序,然后扫描排序后的列表进行输出,它的效率更快一些,避免了第二次读取数据,并且把随机IO变成了顺序IO,但是它会使用更多的空间,因为它把每一行都保存在内存中了。
结论
单路排序总体而言好过双路,但是用单路有问题:
优化策略
为deptno字段添加索引:
从上面的结果看出,为deptno添加索引并没有什么效果。需要再次进行优化。
先利用覆盖索引把要取的数据行的主键取到,然后再用这个主键列与数据表做关联(查询的数据量小了后):
对比两者的查询速度,优化前:
优化后:
order by后的字段XXX有索引且sql语句中有limit时,