我们都知道,在 MySQL 的开发规范中,建议表的主键使用自增 id:这样插入时新行总是追加在索引末尾,不需要排序和移动已有数据,既减少了碎片的产生,插入速度也不受影响。但是对于分区表,因为分区键必须包含在主键中,那么分区表的主键应该选择"业务字段+分区键",还是"自增 id+分区键"?下面测试一下:
-- Test table 1: primary key = business column (account_id) + partition key (start_date).
-- start_date is both the partitioning column and part of the primary key.
-- ENGINE / DEFAULT CHARSET are spelled out (same values as t_table2) so the size
-- comparison between the test tables does not depend on server defaults.
-- Partitions are monthly: every bound is TO_DAYS() of the first day of the next
-- month, e.g. 736268 = TO_DAYS('2015-11-01'), so p201510 holds every row with
-- start_date < 2015-11-01. p201805 is the catch-all (MAXVALUE).
-- NOTE(review): the '0000-00-00' default is rejected when sql_mode enables
-- NO_ZERO_DATE together with strict mode -- confirm the target server's sql_mode.
CREATE TABLE `t_table` (
  `account_id` varchar(20) NOT NULL DEFAULT '',
  `table_catalog` varchar(20) DEFAULT NULL,
  `table_schema` varchar(20) DEFAULT NULL,
  `table_name` varchar(20) DEFAULT NULL,
  `table_type` varchar(20) DEFAULT NULL,
  `engine` varchar(20) DEFAULT NULL,
  `version` varchar(20) DEFAULT NULL,
  `table_rows` varchar(20) DEFAULT NULL,
  `checksum` varchar(20) DEFAULT NULL,
  `table_comment` varchar(20) DEFAULT NULL,
  `start_date` date NOT NULL DEFAULT '0000-00-00',
  `end_date` date DEFAULT NULL,
  PRIMARY KEY (`account_id`,`start_date`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8
/*!50100 PARTITION BY RANGE (TO_DAYS(start_date))
(PARTITION p201510 VALUES LESS THAN (736268) ENGINE = InnoDB,
 PARTITION p201511 VALUES LESS THAN (736298) ENGINE = InnoDB,
 PARTITION p201512 VALUES LESS THAN (736329) ENGINE = InnoDB,
 PARTITION p201601 VALUES LESS THAN (736360) ENGINE = InnoDB,
 PARTITION p201602 VALUES LESS THAN (736389) ENGINE = InnoDB,
 PARTITION p201603 VALUES LESS THAN (736420) ENGINE = InnoDB,
 PARTITION p201604 VALUES LESS THAN (736450) ENGINE = InnoDB,
 PARTITION p201605 VALUES LESS THAN (736481) ENGINE = InnoDB,
 PARTITION p201606 VALUES LESS THAN (736511) ENGINE = InnoDB,
 PARTITION p201607 VALUES LESS THAN (736542) ENGINE = InnoDB,
 PARTITION p201608 VALUES LESS THAN (736573) ENGINE = InnoDB,
 PARTITION p201609 VALUES LESS THAN (736603) ENGINE = InnoDB,
 PARTITION p201610 VALUES LESS THAN (736634) ENGINE = InnoDB,
 PARTITION p201611 VALUES LESS THAN (736664) ENGINE = InnoDB,
 PARTITION p201612 VALUES LESS THAN (736695) ENGINE = InnoDB,
 PARTITION p201701 VALUES LESS THAN (736726) ENGINE = InnoDB,
 PARTITION p201702 VALUES LESS THAN (736754) ENGINE = InnoDB,
 PARTITION p201703 VALUES LESS THAN (736785) ENGINE = InnoDB,
 PARTITION p201704 VALUES LESS THAN (736815) ENGINE = InnoDB,
 PARTITION p201705 VALUES LESS THAN (736846) ENGINE = InnoDB,
 PARTITION p201706 VALUES LESS THAN (736876) ENGINE = InnoDB,
 PARTITION p201707 VALUES LESS THAN (736907) ENGINE = InnoDB,
 PARTITION p201708 VALUES LESS THAN (736938) ENGINE = InnoDB,
 PARTITION p201709 VALUES LESS THAN (736968) ENGINE = InnoDB,
 PARTITION p201710 VALUES LESS THAN (736999) ENGINE = InnoDB,
 PARTITION p201711 VALUES LESS THAN (737029) ENGINE = InnoDB,
 PARTITION p201712 VALUES LESS THAN (737060) ENGINE = InnoDB,
 PARTITION p201801 VALUES LESS THAN (737091) ENGINE = InnoDB,
 PARTITION p201802 VALUES LESS THAN (737119) ENGINE = InnoDB,
 PARTITION p201803 VALUES LESS THAN (737150) ENGINE = InnoDB,
 PARTITION p201804 VALUES LESS THAN (737180) ENGINE = InnoDB,
 PARTITION p201805 VALUES LESS THAN MAXVALUE ENGINE = InnoDB) */;
生成测试数据的脚本:
#!/usr/bin/python
#encoding: utf-8
"""Populate test.t_table with random rows for the primary-key fragmentation test.

Issues 1,999,999 INSERT IGNORE statements (one per value of range(1, 2000000)).
account_id and table_catalog are produced server-side by the rand_string()
stored function; the remaining filler columns are drawn client-side.

Rows whose (account_id, start_date) collides with an existing primary key are
skipped by INSERT IGNORE, so the final row count can be slightly lower than the
number of statements executed.
"""
import MySQLdb
import random
import string

# Alphabet for the one-character filler columns: a-z plus ten punctuation marks.
FILLER_CHARS = string.ascii_lowercase + '!@#$%^&*()'

# Commit in batches instead of once per row; the rows that end up in the table
# are the same, only the number of commits changes.
COMMIT_EVERY = 1000

# The statement is constant, so it is built once. The %s markers are MySQLdb
# placeholders filled in by cursor.execute(); the driver does the quoting.
# Date columns: floor(4+rand()*10) yields 4..13 and floor(rand()*30) yields
# 0..29, so some generated strings (month 13, day 0) are not valid calendar
# dates. INSERT IGNORE keeps such rows instead of aborting the statement.
# NOTE(review): those rows are presumably the ones that land in the lowest
# partition (p201510) -- confirm before relying on the distribution.
INSERT_SQL = (
    "insert ignore into test.t_table(account_id,table_catalog,table_schema,table_name,"
    "table_type,engine,version,table_rows,checksum,table_comment,start_date,end_date)"
    " values(rand_string(10),rand_string(10),%s,%s,%s,%s,%s,%s,%s,%s,"
    "concat('2017-',floor(4+rand()*10),'-',floor(rand()*30)),"
    "concat('2017-',floor(4+rand()*10),'-',floor(rand()*30)))"
)

# Open the database connection.
db = MySQLdb.connect(host="127.0.0.1", user="root", passwd="", db="test", port=3333)
try:
    # Obtain a cursor to execute statements with.
    cursor = db.cursor()
    for i in range(1, 2000000):
        params = (
            random.choice(FILLER_CHARS),   # table_schema
            random.choice(FILLER_CHARS),   # table_name
            random.choice(FILLER_CHARS),   # table_type
            random.choice(FILLER_CHARS),   # engine
            random.randint(1, 50),         # version
            random.randint(1, 50),         # table_rows
            random.randint(1, 50),         # checksum
            random.choice(FILLER_CHARS),   # table_comment
        )
        cursor.execute(INSERT_SQL, params)
        if i % COMMIT_EVERY == 0:
            db.commit()
    # Flush the last, possibly partial, batch.
    db.commit()
finally:
    # Close the database connection even if an insert fails.
    db.close()
使用到的生成随机字符串的函数,摘自网络
-- rand_string(n): returns a string of n characters, each picked at random from
-- the 62-character alphabet a-z, A-Z, 0-9.
--   n       number of characters to generate; the loop does not run when n <= 0,
--           so the result is then the empty string.
--   returns varchar(255); callers should keep n <= 255, the declared length of
--           the return value.
-- NOT DETERMINISTIC / NO SQL describe the routine accurately (it calls RAND() and
-- touches no tables); stating a characteristic such as NO SQL also lets the
-- function be created on a server with binary logging enabled without setting
-- log_bin_trust_function_creators.
-- DELIMITER is a mysql client command: it keeps the client from cutting the
-- definition at the first ';' inside the body.
DELIMITER $$
CREATE DEFINER=`root`@`localhost` FUNCTION `rand_string`(n INT) RETURNS varchar(255) CHARSET utf8
    NOT DETERMINISTIC
    NO SQL
BEGIN
    DECLARE chars_str varchar(100) DEFAULT 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789';
    DECLARE return_str varchar(255) DEFAULT '';
    DECLARE i INT DEFAULT 0;
    WHILE i < n DO
        -- FLOOR(1 + RAND() * len) is a 1-based position in 1..len; the length is
        -- taken from the alphabet itself rather than hard-coded.
        SET return_str = CONCAT(return_str, SUBSTRING(chars_str, FLOOR(1 + RAND() * CHAR_LENGTH(chars_str)), 1));
        SET i = i + 1;
    END WHILE;
    RETURN return_str;
END$$
DELIMITER ;
向测试表中插入了200w的数据,查看碎片情况
mysql> select count(*) from t_table;
+----------+
| count(*) |
+----------+
|  1999975 |
+----------+
mysql> select table_name,engine,table_rows,data_length+index_length length,DATA_FREE from information_schema.tables where TABLE_SCHEMA='test' and table_name='t_table';
+------------+--------+------------+-----------+-----------+
| table_name | engine | table_rows | length    | DATA_FREE |
+------------+--------+------------+-----------+-----------+
| t_table    | InnoDB |    1992893 | 227377152 |  44040192 |
+------------+--------+------------+-----------+-----------+
下面创建以"自增 id+分区字段"作为组合主键的表,并向其中插入数据:
-- Test table 2: primary key = auto-increment id + partition key (start_date).
-- Same columns and the same monthly partition layout as t_table; only the
-- leading primary-key column differs (id instead of account_id).
-- The AUTO_INCREMENT=<n> table option that SHOW CREATE TABLE emits is left out
-- on purpose: it is the counter of an already-populated table, and a freshly
-- created test table should start numbering at 1.
-- Every partition bound is TO_DAYS() of the first day of the next month,
-- e.g. 736268 = TO_DAYS('2015-11-01'); p201805 is the catch-all (MAXVALUE).
CREATE TABLE `t_table2` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `account_id` varchar(20) DEFAULT NULL,
  `table_catalog` varchar(20) DEFAULT NULL,
  `table_schema` varchar(20) DEFAULT NULL,
  `table_name` varchar(20) DEFAULT NULL,
  `table_type` varchar(20) DEFAULT NULL,
  `engine` varchar(20) DEFAULT NULL,
  `version` varchar(20) DEFAULT NULL,
  `table_rows` varchar(20) DEFAULT NULL,
  `checksum` varchar(20) DEFAULT NULL,
  `table_comment` varchar(20) DEFAULT NULL,
  `start_date` date NOT NULL DEFAULT '0000-00-00',
  `end_date` date DEFAULT NULL,
  PRIMARY KEY (`id`,`start_date`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8
/*!50100 PARTITION BY RANGE (TO_DAYS(start_date))
(PARTITION p201510 VALUES LESS THAN (736268) ENGINE = InnoDB,
 PARTITION p201511 VALUES LESS THAN (736298) ENGINE = InnoDB,
 PARTITION p201512 VALUES LESS THAN (736329) ENGINE = InnoDB,
 PARTITION p201601 VALUES LESS THAN (736360) ENGINE = InnoDB,
 PARTITION p201602 VALUES LESS THAN (736389) ENGINE = InnoDB,
 PARTITION p201603 VALUES LESS THAN (736420) ENGINE = InnoDB,
 PARTITION p201604 VALUES LESS THAN (736450) ENGINE = InnoDB,
 PARTITION p201605 VALUES LESS THAN (736481) ENGINE = InnoDB,
 PARTITION p201606 VALUES LESS THAN (736511) ENGINE = InnoDB,
 PARTITION p201607 VALUES LESS THAN (736542) ENGINE = InnoDB,
 PARTITION p201608 VALUES LESS THAN (736573) ENGINE = InnoDB,
 PARTITION p201609 VALUES LESS THAN (736603) ENGINE = InnoDB,
 PARTITION p201610 VALUES LESS THAN (736634) ENGINE = InnoDB,
 PARTITION p201611 VALUES LESS THAN (736664) ENGINE = InnoDB,
 PARTITION p201612 VALUES LESS THAN (736695) ENGINE = InnoDB,
 PARTITION p201701 VALUES LESS THAN (736726) ENGINE = InnoDB,
 PARTITION p201702 VALUES LESS THAN (736754) ENGINE = InnoDB,
 PARTITION p201703 VALUES LESS THAN (736785) ENGINE = InnoDB,
 PARTITION p201704 VALUES LESS THAN (736815) ENGINE = InnoDB,
 PARTITION p201705 VALUES LESS THAN (736846) ENGINE = InnoDB,
 PARTITION p201706 VALUES LESS THAN (736876) ENGINE = InnoDB,
 PARTITION p201707 VALUES LESS THAN (736907) ENGINE = InnoDB,
 PARTITION p201708 VALUES LESS THAN (736938) ENGINE = InnoDB,
 PARTITION p201709 VALUES LESS THAN (736968) ENGINE = InnoDB,
 PARTITION p201710 VALUES LESS THAN (736999) ENGINE = InnoDB,
 PARTITION p201711 VALUES LESS THAN (737029) ENGINE = InnoDB,
 PARTITION p201712 VALUES LESS THAN (737060) ENGINE = InnoDB,
 PARTITION p201801 VALUES LESS THAN (737091) ENGINE = InnoDB,
 PARTITION p201802 VALUES LESS THAN (737119) ENGINE = InnoDB,
 PARTITION p201803 VALUES LESS THAN (737150) ENGINE = InnoDB,
 PARTITION p201804 VALUES LESS THAN (737180) ENGINE = InnoDB,
 PARTITION p201805 VALUES LESS THAN MAXVALUE ENGINE = InnoDB) */;
再次执行插入测试数据的代码,查看碎片情况
mysql> select count(*) from t_table2;
+----------+
| count(*) |
+----------+
|  1999999 |
+----------+
1 row in set (0.63 sec)
mysql> select table_name,engine,table_rows,data_length+index_length length,DATA_FREE from information_schema.tables where TABLE_SCHEMA='test' and table_name in('t_table2','t_table');
+------------+--------+------------+-----------+-----------+
| table_name | engine | table_rows | length    | DATA_FREE |
+------------+--------+------------+-----------+-----------+
| t_table    | InnoDB |    1992893 | 227377152 |  44040192 |
| t_table2   | InnoDB |    1993098 | 152698880 |  41943040 |
+------------+--------+------------+-----------+-----------+
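可以看到 t_table2 的空闲空间(DATA_FREE)比 t_table 略小,但依然很大。按我的理解,id 是自增的,应该没有碎片或只有少量碎片,实际结果却与想象的不太一样,目前我还不清楚分区表的存放形式。(一个可能的解释:分区表的每个分区是一个独立的表空间,每个表空间都会预分配一部分空闲区,而 information_schema.tables 中的 DATA_FREE 是各分区之和,因此有数据的分区越多,DATA_FREE 就越大,它并不完全等同于碎片——此点有待验证。)作为对比,下文的 t_table3 是普通表(非分区表),id 为自增主键,插入 200w 数据后空闲空间只有 5M,远小于 40M。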
optimize 表后,碎片消失
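mysql> optimize table t_table;
+--------------+----------+----------+-------------------------------------------------------------------+
| Table        | Op       | Msg_type | Msg_text                                                          |
+--------------+----------+----------+-------------------------------------------------------------------+
| test.t_table | optimize | note     | Table does not support optimize, doing recreate + analyze instead |
| test.t_table | optimize | status   | OK                                                                |
+--------------+----------+----------+-------------------------------------------------------------------+
2 rows in set (9.56 sec)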
mysql> select table_name,engine,table_rows,data_length+index_length length,DATA_FREE from information_schema.tables where TABLE_SCHEMA='test' and table_name in('t_table2','t_table');
+------------+--------+------------+-----------+-----------+
| table_name | engine | table_rows | length    | DATA_FREE |
+------------+--------+------------+-----------+-----------+
| t_table    | InnoDB |    1984043 | 147668992 |         0 |
| t_table2   | InnoDB |    1993098 | 152698880 |  41943040 |
+------------+--------+------------+-----------+-----------+
分区的行数信息如下
mysql> SELECT PARTITION_NAME,TABLE_ROWS FROM INFORMATION_SCHEMA.PARTITIONS WHERE TABLE_NAME='t_table2';
+----------------+------------+
| PARTITION_NAME | TABLE_ROWS |
+----------------+------------+
| p201510 | 259307 |
| p201511 | 0 |
| p201512 | 0 |
| p201601 | 0 |
| p201602 | 0 |
| p201603 | 0 |
| p201604 | 0 |
| p201605 | 0 |
| p201606 | 0 |
| p201607 | 0 |
| p201608 | 0 |
| p201609 | 0 |
| p201610 | 0 |
| p201611 | 0 |
| p201612 | 0 |
| p201701 | 0 |
| p201702 | 0 |
| p201703 | 0 |
| p201704 | 193085 |
| p201705 | 193344 |
| p201706 | 192407 |
| p201707 | 192660 |
| p201708 | 192030 |
| p201709 | 192886 |
| p201710 | 191968 |
| p201711 | 193030 |
| p201712 | 192359 |
| p201801 | 0 |
| p201802 | 0 |
| p201803 | 0 |
| p201804 | 0 |
| p201805 | 0 |
+----------------+------------+
mysql> SELECT PARTITION_NAME,TABLE_ROWS FROM INFORMATION_SCHEMA.PARTITIONS WHERE TABLE_NAME='t_table';
+----------------+------------+
| PARTITION_NAME | TABLE_ROWS |
+----------------+------------+
| p201510 | 258544 |
| p201511 | 0 |
| p201512 | 0 |
| p201601 | 0 |
| p201602 | 0 |
| p201603 | 0 |
| p201604 | 0 |
| p201605 | 0 |
| p201606 | 0 |
| p201607 | 0 |
| p201608 | 0 |
| p201609 | 0 |
| p201610 | 0 |
| p201611 | 0 |
| p201612 | 0 |
| p201701 | 0 |
| p201702 | 0 |
| p201703 | 0 |
| p201704 | 193187 |
| p201705 | 192100 |
| p201706 | 192778 |
| p201707 | 192552 |
| p201708 | 187915 |
| p201709 | 193230 |
| p201710 | 193004 |
| p201711 | 187255 |
| p201712 | 193456 |
| p201801 | 0 |
| p201802 | 0 |
| p201803 | 0 |
| p201804 | 0 |
| p201805 | 0 |
+----------------+------------+
-- Control table: an ordinary (non-partitioned) table with a plain auto-increment
-- primary key and the same columns as t_table2. Here start_date is nullable and
-- is not part of the primary key.
-- ENGINE / DEFAULT CHARSET are spelled out with the same values t_table2 uses so
-- the size comparison does not depend on server defaults.
CREATE TABLE `t_table3` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `account_id` varchar(20) DEFAULT NULL,
  `table_catalog` varchar(20) DEFAULT NULL,
  `table_schema` varchar(20) DEFAULT NULL,
  `table_name` varchar(20) DEFAULT NULL,
  `table_type` varchar(20) DEFAULT NULL,
  `engine` varchar(20) DEFAULT NULL,
  `version` varchar(20) DEFAULT NULL,
  `table_rows` varchar(20) DEFAULT NULL,
  `checksum` varchar(20) DEFAULT NULL,
  `table_comment` varchar(20) DEFAULT NULL,
  `start_date` date DEFAULT NULL,
  `end_date` date DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
mysql> select table_name,engine,table_rows,data_length+index_length length,DATA_FREE from information_schema.tables where TABLE_SCHEMA='test' and table_name in('t_table2','t_table','t_table3');
+------------+--------+------------+-----------+-----------+
| table_name | engine | table_rows | length    | DATA_FREE |
+------------+--------+------------+-----------+-----------+
| t_table    | InnoDB |    1984043 | 147668992 |         0 |
| t_table2   | InnoDB |    1993098 | 152698880 |   9437184 |
| t_table3   | InnoDB |    1991739 | 139100160 |   5242880 |
+------------+--------+------------+-----------+-----------+
mysql> create index idx_start_date on t_table3(start_date);
Query OK, 0 rows affected (4.95 sec)
mysql> select table_name,engine,table_rows,data_length+index_length length,DATA_FREE from information_schema.tables where TABLE_SCHEMA='test' and table_name in('t_table2','t_table','t_table3');
+------------+--------+------------+-----------+-----------+
| table_name | engine | table_rows | length    | DATA_FREE |
+------------+--------+------------+-----------+-----------+
| t_table    | InnoDB |    1984043 | 147668992 |         0 |
| t_table2   | InnoDB |    1993098 | 152698880 |   9437184 |
| t_table3   | InnoDB |    1991739 | 139100160 |         0 |
+------------+--------+------------+-----------+-----------+
我们可以看到在插入完数据后,即使是id自增主键也是有碎片存在,但是在创建索引后,会使用碎片的空间。
总结:
从测试结果看,对于分区表的主键选择,"自增 id+分区字段"与"业务字段+分区字段"在碎片(空闲空间)方面的效果没有明显区别。