1.最近应单用户画像业务需求,需要满足6个月单用户的天粒度收视情况,呈现近半年的收视趋势图。每天数据量大概在150w条左右,一个月大概4500w,6个月大概2.7亿条。要满足大数据量的秒级响应。由于系统采用的为mysql+mongo+oracle的多数据源的模式,多数据源管理起来复杂难度较大。系统也在逐渐往mysql数据库迁移。在此篇文章里就只聊mysql的十亿条数据的查询效率优化,说到优化不得不聊聊mysql数据库索引,以及数据库表结构设计。
综合思量,采用RANGE方式进行分区(此外还有List,Hash,Liner Hash,Key等方式)最后选择cityCode来进行分区,当然也可以按照天粒度,月粒度的日期进行分区,
CREATE TABLE `single_user_duration_times_day` (
`ID` int(11) NOT NULL AUTO_INCREMENT,
`addTime` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '数据录入时间',
`reportTime` date NOT NULL DEFAULT '0000-00-00' COMMENT '指标上报时间',
`cityCode` int(5) NOT NULL DEFAULT '12500' COMMENT '地市编码',
`businessId` varchar(40) NOT NULL COMMENT '业务账号',
`liveDurationStatistics` decimal(16,2) DEFAULT '0.00' COMMENT '直播时长统计',
`liveTimesStatistics` decimal(16,0) DEFAULT '0' COMMENT '直播次数统计',
`vodDurationStatistics` decimal(16,2) DEFAULT '0.00' COMMENT '点播时长统计',
`vodTimesStatistics` decimal(16,0) DEFAULT '0' COMMENT '点播次数统计',
PRIMARY KEY (`ID`,`reportTime`,`cityCode`,`businessId`),
KEY `idx_businessId` (`businessId`) USING BTREE,
KEY `idx_reportTime_businessId_cityCode` (`businessId`,`cityCode`,`reportTime`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=243257067 DEFAULT CHARSET=utf8 COMMENT='单用户时长、次数天粒度数据'
PARTITION BY RANGE (cityCode)
(PARTITION p12501 VALUES LESS THAN (12502) ENGINE = InnoDB,
PARTITION p12502 VALUES LESS THAN (12503) ENGINE = InnoDB,
PARTITION p12503 VALUES LESS THAN (12504) ENGINE = InnoDB,
PARTITION p12504 VALUES LESS THAN (12505) ENGINE = InnoDB,
PARTITION p12505 VALUES LESS THAN (12506) ENGINE = InnoDB,
PARTITION p12506 VALUES LESS THAN (12507) ENGINE = InnoDB,
PARTITION p12507 VALUES LESS THAN (12508) ENGINE = InnoDB,
PARTITION p12508 VALUES LESS THAN (12509) ENGINE = InnoDB,
PARTITION p12509 VALUES LESS THAN (12510) ENGINE = InnoDB,
PARTITION p12510 VALUES LESS THAN (12511) ENGINE = InnoDB,
PARTITION p12511 VALUES LESS THAN (12512) ENGINE = InnoDB,
PARTITION p12512 VALUES LESS THAN (12513) ENGINE = InnoDB,
PARTITION p12513 VALUES LESS THAN (12514) ENGINE = InnoDB,
PARTITION p12514 VALUES LESS THAN (12515) ENGINE = InnoDB,
PARTITION p12515 VALUES LESS THAN (12516) ENGINE = InnoDB,
PARTITION p12516 VALUES LESS THAN (12517) ENGINE = InnoDB,
PARTITION p12517 VALUES LESS THAN (12518) ENGINE = InnoDB,
PARTITION p12518 VALUES LESS THAN (12519) ENGINE = InnoDB);
2.可能用到的修改分区的语句:
修改表分区
Alter table single_user_duration_times_day partition by range(cityCode)
(PARTITION p12501 VALUES LESS THAN (12502) ENGINE = InnoDB,
PARTITION p12502 VALUES LESS THAN (12503) ENGINE = InnoDB,
PARTITION p12503 VALUES LESS THAN (12504) ENGINE = InnoDB,
PARTITION p12504 VALUES LESS THAN (12505) ENGINE = InnoDB,
PARTITION p12505 VALUES LESS THAN (12506) ENGINE = InnoDB,
PARTITION p12506 VALUES LESS THAN (12507) ENGINE = InnoDB,
PARTITION p12507 VALUES LESS THAN (12508) ENGINE = InnoDB,
PARTITION p12508 VALUES LESS THAN (12509) ENGINE = InnoDB,
PARTITION p12509 VALUES LESS THAN (12510) ENGINE = InnoDB,
PARTITION p12510 VALUES LESS THAN (12511) ENGINE = InnoDB,
PARTITION p12511 VALUES LESS THAN (12512) ENGINE = InnoDB,
PARTITION p12512 VALUES LESS THAN (12513) ENGINE = InnoDB,
PARTITION p12513 VALUES LESS THAN (12514) ENGINE = InnoDB,
PARTITION p12514 VALUES LESS THAN (12515) ENGINE = InnoDB,
PARTITION p12515 VALUES LESS THAN (12516) ENGINE = InnoDB,
PARTITION p12516 VALUES LESS THAN (12517) ENGINE = InnoDB,
PARTITION p12517 VALUES LESS THAN (12518) ENGINE = InnoDB,
PARTITION p12518 VALUES LESS THAN (12519) ENGINE = InnoDB);
3.查看分区信息
mysql 查看表的分区信息
SELECT
PARTITION_NAME
, TABLE_ROWS
, PARTITION_EXPRESSION
, PARTITION_DESCRIPTION
FROM
INFORMATION_SCHEMA.PARTITIONS
WHERE
TABLE_SCHEMA = 'iptvtest'
AND TABLE_NAME = 'single_user_duration_times_day';
4.查询语句,及查询效率
explain
SELECT
DATE_FORMAT(reportTime, '%Y%u') reportTime
, liveDurationStatistics
, liveTimesStatistics
, vodDurationStatistics
, vodTimesStatistics
FROM
single_user_duration_times_day
WHERE
1 = 1
and businessId = '371000000000109906'
and cityCode = '12501'
AND DATE_FORMAT(reportTime, '%Y-%m-%d') >= '2021-05-27'