本文简单记叙在Linux环境下通过TPC-H生成MySQL数据库测试数据的步骤,作为后续参考。
生成数据的量级如下,数据库需要300G左右空间。
表名 | 数据行数 | 量级 |
customer | 15000000 | 1.5千万 |
lineitem | 600037902 | 6亿 |
nation | 25 | |
orders | 150000000 | 1.5亿 |
part | 20000000 | 2千万 |
partsupp | 80000000 | 8千万 |
region | 5 | |
supplier | 1000000 | 1百万 |
1下载TPC-H:https://download.csdn.net/download/kkdelta/12390317
这个源文件针对mysql做了定制修改
修改1:makefile (如果是生成其它数据库类型的测试数据,请修改DATABASE= 对应数据库)
# C compiler used for the build.
CC = gcc
# Current values for DATABASE are: INFORMIX, DB2, TDAT (Teradata)
# SQLSERVER, SYBASE, ORACLE, VECTORWISE
# Current values for MACHINE are: ATT, DOS, HP, IBM, ICL, MVS,
# SGI, SUN, U2200, VMS, LINUX, WIN32
# Current values for WORKLOAD are: TPCH
# NOTE: MYSQL is not one of the stock DATABASE values listed above; it is a
# custom value that relies on the "#ifdef MYSQL" section added to the header.
DATABASE= MYSQL
MACHINE = LINUX
WORKLOAD = TPCH
修改2:头文件添加了如下信息,默认没有MYSQL
/*
 * MySQL-specific string settings, compiled in only when MYSQL is defined.
 * NOTE(review): presumably MYSQL gets defined through the makefile's
 * DATABASE setting, and these strings are emitted into generated query
 * scripts - confirm against the rest of the header and the build flags.
 */
#ifdef MYSQL
/* Empty: no query-plan directive text is supplied for MySQL. */
#define GEN_QUERY_PLAN ""
/* Statement text that opens / commits a transaction. */
#define START_TRAN "START TRANSACTION"
#define END_TRAN "COMMIT"
/* Empty: no output-redirection text is supplied for MySQL. */
#define SET_OUTPUT ""
/* printf-style template taking one integer (row limit); ends the statement. */
#define SET_ROWCOUNT "limit %d;\n"
/* printf-style template taking one string (database name). */
#define SET_DBASE "use %s;\n"
#endif
2解压编译
#tar -xzvf
#进入tpch_2.18.0_rc2/dbgen 目录执行 make命令
3生成数据,生成100G测试数据,更多dbgen 参数请参照https://github.com/electrum/tpch-dbgen?spm=a2c4g.11186623.2.12.49503a21XMT2IL
# Run dbgen in the background (nohup keeps it alive after logout); stdout and
# stderr are both written to out.txt. "-s 100" is the scale for the 100 GB data
# set. NOTE(review): "-f" presumably forces overwriting of existing output
# files - confirm with the dbgen README.
nohup ./dbgen -f -s 100 >out.txt 2>&1 &
4导入数据
41. 创建数据库表结构语句
-- customer: 15,000,000 rows in this data set; one row per C_CUSTKEY.
CREATE TABLE `customer` (
    `C_CUSTKEY`    INT(11)       NOT NULL,
    `C_NAME`       VARCHAR(25)   NOT NULL,
    `C_ADDRESS`    VARCHAR(40)   NOT NULL,
    `C_NATIONKEY`  INT(11)       NOT NULL,
    `C_PHONE`      VARCHAR(15)   NOT NULL,
    `C_ACCTBAL`    DECIMAL(12,2) NOT NULL,
    `C_MKTSEGMENT` VARCHAR(10)   NOT NULL,
    `C_COMMENT`    VARCHAR(117)  NOT NULL,
    PRIMARY KEY (`C_CUSTKEY`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- lineitem: 600,037,902 rows in this data set (the largest table).
-- NOTE(review): the primary key includes L_SHIPDATE in addition to
-- (L_ORDERKEY, L_LINENUMBER); presumably carried over from a partitioned
-- layout - confirm it is intended.
CREATE TABLE `lineitem` (
    `L_ORDERKEY`      BIGINT(20)    NOT NULL,
    `L_PARTKEY`       INT(11)       NOT NULL,
    `L_SUPPKEY`       INT(11)       NOT NULL,
    `L_LINENUMBER`    BIGINT(20)    NOT NULL,
    `L_QUANTITY`      DECIMAL(12,2) NOT NULL,
    `L_EXTENDEDPRICE` DECIMAL(12,2) NOT NULL,
    `L_DISCOUNT`      DECIMAL(12,2) NOT NULL,
    `L_TAX`           DECIMAL(12,2) NOT NULL,
    `L_RETURNFLAG`    VARCHAR(1)    NOT NULL,
    `L_LINESTATUS`    VARCHAR(1)    NOT NULL,
    `L_SHIPDATE`      DATE          NOT NULL,
    `L_COMMITDATE`    DATE          NOT NULL,
    `L_RECEIPTDATE`   DATE          NOT NULL,
    `L_SHIPINSTRUCT`  VARCHAR(25)   NOT NULL,
    `L_SHIPMODE`      VARCHAR(10)   NOT NULL,
    `L_COMMENT`       VARCHAR(44)   NOT NULL,
    PRIMARY KEY (`L_ORDERKEY`, `L_LINENUMBER`, `L_SHIPDATE`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- nation: 25 rows; N_COMMENT is the only nullable column.
CREATE TABLE `nation` (
    `N_NATIONKEY` INT(11)      NOT NULL,
    `N_NAME`      VARCHAR(25)  NOT NULL,
    `N_REGIONKEY` INT(11)      NOT NULL,
    `N_COMMENT`   VARCHAR(152) DEFAULT NULL,
    PRIMARY KEY (`N_NATIONKEY`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- orders: 150,000,000 rows in this data set.
-- NOTE(review): the primary key includes O_ORDERDATE in addition to
-- O_ORDERKEY; presumably carried over from a partitioned layout - confirm
-- it is intended.
CREATE TABLE `orders` (
    `O_ORDERKEY`      BIGINT(20)    NOT NULL,
    `O_CUSTKEY`       INT(11)       NOT NULL,
    `O_ORDERSTATUS`   VARCHAR(1)    NOT NULL,
    `O_TOTALPRICE`    DECIMAL(12,2) NOT NULL,
    `O_ORDERDATE`     DATE          NOT NULL,
    `O_ORDERPRIORITY` VARCHAR(15)   NOT NULL,
    `O_CLERK`         VARCHAR(15)   NOT NULL,
    `O_SHIPPRIORITY`  INT(11)       NOT NULL,
    `O_COMMENT`       VARCHAR(79)   NOT NULL,
    PRIMARY KEY (`O_ORDERKEY`, `O_ORDERDATE`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- part: 20,000,000 rows in this data set; one row per P_PARTKEY.
CREATE TABLE `part` (
    `P_PARTKEY`     INT(11)       NOT NULL,
    `P_NAME`        VARCHAR(55)   NOT NULL,
    `P_MFGR`        VARCHAR(25)   NOT NULL,
    `P_BRAND`       VARCHAR(10)   NOT NULL,
    `P_TYPE`        VARCHAR(25)   NOT NULL,
    `P_SIZE`        INT(11)       NOT NULL,
    `P_CONTAINER`   VARCHAR(10)   NOT NULL,
    `P_RETAILPRICE` DECIMAL(12,2) NOT NULL,
    `P_COMMENT`     VARCHAR(23)   NOT NULL,
    PRIMARY KEY (`P_PARTKEY`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- partsupp: 80,000,000 rows in this data set; one row per
-- (PS_PARTKEY, PS_SUPPKEY) pair.
CREATE TABLE `partsupp` (
    `PS_PARTKEY`    INT(11)       NOT NULL,
    `PS_SUPPKEY`    INT(11)       NOT NULL,
    `PS_AVAILQTY`   INT(11)       NOT NULL,
    `PS_SUPPLYCOST` DECIMAL(12,2) NOT NULL,
    `PS_COMMENT`    VARCHAR(199)  NOT NULL,
    PRIMARY KEY (`PS_PARTKEY`, `PS_SUPPKEY`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- region: 5 rows; R_COMMENT is the only nullable column.
CREATE TABLE `region` (
    `R_REGIONKEY` INT(11)      NOT NULL,
    `R_NAME`      VARCHAR(25)  NOT NULL,
    `R_COMMENT`   VARCHAR(152) DEFAULT NULL,
    PRIMARY KEY (`R_REGIONKEY`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- supplier: 1,000,000 rows in this data set; one row per S_SUPPKEY.
CREATE TABLE `supplier` (
    `S_SUPPKEY`   INT(11)       NOT NULL,
    `S_NAME`      VARCHAR(25)   NOT NULL,
    `S_ADDRESS`   VARCHAR(40)   NOT NULL,
    `S_NATIONKEY` INT(11)       NOT NULL,
    `S_PHONE`     VARCHAR(15)   NOT NULL,
    `S_ACCTBAL`   DECIMAL(12,2) NOT NULL,
    `S_COMMENT`   VARCHAR(101)  NOT NULL,
    PRIMARY KEY (`S_SUPPKEY`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
在100GB数据规模下,缺省MySQL表结构无法完成全部测试,可以通过增加索引提升数据查询性能。
如果是在加载完数据后再创建index会比较耗费时间,注意单条执行
-- Secondary indexes for the TPC-H tables: one index per statement, grouped by
-- table, in the original order.
-- NOTE(review): several indexes cover only the leading primary-key column of
-- their table (idx_c_ck, idx_o_ok, idx_li_ok, idx_p_pk, idx_ps_pk, idx_s_sk,
-- idx_n_nk, idx_r_rk); confirm they are wanted before paying the build cost.

-- customer
CREATE INDEX idx_c_mk ON customer (c_mktsegment);
CREATE INDEX idx_c_ck ON customer (c_custkey);
CREATE INDEX idx_c_nk ON customer (c_nationkey);

-- orders
CREATE INDEX idx_o_ck ON orders (o_custkey);
CREATE INDEX idx_o_ok ON orders (o_orderkey);
CREATE INDEX idx_o_od ON orders (o_orderdate);
CREATE INDEX idx_o_op ON orders (o_orderpriority);
CREATE INDEX idx_o_os ON orders (o_orderstatus);

-- lineitem
CREATE INDEX idx_li_sd ON lineitem (l_shipdate);
CREATE INDEX idx_li_rf ON lineitem (l_returnflag);
CREATE INDEX idx_li_sm ON lineitem (l_shipmode);
CREATE INDEX idx_li_cd ON lineitem (l_commitdate);
CREATE INDEX idx_li_rd ON lineitem (l_receiptdate);
CREATE INDEX idx_li_pk ON lineitem (l_partkey);
CREATE INDEX idx_li_sk ON lineitem (l_suppkey);
CREATE INDEX idx_li_ok ON lineitem (l_orderkey);
CREATE INDEX idx_li_dc ON lineitem (l_discount);
CREATE INDEX idx_li_q ON lineitem (l_quantity);
CREATE INDEX idx_li_rf_ls ON lineitem (l_returnflag, l_linestatus);

-- part
CREATE INDEX idx_p_s ON part (p_size);
CREATE INDEX idx_p_t ON part (p_type);
CREATE INDEX idx_p_pk ON part (p_partkey);
CREATE INDEX idx_p_b ON part (p_brand);
CREATE INDEX idx_p_c ON part (p_container);

-- partsupp
CREATE INDEX idx_ps_pk ON partsupp (ps_partkey);
CREATE INDEX idx_ps_sc ON partsupp (ps_supplycost);
CREATE INDEX idx_ps_sk ON partsupp (ps_suppkey);

-- supplier
CREATE INDEX idx_s_sk ON supplier (s_suppkey);
CREATE INDEX idx_s_nk ON supplier (s_nationkey);

-- nation
CREATE INDEX idx_n_nk ON nation (n_nationkey);
CREATE INDEX idx_n_rk ON nation (n_regionkey);
CREATE INDEX idx_n_n ON nation (n_name);

-- region
CREATE INDEX idx_r_rk ON region (r_regionkey);
CREATE INDEX idx_r_n ON region (r_name);
4.2导入数据,通过mysql客户端命令行工具连接mysql,执行导入语句。
-- Load the generated .tbl files (fields separated by '|', one record per line)
-- into the tables created above.
-- Table names are lowercase so they match the CREATE TABLE statements exactly:
-- on a Linux server with the default lower_case_table_names=0, table names are
-- case-sensitive, and `CUSTOMER` would not resolve to `customer`.
-- NOTE(review): the file names are relative paths and LOCAL reads them on the
-- client side, so start the mysql client in the directory that holds the .tbl
-- files and make sure local_infile is enabled - confirm for your setup.
LOAD DATA LOCAL INFILE 'customer.tbl' INTO TABLE customer
FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n';
LOAD DATA LOCAL INFILE 'orders.tbl' INTO TABLE orders
FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n';
LOAD DATA LOCAL INFILE 'lineitem.tbl' INTO TABLE lineitem
FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n';
LOAD DATA LOCAL INFILE 'nation.tbl' INTO TABLE nation
FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n';
LOAD DATA LOCAL INFILE 'partsupp.tbl' INTO TABLE partsupp
FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n';
LOAD DATA LOCAL INFILE 'part.tbl' INTO TABLE part
FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n';
LOAD DATA LOCAL INFILE 'region.tbl' INTO TABLE region
FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n';
LOAD DATA LOCAL INFILE 'supplier.tbl' INTO TABLE supplier
FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n';
5执行SQL语句测试
5.1 一些单表查询性能测试(百万以上的记录如果没有索引的话性能就很差了)
Num | SQL | Seconds | 表行数 | mark | 机器配置 |
1 | select count(*)from supplier; | 0.95 | 1百万 | 阿里云RDS 8核16G | |
select * from supplier where S_NAME ='Supplier#000000187' | 3 | 1百万 | 无索引 | ||
2 | select count(*) from CUSTOMER; | 20.36 | 1.5千万 | ||
select * from CUSTOMER order by C_NAME limit 100; | 64.5 | ||||
select * from CUSTOMER where C_PHONE ='13-750-942-6364'; | 51 | 无索引1分钟左右 | |||
select count(c_custkey) from CUSTOMER | 5 | 1.5千万 | 有索引 | ||
select * from CUSTOMER where c_custkey =1124; | 0 | 按主键索引查单条 | |||
3 | select count(*) from ORDERS; | 93.8 | 1.5亿 | ||
select * from orders where O_ORDERKEY =20001; | 0 | 按索引查单条 | |||
select * from orders where O_CUSTKEY =6296771; | 372.5 | 无索引6分钟左右 | |||
4 | select count(*) from lineitem ; | 431.9 | 6亿 | ||
SELECT * FROM lineitem where L_ORDERKEY = 6789; | 0 | 按主键索引查单条 | |||
SELECT * FROM lineitem where L_SHIPDATE = '1998-04-19'; | 0.078 | 索引查询 | |||
SELECT * FROM lineitem where L_PARTKEY ='18205184'; | 1659.9 | 31/6亿 | 非索引需要27.665分钟 |
5.2多表关联查询
-- ------------------------- SQL-1 (TPC-H Q1)
-- The separator must start with "-- " (dashes followed by a space) to be a
-- valid MySQL comment; a bare run of dashes is parsed as SQL and fails.
-- Summarises line items shipped on or before 1998-12-01 minus 120 days:
-- totals, averages and a row count per (return flag, line status).
SELECT
    l_returnflag,
    l_linestatus,
    SUM(l_quantity) AS sum_qty,
    SUM(l_extendedprice) AS sum_base_price,
    SUM(l_extendedprice * (1 - l_discount)) AS sum_disc_price,
    SUM(l_extendedprice * (1 - l_discount) * (1 + l_tax)) AS sum_charge,
    AVG(l_quantity) AS avg_qty,
    AVG(l_extendedprice) AS avg_price,
    AVG(l_discount) AS avg_disc,
    COUNT(*) AS count_order
FROM lineitem
WHERE l_shipdate <= DATE '1998-12-01' - INTERVAL '120' DAY
GROUP BY
    l_returnflag,
    l_linestatus
ORDER BY
    l_returnflag,
    l_linestatus;
-- ------------------------- SQL-2 (TPC-H Q2)
-- The separator must start with "-- " (dashes followed by a space) to be a
-- valid MySQL comment; a bare run of dashes is parsed as SQL and fails.
-- For parts of size 48 whose type ends in 'STEEL', lists the EUROPE suppliers
-- whose supply cost equals the minimum supply cost offered for that part by
-- any EUROPE supplier. Returns the first 100 rows, highest account balance
-- first.
SELECT
    s_acctbal,
    s_name,
    n_name,
    p_partkey,
    p_mfgr,
    s_address,
    s_phone,
    s_comment
FROM part
INNER JOIN partsupp
    ON ps_partkey = p_partkey
INNER JOIN supplier
    ON s_suppkey = ps_suppkey
INNER JOIN nation
    ON n_nationkey = s_nationkey
INNER JOIN region
    ON r_regionkey = n_regionkey
WHERE p_size = 48
    AND p_type LIKE '%STEEL'
    AND r_name = 'EUROPE'
    AND ps_supplycost = (
        -- Correlated subquery: the ps_/s_/n_/r_ columns resolve to the inner
        -- tables, part.p_partkey refers to the current outer part row.
        SELECT
            MIN(ps_supplycost)
        FROM partsupp
        INNER JOIN supplier
            ON s_suppkey = ps_suppkey
        INNER JOIN nation
            ON n_nationkey = s_nationkey
        INNER JOIN region
            ON r_regionkey = n_regionkey
        WHERE ps_partkey = part.p_partkey
            AND r_name = 'EUROPE'
    )
ORDER BY
    s_acctbal DESC,
    n_name,
    s_name,
    p_partkey
LIMIT 100;
-- ------------------------- SQL-3 (TPC-H Q3)
-- The separator must start with "-- " (dashes followed by a space) to be a
-- valid MySQL comment; a bare run of dashes is parsed as SQL and fails.
-- Top 10 orders by revenue for 'MACHINERY' customers: orders placed before
-- 1995-03-23, counting only line items shipped after that date.
SELECT
    l_orderkey,
    SUM(l_extendedprice * (1 - l_discount)) AS revenue,
    o_orderdate,
    o_shippriority
FROM customer
INNER JOIN orders
    ON o_custkey = c_custkey
INNER JOIN lineitem
    ON l_orderkey = o_orderkey
WHERE c_mktsegment = 'MACHINERY'
    AND o_orderdate < DATE '1995-03-23'
    AND l_shipdate > DATE '1995-03-23'
GROUP BY
    l_orderkey,
    o_orderdate,
    o_shippriority
ORDER BY
    revenue DESC,
    o_orderdate
LIMIT 10;
-- TPC-H Q4 (this statement has no "SQL-n" heading of its own in this list).
-- Counts, per order priority, the orders placed in the three months starting
-- 1996-07-01 that have at least one line item whose commit date is earlier
-- than its receipt date.
SELECT
    o.o_orderpriority,
    COUNT(*) AS order_count
FROM orders AS o
WHERE o.o_orderdate >= DATE '1996-07-01'
    AND o.o_orderdate < DATE '1996-07-01' + INTERVAL '3' MONTH
    AND EXISTS (
        SELECT
            *
        FROM lineitem AS l
        WHERE l.l_orderkey = o.o_orderkey
            AND l.l_commitdate < l.l_receiptdate
    )
GROUP BY
    o.o_orderpriority
ORDER BY
    o.o_orderpriority;
-- ------------------------- SQL-4 (TPC-H Q5)
-- The separator must start with "-- " (dashes followed by a space) to be a
-- valid MySQL comment; a bare run of dashes is parsed as SQL and fails.
-- Revenue per nation in region 'EUROPE' for orders placed during 1996,
-- counting only line items whose supplier is in the same nation as the
-- ordering customer. Highest revenue first.
SELECT
    n_name,
    SUM(l_extendedprice * (1 - l_discount)) AS revenue
FROM customer
INNER JOIN orders
    ON o_custkey = c_custkey
INNER JOIN lineitem
    ON l_orderkey = o_orderkey
INNER JOIN supplier
    ON s_suppkey = l_suppkey
    AND s_nationkey = c_nationkey
INNER JOIN nation
    ON n_nationkey = s_nationkey
INNER JOIN region
    ON r_regionkey = n_regionkey
WHERE r_name = 'EUROPE'
    AND o_orderdate >= DATE '1996-01-01'
    AND o_orderdate < DATE '1996-01-01' + INTERVAL '1' YEAR
GROUP BY
    n_name
ORDER BY
    revenue DESC;
-- TPC-H Q6 (this statement has no "SQL-n" heading of its own in this list).
-- Sums extended price * discount over line items shipped during 1996 with a
-- discount between 0.01 and 0.03 (inclusive) and a quantity below 24.
SELECT
    SUM(l.l_extendedprice * l.l_discount) AS revenue
FROM lineitem AS l
WHERE l.l_shipdate >= DATE '1996-01-01'
    AND l.l_shipdate < DATE '1996-01-01' + INTERVAL '1' YEAR
    AND l.l_discount BETWEEN 0.02 - 0.01 AND 0.02 + 0.01
    AND l.l_quantity < 24;
-- ------------------------- SQL-5 (TPC-H Q7)
-- The separator must start with "-- " (dashes followed by a space) to be a
-- valid MySQL comment; a bare run of dashes is parsed as SQL and fails.
-- Shipping volume between CANADA and BRAZIL (either direction) for line items
-- shipped from 1995-01-01 through 1996-12-31, per supplier nation, customer
-- nation and ship year.
SELECT
    supp_nation,
    cust_nation,
    l_year,
    SUM(volume) AS revenue
FROM (
    -- n1 is the supplier's nation, n2 is the customer's nation.
    SELECT
        n1.n_name AS supp_nation,
        n2.n_name AS cust_nation,
        EXTRACT(YEAR FROM l_shipdate) AS l_year,
        l_extendedprice * (1 - l_discount) AS volume
    FROM supplier
    INNER JOIN lineitem
        ON l_suppkey = s_suppkey
    INNER JOIN orders
        ON o_orderkey = l_orderkey
    INNER JOIN customer
        ON c_custkey = o_custkey
    INNER JOIN nation AS n1
        ON n1.n_nationkey = s_nationkey
    INNER JOIN nation AS n2
        ON n2.n_nationkey = c_nationkey
    WHERE (
            (n1.n_name = 'CANADA' AND n2.n_name = 'BRAZIL')
            OR (n1.n_name = 'BRAZIL' AND n2.n_name = 'CANADA')
        )
        AND l_shipdate BETWEEN DATE '1995-01-01' AND DATE '1996-12-31'
) AS shipping
GROUP BY
    supp_nation,
    cust_nation,
    l_year
ORDER BY
    supp_nation,
    cust_nation,
    l_year;
-- ------------------------- SQL-6 (TPC-H Q8)
-- The separator must start with "-- " (dashes followed by a space) to be a
-- valid MySQL comment; a bare run of dashes is parsed as SQL and fails.
-- Per order year (orders dated 1995-01-01 through 1996-12-31): the share of
-- 'LARGE ANODIZED COPPER' volume sold to customers in region 'AMERICA' that
-- was supplied by suppliers located in BRAZIL.
SELECT
    o_year,
    SUM(CASE
        WHEN nation = 'BRAZIL' THEN volume
        ELSE 0
    END) / SUM(volume) AS mkt_share
FROM (
    -- n1 is the customer's nation (restricted to AMERICA through region),
    -- n2 is the supplier's nation.
    SELECT
        EXTRACT(YEAR FROM o_orderdate) AS o_year,
        l_extendedprice * (1 - l_discount) AS volume,
        n2.n_name AS nation
    FROM part
    INNER JOIN lineitem
        ON l_partkey = p_partkey
    INNER JOIN supplier
        ON s_suppkey = l_suppkey
    INNER JOIN orders
        ON o_orderkey = l_orderkey
    INNER JOIN customer
        ON c_custkey = o_custkey
    INNER JOIN nation AS n1
        ON n1.n_nationkey = c_nationkey
    INNER JOIN region
        ON r_regionkey = n1.n_regionkey
    INNER JOIN nation AS n2
        ON n2.n_nationkey = s_nationkey
    WHERE r_name = 'AMERICA'
        AND o_orderdate BETWEEN DATE '1995-01-01' AND DATE '1996-12-31'
        AND p_type = 'LARGE ANODIZED COPPER'
) AS all_nations
GROUP BY
    o_year
ORDER BY
    o_year;
-- ------------------------- SQL-7 (TPC-H Q9)
-- The separator must start with "-- " (dashes followed by a space) to be a
-- valid MySQL comment; a bare run of dashes is parsed as SQL and fails.
-- Profit per supplier nation and order year for parts whose name contains
-- 'maroon'. Per line item:
--   amount = extended price * (1 - discount) - supply cost * quantity
SELECT
    nation,
    o_year,
    SUM(amount) AS sum_profit
FROM (
    SELECT
        n_name AS nation,
        EXTRACT(YEAR FROM o_orderdate) AS o_year,
        l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity AS amount
    FROM lineitem
    INNER JOIN part
        ON p_partkey = l_partkey
    INNER JOIN supplier
        ON s_suppkey = l_suppkey
    INNER JOIN partsupp
        ON ps_suppkey = l_suppkey
        AND ps_partkey = l_partkey
    INNER JOIN orders
        ON o_orderkey = l_orderkey
    INNER JOIN nation
        ON n_nationkey = s_nationkey
    WHERE p_name LIKE '%maroon%'
) AS profit
GROUP BY
    nation,
    o_year
ORDER BY
    nation,
    o_year DESC;
测试结果
更多SQL测试可参照 https://help.aliyun.com/document_detail/156330.html?spm=a2c4g.11186623.6.775.10865130GYKkgh
更多TPC-H 信息可以参考 https://blog.csdn.net/leixingbang1989/article/details/8766047