1、下载tpch
在这个网站下载:
https://github.com/electrum/tpch-dbgen
解压:
unzip tpch_2_16_0v1.zip
2、安装tpch
$cd tpch_2_17_0/dbgen
$vim makefile.suite
更改其中参数:
CC = gcc
DATABASE =SQLSERVER
MACHINE = LINUX
WORKLOAD =TPCH
3、安装
$make
4、生成1G的数据
./dbgen -s 1 -f
(注意:不要加 -T L,该选项只会生成 lineitem 一张表的数据,而后面需要导入全部 8 张表)
5、连接PG数据库test(用postgres用户)
$psql test
6、创建表
创建表语句在dss.ddl中,找到打开并复制
gedit dss.ddl
将复制的创建表语句黏贴出来:
-- TPC-H base tables, one column per line.
-- Unquoted identifiers are case-insensitive in PostgreSQL, so the lower-case
-- names below create exactly the same tables and columns as the upper-case form.

-- Only n_comment is nullable.
CREATE TABLE nation (
    n_nationkey INTEGER      NOT NULL,
    n_name      CHAR(25)     NOT NULL,
    n_regionkey INTEGER      NOT NULL,
    n_comment   VARCHAR(152)
);

-- Only r_comment is nullable.
CREATE TABLE region (
    r_regionkey INTEGER      NOT NULL,
    r_name      CHAR(25)     NOT NULL,
    r_comment   VARCHAR(152)
);

CREATE TABLE part (
    p_partkey     INTEGER       NOT NULL,
    p_name        VARCHAR(55)   NOT NULL,
    p_mfgr        CHAR(25)      NOT NULL,
    p_brand       CHAR(10)      NOT NULL,
    p_type        VARCHAR(25)   NOT NULL,
    p_size        INTEGER       NOT NULL,
    p_container   CHAR(10)      NOT NULL,
    p_retailprice DECIMAL(15,2) NOT NULL,
    p_comment     VARCHAR(23)   NOT NULL
);

CREATE TABLE supplier (
    s_suppkey   INTEGER       NOT NULL,
    s_name      CHAR(25)      NOT NULL,
    s_address   VARCHAR(40)   NOT NULL,
    s_nationkey INTEGER       NOT NULL,
    s_phone     CHAR(15)      NOT NULL,
    s_acctbal   DECIMAL(15,2) NOT NULL,
    s_comment   VARCHAR(101)  NOT NULL
);

CREATE TABLE partsupp (
    ps_partkey    INTEGER       NOT NULL,
    ps_suppkey    INTEGER       NOT NULL,
    ps_availqty   INTEGER       NOT NULL,
    ps_supplycost DECIMAL(15,2) NOT NULL,
    ps_comment    VARCHAR(199)  NOT NULL
);

CREATE TABLE customer (
    c_custkey    INTEGER       NOT NULL,
    c_name       VARCHAR(25)   NOT NULL,
    c_address    VARCHAR(40)   NOT NULL,
    c_nationkey  INTEGER       NOT NULL,
    c_phone      CHAR(15)      NOT NULL,
    c_acctbal    DECIMAL(15,2) NOT NULL,
    c_mktsegment CHAR(10)      NOT NULL,
    c_comment    VARCHAR(117)  NOT NULL
);

CREATE TABLE orders (
    o_orderkey      INTEGER       NOT NULL,
    o_custkey       INTEGER       NOT NULL,
    o_orderstatus   CHAR(1)       NOT NULL,
    o_totalprice    DECIMAL(15,2) NOT NULL,
    o_orderdate     DATE          NOT NULL,
    o_orderpriority CHAR(15)      NOT NULL,
    o_clerk         CHAR(15)      NOT NULL,
    o_shippriority  INTEGER       NOT NULL,
    o_comment       VARCHAR(79)   NOT NULL
);

CREATE TABLE lineitem (
    l_orderkey      INTEGER       NOT NULL,
    l_partkey       INTEGER       NOT NULL,
    l_suppkey       INTEGER       NOT NULL,
    l_linenumber    INTEGER       NOT NULL,
    l_quantity      DECIMAL(15,2) NOT NULL,
    l_extendedprice DECIMAL(15,2) NOT NULL,
    l_discount      DECIMAL(15,2) NOT NULL,
    l_tax           DECIMAL(15,2) NOT NULL,
    l_returnflag    CHAR(1)       NOT NULL,
    l_linestatus    CHAR(1)       NOT NULL,
    l_shipdate      DATE          NOT NULL,
    l_commitdate    DATE          NOT NULL,
    l_receiptdate   DATE          NOT NULL,
    l_shipinstruct  CHAR(25)      NOT NULL,
    l_shipmode      CHAR(10)      NOT NULL,
    l_comment       VARCHAR(44)   NOT NULL
);
7、加工数据
1)将测试数据转换为postgresql识别的格式,删除末尾的分隔符|
$for i in `ls *.tbl`; do sed 's/|$//' $i > ${i/tbl/csv}; done
2)查看刚才另存为csv格式的数据是否存在
$ll -rth *.csv
3)把包含csv文件的目录,软链接到/tmp/dss-data。tpch-pg脚本中一会要用到这个目录
$pwd
/home/digoal/tpch/tpch_2_17_0/dbgen
$ln -s /home/digoal/tpch/tpch_2_17_0/dbgen /tmp/dss-data
8、导入数据
test=# \copy customer from '/home/scidb/Downloads/tpch-dbgen-master/customer.csv'with CSV DELIMITER '|';
COPY 150000
test=# \copy supplier from '/home/scidb/Downloads/tpch-dbgen-master/supplier.csv'with CSV DELIMITER '|';
COPY 10000
test=# \copy nation from '/home/scidb/Downloads/tpch-dbgen-master/nation.csv'with CSV DELIMITER '|';
COPY 25
test=# \copy region from '/home/scidb/Downloads/tpch-dbgen-master/region.csv'with CSV DELIMITER '|';
COPY 5
test=# \copy part from '/home/scidb/Downloads/tpch-dbgen-master/part.csv'with CSV DELIMITER '|';
COPY 200000
test=# \copy partsupp from '/home/scidb/Downloads/tpch-dbgen-master/partsupp.csv'with CSV DELIMITER '|';
COPY 800000
test=# \copy order from '/home/scidb/Downloads/tpch-dbgen-master/order.csv'with CSV DELIMITER '|';
/home/scidb/Downloads/tpch-dbgen-master/order.csv: No such file or directory
test=# \copy orders from '/home/scidb/Downloads/tpch-dbgen-master/orders.csv'with CSV DELIMITER '|';
COPY 1500000
test=# \copy lineitem from '/home/scidb/Downloads/tpch-dbgen-master/lineitem.csv'with CSV DELIMITER '|';
COPY 6001215
9、报错
ERROR: could not open file “/home/scidb/Downloads/tpch-dbgen-master/customer.csv” for reading: Permission denied
解决:(全用过,不知道哪个管用了,都写下来)
1) chmod a+rX /home/scidb/Downloads /home/scidb/Downloads/tpch-dbgen-master /home/scidb/Downloads/tpch-dbgen-master/customer.csv
2) chown postgres /home/scidb/Downloads/tpch-dbgen-master/customer.csv
3)最重要的是不能直接写copy!要写\copy!(COPY 由数据库服务器进程读取文件,需要服务器端用户对该文件有读权限;\copy 是 psql 的元命令,由客户端以当前操作系统用户的身份读取文件,再把数据发送给服务器)
10、添加外键、主键
载入数据后,就可以添加外键、主键;对每张表添加外键、主键,修改tpch-dbgen文件夹中的dss.ri(用文本编辑器打开),将添加外键和主键的信息都按照以下修改,并注释掉文件开头的CONNECT TO TPCD语句
-- Primary and foreign keys for the TPC-H tables; run after the data is loaded.
--
-- All constraints are added inside one explicit transaction, so a failure
-- rolls everything back instead of leaving a partial set of keys. The
-- per-table COMMIT WORK statements were removed: with no transaction open,
-- PostgreSQL only answers them with a "no transaction in progress" warning.
BEGIN;

-- Primary keys first, so every foreign key below has a key to reference.
ALTER TABLE REGION   ADD PRIMARY KEY (R_REGIONKEY);
ALTER TABLE NATION   ADD PRIMARY KEY (N_NATIONKEY);
ALTER TABLE PART     ADD PRIMARY KEY (P_PARTKEY);
ALTER TABLE SUPPLIER ADD PRIMARY KEY (S_SUPPKEY);
ALTER TABLE PARTSUPP ADD PRIMARY KEY (PS_PARTKEY, PS_SUPPKEY);
ALTER TABLE CUSTOMER ADD PRIMARY KEY (C_CUSTKEY);
ALTER TABLE ORDERS   ADD PRIMARY KEY (O_ORDERKEY);
ALTER TABLE LINEITEM ADD PRIMARY KEY (L_ORDERKEY, L_LINENUMBER);

-- Foreign keys. The referenced columns are written out; they are the primary
-- keys that a bare "references <table>" would pick implicitly.

-- For table NATION
ALTER TABLE NATION ADD FOREIGN KEY (N_REGIONKEY) REFERENCES REGION (R_REGIONKEY);

-- For table SUPPLIER
ALTER TABLE SUPPLIER ADD FOREIGN KEY (S_NATIONKEY) REFERENCES NATION (N_NATIONKEY);

-- For table CUSTOMER
ALTER TABLE CUSTOMER ADD FOREIGN KEY (C_NATIONKEY) REFERENCES NATION (N_NATIONKEY);

-- For table PARTSUPP
ALTER TABLE PARTSUPP ADD FOREIGN KEY (PS_SUPPKEY) REFERENCES SUPPLIER (S_SUPPKEY);
ALTER TABLE PARTSUPP ADD FOREIGN KEY (PS_PARTKEY) REFERENCES PART (P_PARTKEY);

-- For table ORDERS
ALTER TABLE ORDERS ADD FOREIGN KEY (O_CUSTKEY) REFERENCES CUSTOMER (C_CUSTKEY);

-- For table LINEITEM
ALTER TABLE LINEITEM ADD FOREIGN KEY (L_ORDERKEY) REFERENCES ORDERS (O_ORDERKEY);
-- Composite key: each line item must point at an existing (part, supplier) pair.
ALTER TABLE LINEITEM ADD FOREIGN KEY (L_PARTKEY, L_SUPPKEY) REFERENCES PARTSUPP (PS_PARTKEY, PS_SUPPKEY);

COMMIT;
11、测试语句
1)创建一个queries目录,用于存放转换后的tpc-h 测试SQL
$mkdir dss/queries
2)生成测试SQL
$for q in `seq 1 22`
do
# Render query template $q with qgen. '>' (not '>>') overwrites the file, so
# running the loop again does not append a second copy of every query.
DSS_QUERY=dss/templates ./qgen "$q" > "dss/queries/$q.sql"
# Build $q.explain.sql: first the query with each line that starts with
# "select" turned into "explain select", then the unmodified query after it.
sed 's/^select/explain select/' "dss/queries/$q.sql" > "dss/queries/$q.explain.sql"
cat "dss/queries/$q.sql" >> "dss/queries/$q.explain.sql"
done
12、就完成了
查看表中的信息是否修改:\d customer
参考文献:
1、http://blog.163.com/digoal@126/blog/static/16387704020151019111930303/
2、http://www.it165.net/database/html/201604/15072.html