使用TPC-H可以自动生成大量数据,下面介绍ubuntu中的使用方法。
http://www.tpc.org/tpch/ 奉上链接
网盘链接:https://pan.baidu.com/s/1u3_bwZbyWKrFGqdVzV2_7g 密码:sgnw
进入dbgen目录
cd dbgen
复制makefile.suite到makefile
cp makefile.suite makefile
修改makefile文件
vim makefile
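修改为如下内容(gcc要是小写的,因为这个问题百度了一小时左右-_-||)。需要设置的是makefile中原本留空的四个变量,DATABASE须取makefile.suite注释中列出的值之一,例如:
CC = gcc
DATABASE = ORACLE
MACHINE = LINUX
WORKLOAD = TPCH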
如果你之前生成过数据表,可以执行make clean先清除一下再执行以下命令。
make
执行dbgen,生成数据表。-s为比例因子(scale factor),-s 2生成的数据总大小约为2G,可根据自己的需要设定参数。
./dbgen -s 2
我们会发现dbgen目录里增加了许多文件,正是我们后续需要的。
dss.ddl文件里为建表语句,如下:
-- NATION: integer key, fixed-width name, the key of the owning region, and a comment.
-- N_COMMENT is the only column declared without NOT NULL.
CREATE TABLE NATION ( N_NATIONKEY INTEGER NOT NULL,
N_NAME CHAR(25) NOT NULL,
N_REGIONKEY INTEGER NOT NULL,
N_COMMENT VARCHAR(152));
-- REGION: integer key, fixed-width name, and a comment.
-- R_COMMENT is declared without NOT NULL, so it may be NULL.
CREATE TABLE REGION ( R_REGIONKEY INTEGER NOT NULL,
R_NAME CHAR(25) NOT NULL,
R_COMMENT VARCHAR(152));
-- PART: one row per part, keyed by P_PARTKEY; every column is NOT NULL.
-- The retail price is stored as an exact decimal with two fractional digits.
CREATE TABLE PART ( P_PARTKEY INTEGER NOT NULL,
P_NAME VARCHAR(55) NOT NULL,
P_MFGR CHAR(25) NOT NULL,
P_BRAND CHAR(10) NOT NULL,
P_TYPE VARCHAR(25) NOT NULL,
P_SIZE INTEGER NOT NULL,
P_CONTAINER CHAR(10) NOT NULL,
P_RETAILPRICE DECIMAL(15,2) NOT NULL,
P_COMMENT VARCHAR(23) NOT NULL );
-- SUPPLIER: one row per supplier, keyed by S_SUPPKEY; every column is NOT NULL.
-- S_NATIONKEY holds a NATION key; the account balance is an exact decimal.
CREATE TABLE SUPPLIER ( S_SUPPKEY INTEGER NOT NULL,
S_NAME CHAR(25) NOT NULL,
S_ADDRESS VARCHAR(40) NOT NULL,
S_NATIONKEY INTEGER NOT NULL,
S_PHONE CHAR(15) NOT NULL,
S_ACCTBAL DECIMAL(15,2) NOT NULL,
S_COMMENT VARCHAR(101) NOT NULL);
-- PARTSUPP: links a part (PS_PARTKEY) to a supplier (PS_SUPPKEY) together with
-- the available quantity and supply cost for that pairing; every column is NOT NULL.
CREATE TABLE PARTSUPP ( PS_PARTKEY INTEGER NOT NULL,
PS_SUPPKEY INTEGER NOT NULL,
PS_AVAILQTY INTEGER NOT NULL,
PS_SUPPLYCOST DECIMAL(15,2) NOT NULL,
PS_COMMENT VARCHAR(199) NOT NULL );
-- CUSTOMER: one row per customer, keyed by C_CUSTKEY; every column is NOT NULL.
-- C_NATIONKEY holds a NATION key; the account balance is an exact decimal.
CREATE TABLE CUSTOMER ( C_CUSTKEY INTEGER NOT NULL,
C_NAME VARCHAR(25) NOT NULL,
C_ADDRESS VARCHAR(40) NOT NULL,
C_NATIONKEY INTEGER NOT NULL,
C_PHONE CHAR(15) NOT NULL,
C_ACCTBAL DECIMAL(15,2) NOT NULL,
C_MKTSEGMENT CHAR(10) NOT NULL,
C_COMMENT VARCHAR(117) NOT NULL);
-- ORDERS: one row per order, keyed by O_ORDERKEY; every column is NOT NULL.
-- O_CUSTKEY holds the key of the CUSTOMER that placed the order.
CREATE TABLE ORDERS ( O_ORDERKEY INTEGER NOT NULL,
O_CUSTKEY INTEGER NOT NULL,
O_ORDERSTATUS CHAR(1) NOT NULL,
O_TOTALPRICE DECIMAL(15,2) NOT NULL,
O_ORDERDATE DATE NOT NULL,
O_ORDERPRIORITY CHAR(15) NOT NULL,
O_CLERK CHAR(15) NOT NULL,
O_SHIPPRIORITY INTEGER NOT NULL,
O_COMMENT VARCHAR(79) NOT NULL);
-- LINEITEM: one row per order line; every column is NOT NULL.
-- L_ORDERKEY holds an ORDERS key, and (L_PARTKEY, L_SUPPKEY) holds a PARTSUPP key pair.
-- Quantity, price, discount and tax are exact decimals with two fractional digits.
CREATE TABLE LINEITEM ( L_ORDERKEY INTEGER NOT NULL,
L_PARTKEY INTEGER NOT NULL,
L_SUPPKEY INTEGER NOT NULL,
L_LINENUMBER INTEGER NOT NULL,
L_QUANTITY DECIMAL(15,2) NOT NULL,
L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL,
L_DISCOUNT DECIMAL(15,2) NOT NULL,
L_TAX DECIMAL(15,2) NOT NULL,
L_RETURNFLAG CHAR(1) NOT NULL,
L_LINESTATUS CHAR(1) NOT NULL,
L_SHIPDATE DATE NOT NULL,
L_COMMITDATE DATE NOT NULL,
L_RECEIPTDATE DATE NOT NULL,
L_SHIPINSTRUCT CHAR(25) NOT NULL,
L_SHIPMODE CHAR(10) NOT NULL,
L_COMMENT VARCHAR(44) NOT NULL);
上面执行dbgen的步骤生成了8个以.tbl结尾的文件,分别对应8张表中的数据。导入PostgreSQL之前,首先要去除数据中每行末尾的‘|’。下面的代码以lineitem.tbl为例(输出为lineitem1.tbl),其余7个文件需要用同样的方式处理,生成对应的xxx1.tbl,代码如下:
#include <fstream>
#include <iostream>
#include <string>
using namespace std;
/**
 * Strips the trailing '|' field delimiter from every line of a dbgen .tbl
 * file so that PostgreSQL's COPY can read the rows.
 *
 * Usage: <program> [input [output]]
 *   input   file to read  (default: "lineitem.tbl")
 *   output  file to write (default: "lineitem1.tbl")
 *
 * Every input line is written back followed by exactly one '\n'; a single
 * trailing '|' (optionally followed by a '\r' from CRLF files) is removed.
 * Empty lines and lines without a trailing '|' are copied unchanged.
 *
 * Returns 0 on success, 1 if a file cannot be opened or a write fails.
 */
int main(int argc, const char * argv[]) {
    const char* input_path = (argc > 1) ? argv[1] : "lineitem.tbl";
    const char* output_path = (argc > 2) ? argv[2] : "lineitem1.tbl";

    std::ifstream in(input_path);
    if (!in.is_open()) {
        std::cerr << "cannot open input file: " << input_path << '\n';
        return 1;
    }

    std::ofstream out(output_path);
    if (!out.is_open()) {
        std::cerr << "cannot open output file: " << output_path << '\n';
        return 1;
    }

    std::string line;
    while (std::getline(in, line)) {
        // Tolerate CRLF input: drop the carriage return left behind by getline.
        if (!line.empty() && line[line.size() - 1] == '\r')
            line.erase(line.size() - 1);
        // The trailing delimiter must be removed for PostgreSQL to accept the row.
        if (!line.empty() && line[line.size() - 1] == '|')
            line.erase(line.size() - 1);
        // Always terminate the record, even when there was no '|' to strip,
        // so consecutive rows are never merged into one.
        out << line << '\n';
    }

    out.close();
    if (!out) {
        std::cerr << "error while writing: " << output_path << '\n';
        return 1;
    }
    return 0;
}
接着通过copy命令导入数据
-- Load each cleaned *1.tbl file (trailing '|' already stripped) into its table,
-- using '|' as the field delimiter.
-- NOTE(review): the paths below are the author's; adjust them to wherever the
-- *1.tbl files are stored and make sure the PostgreSQL server can read them.
copy nation from '/usr/local/pgsql/sql/table/nation1.tbl' WITH DELIMITER AS '|';
copy part from '/usr/local/pgsql/sql/table/part1.tbl' WITH DELIMITER AS '|';
copy region from '/usr/local/pgsql/sql/table/region1.tbl' WITH DELIMITER AS '|';
copy partsupp from '/usr/local/pgsql/sql/table/partsupp1.tbl' WITH DELIMITER AS '|';
copy supplier from '/usr/local/pgsql/sql/table/supplier1.tbl' WITH DELIMITER AS '|';
copy customer from '/usr/local/pgsql/sql/table/customer1.tbl' WITH DELIMITER AS '|';
copy lineitem from '/usr/local/pgsql/sql/table/lineitem1.tbl' WITH DELIMITER AS '|';
copy orders from '/usr/local/pgsql/sql/table/orders1.tbl' WITH DELIMITER AS '|';
建立主外键关系要在插入数据后,否则一些数据会因为外键不存在无法插入,相信我。
-- For table REGION: make R_REGIONKEY the primary key.
-- This has to exist before NATION can declare a foreign key to REGION.
ALTER TABLE REGION
ADD PRIMARY KEY (R_REGIONKEY);
-- For table NATION: primary key on N_NATIONKEY, then a foreign key from
-- N_REGIONKEY to REGION (no column list, so it targets REGION's primary key).
ALTER TABLE NATION
ADD PRIMARY KEY (N_NATIONKEY);
ALTER TABLE NATION
ADD FOREIGN KEY (N_REGIONKEY) references REGION;
COMMIT WORK;
-- For table PART: make P_PARTKEY the primary key.
ALTER TABLE PART
ADD PRIMARY KEY (P_PARTKEY);
COMMIT WORK;
-- For table SUPPLIER: primary key on S_SUPPKEY, then a foreign key from
-- S_NATIONKEY to NATION's primary key.
ALTER TABLE SUPPLIER
ADD PRIMARY KEY (S_SUPPKEY);
ALTER TABLE SUPPLIER
ADD FOREIGN KEY (S_NATIONKEY) references NATION;
COMMIT WORK;
-- For table PARTSUPP: composite primary key on (PS_PARTKEY, PS_SUPPKEY).
-- Its foreign keys are added further down, after all primary keys exist.
ALTER TABLE PARTSUPP
ADD PRIMARY KEY (PS_PARTKEY,PS_SUPPKEY);
COMMIT WORK;
-- For table CUSTOMER: primary key on C_CUSTKEY, then a foreign key from
-- C_NATIONKEY to NATION's primary key.
ALTER TABLE CUSTOMER
ADD PRIMARY KEY (C_CUSTKEY);
ALTER TABLE CUSTOMER
ADD FOREIGN KEY (C_NATIONKEY) references NATION;
COMMIT WORK;
-- For table LINEITEM: composite primary key on (L_ORDERKEY, L_LINENUMBER).
-- Its foreign keys are added further down, after all primary keys exist.
ALTER TABLE LINEITEM
ADD PRIMARY KEY (L_ORDERKEY,L_LINENUMBER);
COMMIT WORK;
-- For table ORDERS: make O_ORDERKEY the primary key.
-- Its foreign key to CUSTOMER is added further down.
ALTER TABLE ORDERS
ADD PRIMARY KEY (O_ORDERKEY);
COMMIT WORK;
-- For table PARTSUPP: foreign keys from PS_SUPPKEY to SUPPLIER and from
-- PS_PARTKEY to PART (each targets the referenced table's primary key).
ALTER TABLE PARTSUPP
ADD FOREIGN KEY (PS_SUPPKEY) references SUPPLIER;
COMMIT WORK;
ALTER TABLE PARTSUPP
ADD FOREIGN KEY (PS_PARTKEY) references PART;
COMMIT WORK;
-- For table ORDERS: foreign key from O_CUSTKEY to CUSTOMER's primary key.
ALTER TABLE ORDERS
ADD FOREIGN KEY (O_CUSTKEY) references CUSTOMER;
COMMIT WORK;
-- For table LINEITEM: foreign key from L_ORDERKEY to ORDERS, and a composite
-- foreign key from (L_PARTKEY, L_SUPPKEY) to PARTSUPP's two-column primary key.
ALTER TABLE LINEITEM
ADD FOREIGN KEY (L_ORDERKEY) references ORDERS;
COMMIT WORK;
ALTER TABLE LINEITEM
ADD FOREIGN KEY (L_PARTKEY,L_SUPPKEY) references PARTSUPP;
COMMIT WORK;
好啦,齐活儿!