TPC-H生成测试数据集-Ubuntu

使用TPC-H可以自动生成大量数据,下面介绍ubuntu中的使用方法。

1.下载TPC-H的生成工具

http://www.tpc.org/tpch/ 奉上链接

网盘链接:https://pan.baidu.com/s/1u3_bwZbyWKrFGqdVzV2_7g 密码:sgnw

2.解压下载得到的压缩包

3.修改makefile文件

进入dbgen目录

cd dbgen

复制makefile.suite到makefile

cp makefile.suite makefile

修改makefile文件

vim makefile

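修改为如下内容(gcc要是小写的,因为这个问题百度了一小时左右-_-||)
TPC-H生成测试数据集-Ubuntu_第1张图片
(原文此处是一张截图,现已无法显示。通常需要填写makefile中的以下四项;DATABASE的可选值见makefile.suite里的注释,其中没有PostgreSQL,只用dbgen生成数据时一般任选一个已支持的值即可,例如ORACLE。具体取值请以自己的环境为准。)

CC       = gcc
DATABASE = ORACLE
MACHINE  = LINUX
WORKLOAD = TPCH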

4.执行make编译出dbgen,再用dbgen生成数据(建表语句由自带的dss.ddl提供,见第5步)

如果你之前生成过数据表,可以执行make clean先清除一下再执行以下命令。

make

执行dbgen,生成各表的数据文件。-s 为规模因子(scale factor),-s 2 生成的数据总量约为2G,可根据自己的需要调整。

./dbgen  -s 2  

我们会发现dbgen目录里增加了许多文件,正是我们后续需要的。

5. 建表

dss.ddl文件里为建表语句,如下:

-- NATION table. N_COMMENT is the only column that accepts NULL.
-- No keys are declared here; they are added after the data load
-- by the ALTER TABLE statements further down.
CREATE TABLE NATION (
    N_NATIONKEY INTEGER      NOT NULL,
    N_NAME      CHAR(25)     NOT NULL,
    N_REGIONKEY INTEGER      NOT NULL,
    N_COMMENT   VARCHAR(152)
);

-- REGION table. R_COMMENT is the only column that accepts NULL.
CREATE TABLE REGION (
    R_REGIONKEY INTEGER      NOT NULL,
    R_NAME      CHAR(25)     NOT NULL,
    R_COMMENT   VARCHAR(152)
);

-- PART table. Every column is declared NOT NULL.
CREATE TABLE PART (
    P_PARTKEY     INTEGER       NOT NULL,
    P_NAME        VARCHAR(55)   NOT NULL,
    P_MFGR        CHAR(25)      NOT NULL,
    P_BRAND       CHAR(10)      NOT NULL,
    P_TYPE        VARCHAR(25)   NOT NULL,
    P_SIZE        INTEGER       NOT NULL,
    P_CONTAINER   CHAR(10)      NOT NULL,
    P_RETAILPRICE DECIMAL(15,2) NOT NULL,
    P_COMMENT     VARCHAR(23)   NOT NULL
);

-- SUPPLIER table. Every column is declared NOT NULL.
CREATE TABLE SUPPLIER (
    S_SUPPKEY   INTEGER       NOT NULL,
    S_NAME      CHAR(25)      NOT NULL,
    S_ADDRESS   VARCHAR(40)   NOT NULL,
    S_NATIONKEY INTEGER       NOT NULL,
    S_PHONE     CHAR(15)      NOT NULL,
    S_ACCTBAL   DECIMAL(15,2) NOT NULL,
    S_COMMENT   VARCHAR(101)  NOT NULL
);

-- PARTSUPP table. Every column is declared NOT NULL.
CREATE TABLE PARTSUPP (
    PS_PARTKEY    INTEGER       NOT NULL,
    PS_SUPPKEY    INTEGER       NOT NULL,
    PS_AVAILQTY   INTEGER       NOT NULL,
    PS_SUPPLYCOST DECIMAL(15,2) NOT NULL,
    PS_COMMENT    VARCHAR(199)  NOT NULL
);

-- CUSTOMER table. Every column is declared NOT NULL.
CREATE TABLE CUSTOMER (
    C_CUSTKEY    INTEGER       NOT NULL,
    C_NAME       VARCHAR(25)   NOT NULL,
    C_ADDRESS    VARCHAR(40)   NOT NULL,
    C_NATIONKEY  INTEGER       NOT NULL,
    C_PHONE      CHAR(15)      NOT NULL,
    C_ACCTBAL    DECIMAL(15,2) NOT NULL,
    C_MKTSEGMENT CHAR(10)      NOT NULL,
    C_COMMENT    VARCHAR(117)  NOT NULL
);

-- ORDERS table. Every column is declared NOT NULL.
CREATE TABLE ORDERS (
    O_ORDERKEY      INTEGER       NOT NULL,
    O_CUSTKEY       INTEGER       NOT NULL,
    O_ORDERSTATUS   CHAR(1)       NOT NULL,
    O_TOTALPRICE    DECIMAL(15,2) NOT NULL,
    O_ORDERDATE     DATE          NOT NULL,
    O_ORDERPRIORITY CHAR(15)      NOT NULL,
    O_CLERK         CHAR(15)      NOT NULL,
    O_SHIPPRIORITY  INTEGER       NOT NULL,
    O_COMMENT       VARCHAR(79)   NOT NULL
);

-- LINEITEM table. Every column is declared NOT NULL.
CREATE TABLE LINEITEM (
    L_ORDERKEY      INTEGER       NOT NULL,
    L_PARTKEY       INTEGER       NOT NULL,
    L_SUPPKEY       INTEGER       NOT NULL,
    L_LINENUMBER    INTEGER       NOT NULL,
    L_QUANTITY      DECIMAL(15,2) NOT NULL,
    L_EXTENDEDPRICE DECIMAL(15,2) NOT NULL,
    L_DISCOUNT      DECIMAL(15,2) NOT NULL,
    L_TAX           DECIMAL(15,2) NOT NULL,
    L_RETURNFLAG    CHAR(1)       NOT NULL,
    L_LINESTATUS    CHAR(1)       NOT NULL,
    L_SHIPDATE      DATE          NOT NULL,
    L_COMMITDATE    DATE          NOT NULL,
    L_RECEIPTDATE   DATE          NOT NULL,
    L_SHIPINSTRUCT  CHAR(25)      NOT NULL,
    L_SHIPMODE      CHAR(10)      NOT NULL,
    L_COMMENT       VARCHAR(44)   NOT NULL
);

6. 插入数据

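第4步生成了8个以tbl结尾的文件,分别对应8张表的数据。dbgen输出的每一行都以‘|’结尾,PostgreSQL的copy会把它当成多出来的一列而报错,所以首先要去除每行末尾的‘|’。下面的程序以lineitem.tbl为例(输出为lineitem1.tbl),其余7个文件改掉文件名后同样处理一遍,代码如下: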

#include <fstream>
#include <iostream>
#include <string>
using namespace std;
// Returns `line` without its final '|' character, if it has one.
// Lines that are empty or do not end in '|' are returned unchanged.
static std::string strip_trailing_pipe(std::string line) {
    if (!line.empty() && line[line.size() - 1] == '|') {
        line.erase(line.size() - 1);
    }
    return line;
}

// Copies a '|'-delimited .tbl file line by line, dropping the trailing '|'
// on each line so that PostgreSQL can read the data.
//
// Usage: prog [input [output]]
//   input  defaults to "lineitem.tbl"
//   output defaults to "lineitem1.tbl"
//
// Returns 0 on success, 1 if either file cannot be opened or the write fails.
int main(int argc, const char * argv[]) {
    const char *in_path  = (argc > 1) ? argv[1] : "lineitem.tbl";
    const char *out_path = (argc > 2) ? argv[2] : "lineitem1.tbl";

    // Open the input first so a bad input path does not create or truncate
    // the output file.
    std::ifstream in(in_path);
    if (!in.is_open()) {
        std::cerr << "cannot open input file: " << in_path << std::endl;
        return 1;
    }

    std::ofstream out(out_path);
    if (!out.is_open()) {
        std::cerr << "cannot open output file: " << out_path << std::endl;
        return 1;
    }

    std::string line;
    while (std::getline(in, line)) {
        // getline drops the newline, so always write one back; otherwise a
        // line without a trailing '|' would run into the next line.
        out << strip_trailing_pipe(line) << '\n';
    }

    out.close();
    if (out.fail()) {
        std::cerr << "error while writing: " << out_path << std::endl;
        return 1;
    }
    return 0;
}

接着通过copy命令导入数据

-- Load each preprocessed *1.tbl file into its table, splitting fields on '|'.
COPY nation   FROM '/usr/local/pgsql/sql/table/nation1.tbl'   WITH (DELIMITER '|');
COPY part     FROM '/usr/local/pgsql/sql/table/part1.tbl'     WITH (DELIMITER '|');
COPY region   FROM '/usr/local/pgsql/sql/table/region1.tbl'   WITH (DELIMITER '|');
COPY partsupp FROM '/usr/local/pgsql/sql/table/partsupp1.tbl' WITH (DELIMITER '|');
COPY supplier FROM '/usr/local/pgsql/sql/table/supplier1.tbl' WITH (DELIMITER '|');
COPY customer FROM '/usr/local/pgsql/sql/table/customer1.tbl' WITH (DELIMITER '|');
COPY lineitem FROM '/usr/local/pgsql/sql/table/lineitem1.tbl' WITH (DELIMITER '|');
COPY orders   FROM '/usr/local/pgsql/sql/table/orders1.tbl'   WITH (DELIMITER '|');

7.建立主外键关系

建立主外键关系要在插入数据后:上面的copy并没有按外键依赖的顺序导入(例如nation先于region、lineitem先于orders),如果先建外键,被引用的行还不存在,这些数据就无法插入,相信我。

-- REGION: R_REGIONKEY becomes the primary key.
ALTER TABLE REGION
    ADD PRIMARY KEY (R_REGIONKEY);

-- NATION: primary key on N_NATIONKEY, then N_REGIONKEY must match
-- REGION's primary key.
ALTER TABLE NATION
    ADD PRIMARY KEY (N_NATIONKEY);

ALTER TABLE NATION
    ADD FOREIGN KEY (N_REGIONKEY) REFERENCES REGION (R_REGIONKEY);

COMMIT WORK;

-- PART: P_PARTKEY becomes the primary key.
ALTER TABLE PART
    ADD PRIMARY KEY (P_PARTKEY);

COMMIT WORK;

-- SUPPLIER: primary key on S_SUPPKEY, then S_NATIONKEY must match
-- NATION's primary key.
ALTER TABLE SUPPLIER
    ADD PRIMARY KEY (S_SUPPKEY);

ALTER TABLE SUPPLIER
    ADD FOREIGN KEY (S_NATIONKEY) REFERENCES NATION (N_NATIONKEY);

COMMIT WORK;

-- PARTSUPP: composite primary key over (PS_PARTKEY, PS_SUPPKEY).
ALTER TABLE PARTSUPP
    ADD PRIMARY KEY (PS_PARTKEY, PS_SUPPKEY);

COMMIT WORK;

-- CUSTOMER: primary key on C_CUSTKEY, then C_NATIONKEY must match
-- NATION's primary key.
ALTER TABLE CUSTOMER
    ADD PRIMARY KEY (C_CUSTKEY);

ALTER TABLE CUSTOMER
    ADD FOREIGN KEY (C_NATIONKEY) REFERENCES NATION (N_NATIONKEY);

COMMIT WORK;

-- LINEITEM: composite primary key over (L_ORDERKEY, L_LINENUMBER).
ALTER TABLE LINEITEM
    ADD PRIMARY KEY (L_ORDERKEY, L_LINENUMBER);

COMMIT WORK;

-- ORDERS: O_ORDERKEY becomes the primary key.
ALTER TABLE ORDERS
    ADD PRIMARY KEY (O_ORDERKEY);

COMMIT WORK;

-- PARTSUPP foreign keys: PS_SUPPKEY must match SUPPLIER's primary key and
-- PS_PARTKEY must match PART's primary key.
ALTER TABLE PARTSUPP
    ADD FOREIGN KEY (PS_SUPPKEY) REFERENCES SUPPLIER (S_SUPPKEY);

COMMIT WORK;

ALTER TABLE PARTSUPP
    ADD FOREIGN KEY (PS_PARTKEY) REFERENCES PART (P_PARTKEY);

COMMIT WORK;

-- ORDERS foreign key: O_CUSTKEY must match CUSTOMER's primary key.
ALTER TABLE ORDERS
    ADD FOREIGN KEY (O_CUSTKEY) REFERENCES CUSTOMER (C_CUSTKEY);

COMMIT WORK;

-- LINEITEM foreign keys: L_ORDERKEY must match ORDERS' primary key, and the
-- pair (L_PARTKEY, L_SUPPKEY) must match PARTSUPP's composite primary key.
ALTER TABLE LINEITEM
    ADD FOREIGN KEY (L_ORDERKEY) REFERENCES ORDERS (O_ORDERKEY);

COMMIT WORK;

ALTER TABLE LINEITEM
    ADD FOREIGN KEY (L_PARTKEY, L_SUPPKEY) REFERENCES PARTSUPP (PS_PARTKEY, PS_SUPPKEY);

COMMIT WORK;

好啦,齐活儿!

你可能感兴趣的:(Postgresql,TPC-H)