\i /pathA/dss.sql
# 该文件对应的路径
(2)dss.ri 是表之间的约束条件,同样将其内容复制到一个 sql 文件(dss_ri.sql)中,然后执行(如果在导入数据的时候报错,先导入数据,再添加约束)
这里红色的框内需要删除或者注释掉,注意TPCD也需要删除
注意:数据约束这里有问题,需要修改下表内的内容,把外键这里的语句改成如下格式。
ADD FOREIGN KEY (L_PARTKEY,L_SUPPKEY) references PARTSUPP;
\i /pathA/dss_ri.sql
# 该文件对应的路径
make -f Makefile
(2)生成数据
生成8个tbl文件,生成以后修改这八个文件的访问权限,并将这八个tbl文件放入创建好的文件目录tbl下(注意 -s 后的参数是数据规模:1表示1G数据量,0.1表示100M数据量)
./dbgen -s 1 -f
chmod 777 tbl/*.tbl
(3)数据导入
copy customer from '/home/ywb/Data/tpch-kit-master/dbgen/tbl/customer.tbl' with delimiter as '|' NULL '';
copy lineitem from '/home/ywb/Data/tpch-kit-master/dbgen/tbl/lineitem.tbl' with delimiter as '|' NULL '';
copy nation from '/home/ywb/Data/tpch-kit-master/dbgen/tbl/nation.tbl' with delimiter as '|' NULL '';
copy orders from '/home/ywb/Data/tpch-kit-master/dbgen/tbl/orders.tbl' with delimiter as '|' NULL '';
copy part from '/home/ywb/Data/tpch-kit-master/dbgen/tbl/part.tbl' with delimiter as '|' NULL '';
copy partsupp from '/home/ywb/Data/tpch-kit-master/dbgen/tbl/partsupp.tbl' with delimiter as '|' NULL '';
copy region from '/home/ywb/Data/tpch-kit-master/dbgen/tbl/region.tbl' with delimiter as '|' NULL '';
copy supplier from '/home/ywb/Data/tpch-kit-master/dbgen/tbl/supplier.tbl' with delimiter as '|' NULL '';
或者使用如下命令行导入(注意:load data local infile 是 MySQL 的语法,上面的 copy 是 PostgreSQL 的语法)
load data local infile '/home/Data/tpch-kit-master/dbgen/tbl/region.tbl' into table region fields terminated by '|' lines terminated by '\n';
load data local infile '/home/Data/tpch-kit-master/dbgen/tbl/nation.tbl' into table nation fields terminated by '|' lines terminated by '\n';
load data local infile '/home/Data/tpch-kit-master/dbgen/tbl/customer.tbl' into table customer fields terminated by '|' lines terminated by '\n';
load data local infile '/home/Data/tpch-kit-master/dbgen/tbl/supplier.tbl' into table supplier fields terminated by '|' lines terminated by '\n';
load data local infile '/home/Data/tpch-kit-master/dbgen/tbl/part.tbl' into table part fields terminated by '|' lines terminated by '\n';
load data local infile '/home/Data/tpch-kit-master/dbgen/tbl/orders.tbl' into table orders fields terminated by '|' lines terminated by '\n';
load data local infile '/home/Data/tpch-kit-master/dbgen/tbl/partsupp.tbl' into table partsupp fields terminated by '|' lines terminated by '\n';
load data local infile '/home/Data/tpch-kit-master/dbgen/tbl/lineitem.tbl' into table lineitem fields terminated by '|' lines terminated by '\n';
# Generate the 22 query files d1.sql .. d22.sql with qgen (run from the
# directory that contains the qgen binary).
for i in {1..22}; do
  name="d${i}.sql"
  echo "$name"
  # Report a failing query instead of silently leaving an empty/partial file.
  ./qgen -d "$i" >"$name" || echo "qgen failed for query $i" >&2
done
\i /pathA/tpcds.sql
# 该文件对应的路径
(2)tpcds_ri.sql 是表之间的限制条件
注意:这一步表的约束最好放到数据导入完成以后再执行,否则会出现主外键约束的错误。
\i /pathA/tpcds_ri.sql
# 该文件对应的路径
sudo apt-get install byacc # 或者 apt-get install yacc
sudo apt-get install flex
make -f Makefile.suite # dsdgen、dsqgen 生成
./dsdgen -sc 1 -DIR 'data_output_path'
# ./dsdgen -sc 1 -DIR 'data/' # data是我创建的数据目录位置,在tools下
# ./dsdgen -h 查看用法
#data_output_path: 数据生成路径
# 如果没有dsdgen,则执行命令:
# make -f Makefile.suite
(2)修改文件权限
修改data/下所有.dat文件的访问权限
chmod 777 data/*.dat
(3) 数据导入
注意:生成的数据中会多一个 ’ | ’ 字符,导致导入数据报错,所以需要先删除每行最后一个 |,直接在dat文件目录下终端执行该命令。
删除多余的 |
# Remove the trailing '|' from every line of each .dat file in the current
# directory, editing the files in place.
for dat_file in *.dat; do
  # An unmatched glob stays as the literal '*.dat'; skip it in that case.
  [ -e "$dat_file" ] || continue
  echo "$dat_file"
  # '--' keeps a file name starting with '-' from being parsed as an option.
  sed -i -e 's#|$##g' -- "$dat_file"
done
导入数据
copy call_center from '/home/ywb/Data/tpcds-kit-master/tools/data/call_center.dat' with delimiter as '|' NULL '';
copy catalog_page from '/home/ywb/Data/tpcds-kit-master/tools/data/catalog_page.dat' with delimiter as '|' NULL '';
copy catalog_returns from '/home/ywb/Data/tpcds-kit-master/tools/data/catalog_returns.dat' with delimiter as '|' NULL '';
copy catalog_sales from '/home/ywb/Data/tpcds-kit-master/tools/data/catalog_sales.dat' with delimiter as '|' NULL '';
copy customer from '/home/ywb/Data/tpcds-kit-master/tools/data/customer.dat' with delimiter as '|' NULL '';
copy customer_address from '/home/ywb/Data/tpcds-kit-master/tools/data/customer_address.dat' with delimiter as '|' NULL '';
copy customer_demographics from '/home/ywb/Data/tpcds-kit-master/tools/data/customer_demographics.dat' with delimiter as '|' NULL '';
copy date_dim from '/home/ywb/Data/tpcds-kit-master/tools/data/date_dim.dat' with delimiter as '|' NULL '';
copy dbgen_version from '/home/ywb/Data/tpcds-kit-master/tools/data/dbgen_version.dat' with delimiter as '|' NULL '';
copy household_demographics from '/home/ywb/Data/tpcds-kit-master/tools/data/household_demographics.dat' with delimiter as '|' NULL '';
copy income_band from '/home/ywb/Data/tpcds-kit-master/tools/data/income_band.dat' with delimiter as '|' NULL '';
copy inventory from '/home/ywb/Data/tpcds-kit-master/tools/data/inventory.dat' with delimiter as '|' NULL '';
copy item from '/home/ywb/Data/tpcds-kit-master/tools/data/item.dat' with delimiter as '|' NULL '';
copy promotion from '/home/ywb/Data/tpcds-kit-master/tools/data/promotion.dat' with delimiter as '|' NULL '';
copy reason from '/home/ywb/Data/tpcds-kit-master/tools/data/reason.dat' with delimiter as '|' NULL '';
copy ship_mode from '/home/ywb/Data/tpcds-kit-master/tools/data/ship_mode.dat' with delimiter as '|' NULL '';
copy store from '/home/ywb/Data/tpcds-kit-master/tools/data/store.dat' with delimiter as '|' NULL '';
copy store_returns from '/home/ywb/Data/tpcds-kit-master/tools/data/store_returns.dat' with delimiter as '|' NULL '';
copy store_sales from '/home/ywb/Data/tpcds-kit-master/tools/data/store_sales.dat' with delimiter as '|' NULL '';
copy time_dim from '/home/ywb/Data/tpcds-kit-master/tools/data/time_dim.dat' with delimiter as '|' NULL '';
copy warehouse from '/home/ywb/Data/tpcds-kit-master/tools/data/warehouse.dat' with delimiter as '|' NULL '';
copy web_page from '/home/ywb/Data/tpcds-kit-master/tools/data/web_page.dat' with delimiter as '|' NULL '';
copy web_returns from '/home/ywb/Data/tpcds-kit-master/tools/data/web_returns.dat' with delimiter as '|' NULL '';
copy web_sales from '/home/ywb/Data/tpcds-kit-master/tools/data/web_sales.dat' with delimiter as '|' NULL '';
copy web_site from '/home/ywb/Data/tpcds-kit-master/tools/data/web_site.dat' with delimiter as '|' NULL '';
(4)生成查询
# Render query templates query1.tpl .. query99.tpl into ../sql/queryN.sql.
# The redirection below fails for every query if the output directory is missing.
mkdir -p ../sql
for i in {1..99}; do
  ./dsqgen -DIRECTORY ../query_templates/ -TEMPLATE "query${i}.tpl" -DIALECT netezza -FILTER Y >"../sql/query${i}.sql"
done
注意:因为query_templates只有99个查询模板,所以一次最多只能生成99个查询语句;即使生成多次,得到的99个查询语句也是一样的。
-DIRECTORY:SQL模板的路径,即查询模板所在的文件目录位置 ../query_templates/
-TEMPLATE:SQL模板的名称
-DIALECT:include query dialect definitions found in <s>.tpl,即指定生成哪种数据库方言的查询语句(如 -DIALECT postgresql)。
注意:DIALECT支持oracle,db2,sqlserver,netezza,ansi,但是没有postgresql。可以去query_templates文件目录下看是否有对应的模板文件,推荐netezza,pg可以解析。
-FILTER:将生成的查询输出到标准输出(再通过 > 重定向到文件),即 ../sql/query${i}.sql 是输出路径和文件名
i /path/xxx.sql;
\i /path/xxx.sql;
\copy info_type from '/home/ywb/Data/imdb/info_type.csv' with delimiter as ',' csv quote '"' escape as '\';
注意:这里有些csv字段中包含了分隔符逗号,所以用之前的命令会报错,注意csv quote '"' escape as '\'
里面是双引号,这里真的需要小心,数据格式有点问题,搞了我好久。
\copy aka_name from '/home/ywb/Data/imdb/aka_name.csv' with delimiter as ',' csv quote '"' escape as '\';
\copy aka_title from '/home/ywb/Data/imdb/aka_title.csv' with delimiter as ',' csv quote '"' escape as '\';
\copy cast_info from '/home/ywb/Data/imdb/cast_info.csv' with delimiter as ',' csv quote '"' escape as '\';
\copy char_name from '/home/ywb/Data/imdb/char_name.csv' with delimiter as ',' csv quote '"' escape as '\';
\copy comp_cast_type from '/home/ywb/Data/imdb/comp_cast_type.csv' with delimiter as ',' csv quote '"' escape as '\';
\copy company_name from '/home/ywb/Data/imdb/company_name.csv' with delimiter as ',' csv quote '"' escape as '\';
\copy company_type from '/home/ywb/Data/imdb/company_type.csv' with delimiter as ',' csv quote '"' escape as '\';
\copy complete_cast from '/home/ywb/Data/imdb/complete_cast.csv' with delimiter as ',' csv quote '"' escape as '\';
\copy info_type from '/home/ywb/Data/imdb/info_type.csv' with delimiter as ',' csv quote '"' escape as '\';
\copy keyword from '/home/ywb/Data/imdb/keyword.csv' with delimiter as ',' csv quote '"' escape as '\';
\copy kind_type from '/home/ywb/Data/imdb/kind_type.csv' with delimiter as ',' csv quote '"' escape as '\';
\copy link_type from '/home/ywb/Data/imdb/link_type.csv' with delimiter as ',' csv quote '"' escape as '\';
\copy movie_companies from '/home/ywb/Data/imdb/movie_companies.csv' with delimiter as ',' csv quote '"' escape as '\';
\copy movie_info from '/home/ywb/Data/imdb/movie_info.csv' with delimiter as ',' csv quote '"' escape as '\';
\copy movie_info_idx from '/home/ywb/Data/imdb/movie_info_idx.csv' with delimiter as ',' csv quote '"' escape as '\';
\copy movie_keyword from '/home/ywb/Data/imdb/movie_keyword.csv' with delimiter as ',' csv quote '"' escape as '\';
\copy movie_link from '/home/ywb/Data/imdb/movie_link.csv' with delimiter as ',' csv quote '"' escape as '\';
\copy name from '/home/ywb/Data/imdb/name.csv' with delimiter as ',' csv quote '"' escape as '\';
\copy person_info from '/home/ywb/Data/imdb/person_info.csv' with delimiter as ',' csv quote '"' escape as '\';
\copy role_type from '/home/ywb/Data/imdb/role_type.csv' with delimiter as ',' csv quote '"' escape as '\';
\copy title from '/home/ywb/Data/imdb/title.csv' with delimiter as ',' csv quote '"' escape as '\';
delimiter指定了字段之间的分隔符号为逗号
escape指定了在引号中的转义字符为反斜杠,这样即使在引号字串中存在引号本身,也可以用该字符进行转义,变为一般的引号字符,而不是字段终结
header true:指定文件中存在表头。如果没有的话,则设置为false
quote指定了以双引号作为字符串字段的引号,这样它会将双引号内的内容作为一个字段值来进行处理