https://blog.csdn.net/justlpf/article/details/106859262
https://blog.csdn.net/qq_34474071/article/details/123545871
https://www.jianshu.com/p/173de219379e
https://t.cj.sina.com.cn/articles/view/1798777247/6b37299f019016pio
yum install -y gcc gcc-c++ expect byacc flex bison
git clone git@github.com:gregrahn/tpcds-kit.git
https://www.cnblogs.com/syw20170419/p/16204199.html
数据导入:https://blog.csdn.net/weixin_39636364/article/details/124882481
### 注意:生成数据的保存路径(-DIR 指定的目录)要提前创建,否则 dsdgen 会报 "ERROR: Failed to open output file!"
[hadoop@hadoop02 tools]$ ./dsdgen -SCALE 1GB -DIR /opt/module/tpcdsdata/
dsdgen Population Generator (Version 2.10.0)
Copyright Transaction Processing Performance Council (TPC) 2001 - 2018
Warning: This scale factor is valid for QUALIFICATION ONLY
ERROR: Failed to open output file!
File: print.c
Line: 490
[hadoop@hadoop02 tools]$ ./dsdgen -SCALE 1GB -DIR /opt/module/tpcdsdata/
dsdgen Population Generator (Version 2.10.0)
Copyright Transaction Processing Performance Council (TPC) 2001 - 2018
Warning: This scale factor is valid for QUALIFICATION ONLY
dialect:这里选择 netezza。
所选方言的 SQL 查询分页语法需要与要测试的 kudu 语句一致(可选:oracle、netezza、sqlserver、db2);netezza 的分页语法是 limit,所以这里选择它。
[hadoop@hadoop02 tools]$ ll | grep tpcds
...
-rw-rw-r-- 1 hadoop hadoop 13875 2022-08-23 09:44:27 tpcds_ri.sql
-rw-rw-r-- 1 hadoop hadoop 22153 2022-08-23 09:44:27 tpcds_source.sql
-rw-rw-r-- 1 hadoop hadoop 30001 2022-08-23 09:44:27 tpcds.sql
...
[hadoop@hadoop02 tools]$
TPC-DS已经提前准备好了创建表相关的SQL文件,文件位于tools目录下。
但是由于各种数据库之间语法差异,无法直接使用,需要手动改造。
[hadoop@hadoop02 tools]$ ./dsqgen -DIRECTORY ../query_templates -INPUT ../query_templates/templates.lst -VERBOSE Y -QUALIFY Y -SCALE 100 -DIALECT sqlserver -OUTPUT_DIR /opt/module/tpcdsquerydoris
[hadoop@hadoop02 tpcdsquerydoris]$ pwd
/opt/module/tpcdsquerydoris
[hadoop@hadoop02 tpcdsquerydoris]$ ll
total 168
-rw-rw-r-- 1 hadoop hadoop 170202 2022-08-23 13:32:28 query_0.sql
-bash-4.2# cat customer_address.dat | sed 's/.$//' | curl --location-trusted -u root:123456 -H "label:customer_address2" -H "timeout:1200" -H "column_separator:|" -T - http://172.16.34.127:32030/api/test_db/customer_address/_stream_load
{
"TxnId": 58492,
"Label": "customer_address2",
"TwoPhaseCommit": "false",
"Status": "Success",
"Message": "OK",
"NumberTotalRows": 50000,
"NumberLoadedRows": 50000,
"NumberFilteredRows": 0,
"NumberUnselectedRows": 0,
"LoadBytes": 5452165,
"LoadTimeMs": 706,
"BeginTxnTimeMs": 2,
"StreamLoadPutTimeMs": 3,
"ReadDataTimeMs": 15,
"WriteDataTimeMs": 686,
"CommitAndPublishTimeMs": 13
}
-- Bulk-load the pipe-delimited .dat files under /opt, one file per table;
-- each file is loaded into the table that shares its base name.
-- Rows are newline-terminated and fields are separated by '|'.
-- NOTE(review): the stream-load example elsewhere in these notes strips the
-- last character of every line (sed 's/.$//') before loading; confirm whether
-- these files still carry a trailing '|' and need the same treatment.
LOAD DATA LOCAL INFILE '/opt/call_center.dat'
    INTO TABLE call_center
    FIELDS TERMINATED BY '|'
    LINES TERMINATED BY '\n';

LOAD DATA LOCAL INFILE '/opt/customer.dat'
    INTO TABLE customer
    FIELDS TERMINATED BY '|'
    LINES TERMINATED BY '\n';

LOAD DATA LOCAL INFILE '/opt/income_band.dat'
    INTO TABLE income_band
    FIELDS TERMINATED BY '|'
    LINES TERMINATED BY '\n';

LOAD DATA LOCAL INFILE '/opt/ship_mode.dat'
    INTO TABLE ship_mode
    FIELDS TERMINATED BY '|'
    LINES TERMINATED BY '\n';

LOAD DATA LOCAL INFILE '/opt/warehouse.dat'
    INTO TABLE warehouse
    FIELDS TERMINATED BY '|'
    LINES TERMINATED BY '\n';

LOAD DATA LOCAL INFILE '/opt/catalog_page.dat'
    INTO TABLE catalog_page
    FIELDS TERMINATED BY '|'
    LINES TERMINATED BY '\n';

LOAD DATA LOCAL INFILE '/opt/customer_demographics.dat'
    INTO TABLE customer_demographics
    FIELDS TERMINATED BY '|'
    LINES TERMINATED BY '\n';

LOAD DATA LOCAL INFILE '/opt/inventory.dat'
    INTO TABLE inventory
    FIELDS TERMINATED BY '|'
    LINES TERMINATED BY '\n';

LOAD DATA LOCAL INFILE '/opt/store.dat'
    INTO TABLE store
    FIELDS TERMINATED BY '|'
    LINES TERMINATED BY '\n';

LOAD DATA LOCAL INFILE '/opt/web_page.dat'
    INTO TABLE web_page
    FIELDS TERMINATED BY '|'
    LINES TERMINATED BY '\n';

LOAD DATA LOCAL INFILE '/opt/catalog_returns.dat'
    INTO TABLE catalog_returns
    FIELDS TERMINATED BY '|'
    LINES TERMINATED BY '\n';

LOAD DATA LOCAL INFILE '/opt/date_dim.dat'
    INTO TABLE date_dim
    FIELDS TERMINATED BY '|'
    LINES TERMINATED BY '\n';

LOAD DATA LOCAL INFILE '/opt/item.dat'
    INTO TABLE item
    FIELDS TERMINATED BY '|'
    LINES TERMINATED BY '\n';

LOAD DATA LOCAL INFILE '/opt/store_returns.dat'
    INTO TABLE store_returns
    FIELDS TERMINATED BY '|'
    LINES TERMINATED BY '\n';

LOAD DATA LOCAL INFILE '/opt/web_returns.dat'
    INTO TABLE web_returns
    FIELDS TERMINATED BY '|'
    LINES TERMINATED BY '\n';

LOAD DATA LOCAL INFILE '/opt/catalog_sales.dat'
    INTO TABLE catalog_sales
    FIELDS TERMINATED BY '|'
    LINES TERMINATED BY '\n';

LOAD DATA LOCAL INFILE '/opt/dbgen_version.dat'
    INTO TABLE dbgen_version
    FIELDS TERMINATED BY '|'
    LINES TERMINATED BY '\n';

LOAD DATA LOCAL INFILE '/opt/promotion.dat'
    INTO TABLE promotion
    FIELDS TERMINATED BY '|'
    LINES TERMINATED BY '\n';

LOAD DATA LOCAL INFILE '/opt/store_sales.dat'
    INTO TABLE store_sales
    FIELDS TERMINATED BY '|'
    LINES TERMINATED BY '\n';

LOAD DATA LOCAL INFILE '/opt/web_sales.dat'
    INTO TABLE web_sales
    FIELDS TERMINATED BY '|'
    LINES TERMINATED BY '\n';

LOAD DATA LOCAL INFILE '/opt/customer_address.dat'
    INTO TABLE customer_address
    FIELDS TERMINATED BY '|'
    LINES TERMINATED BY '\n';

LOAD DATA LOCAL INFILE '/opt/household_demographics.dat'
    INTO TABLE household_demographics
    FIELDS TERMINATED BY '|'
    LINES TERMINATED BY '\n';

LOAD DATA LOCAL INFILE '/opt/reason.dat'
    INTO TABLE reason
    FIELDS TERMINATED BY '|'
    LINES TERMINATED BY '\n';

LOAD DATA LOCAL INFILE '/opt/time_dim.dat'
    INTO TABLE time_dim
    FIELDS TERMINATED BY '|'
    LINES TERMINATED BY '\n';

LOAD DATA LOCAL INFILE '/opt/web_site.dat'
    INTO TABLE web_site
    FIELDS TERMINATED BY '|'
    LINES TERMINATED BY '\n';
###########
-- Post-load sanity check: returns one row per table with its row count.
-- The aliases on the first branch name the columns of the whole UNION ALL
-- result (otherwise the headers default to the literal 'call_center' and the
-- aggregate expression text).
-- The trailing ORDER BY applies to the combined result and makes the output
-- order deterministic; the statement is terminated so it can run in a script.
-- NOTE(review): dbgen_version is not part of this check; add a branch for it
-- if its row count should be verified as well.
SELECT 'call_center' AS table_name, COUNT(*) AS row_count FROM call_center
UNION ALL
SELECT 'catalog_page', COUNT(*) FROM catalog_page
UNION ALL
SELECT 'catalog_returns', COUNT(*) FROM catalog_returns
UNION ALL
SELECT 'catalog_sales', COUNT(*) FROM catalog_sales
UNION ALL
SELECT 'customer', COUNT(*) FROM customer
UNION ALL
SELECT 'customer_address', COUNT(*) FROM customer_address
UNION ALL
SELECT 'customer_demographics', COUNT(*) FROM customer_demographics
UNION ALL
SELECT 'date_dim', COUNT(*) FROM date_dim
UNION ALL
SELECT 'household_demographics', COUNT(*) FROM household_demographics
UNION ALL
SELECT 'inventory', COUNT(*) FROM inventory
UNION ALL
SELECT 'item', COUNT(*) FROM item
UNION ALL
SELECT 'promotion', COUNT(*) FROM promotion
UNION ALL
SELECT 'reason', COUNT(*) FROM reason
UNION ALL
SELECT 'ship_mode', COUNT(*) FROM ship_mode
UNION ALL
SELECT 'store', COUNT(*) FROM store
UNION ALL
SELECT 'store_returns', COUNT(*) FROM store_returns
UNION ALL
SELECT 'store_sales', COUNT(*) FROM store_sales
UNION ALL
SELECT 'time_dim', COUNT(*) FROM time_dim
UNION ALL
SELECT 'warehouse', COUNT(*) FROM warehouse
UNION ALL
SELECT 'web_page', COUNT(*) FROM web_page
UNION ALL
SELECT 'web_returns', COUNT(*) FROM web_returns
UNION ALL
SELECT 'web_sales', COUNT(*) FROM web_sales
UNION ALL
SELECT 'web_site', COUNT(*) FROM web_site
ORDER BY table_name;
https://www.cnblogs.com/syw20170419/category/2116829.html
https://blog.csdn.net/sinat_37316828/article/details/106795862
https://cloud.tencent.com/developer/article/2010886
https://blog.csdn.net/marising/article/details/105974151
https://blog.csdn.net/qq_41187116/article/details/125814089?spm=1001.2014.3001.5502
https://blog.csdn.net/hf200012/article/details/125533767
参考:https://blog.csdn.net/benladeng29hao/article/details/109111367
按以上文章的内容操作后依然报同样的问题;后来看到 https://qa.1r1g.com/sf/ask/298377201/ 这篇文章,按其说明执行 make clean。
# cat customer_address.dat | curl --location-trusted -u root:123456 -H "label:customer_address2" -H "timeout:1200" -H "column_separator:|" -T - http://172.16.34.127:32030/api/test_db/customer_address/_stream_load
{
"TxnId": 58491,
"Label": "customer_address2",
"TwoPhaseCommit": "false",
"Status": "Fail",
"Message": "too many filtered rows",
"NumberTotalRows": 52,
"NumberLoadedRows": 0,
"NumberFilteredRows": 52,
"NumberUnselectedRows": 0,
"LoadBytes": 5502165,
"LoadTimeMs": 192,
"BeginTxnTimeMs": 0,
"StreamLoadPutTimeMs": 1,
"ReadDataTimeMs": 17,
"WriteDataTimeMs": 188,
"CommitAndPublishTimeMs": 0,
"ErrorURL": "http://172.16.34.230:8040/api/_load_error_log?file=__shard_0/error_log_insert_stmt_9649720a2d4907e9-86b0766ca5765485_9649720a2d4907e9_86b0766ca5765485"
}