目录
一,测试目的
二,测试环境
1,硬件环境
2,软件环境
三,数据生成
四,hive测试
1,创建hive库表
测试MPP架构数据库的性能。
3台虚拟机,配置如下:
组件 | 配置 |
CPU | 32核心 |
内存 | 128 GB |
磁盘 | 4T |
网卡 | 千兆网卡 |
OS | RedHat7.2 |
Hadoop版本:2.7.3;一台NameNode节点,两台DataNode节点。
hive版本:1.2
spark版本:2.0
生成数据量大小500G。首先创建生成文件路径:
# HDFS paths are case-sensitive, so the directory is spelled /500g to match the
# -d argument of the generator command and every table LOCATION in this report.
hadoop fs -mkdir /500g
通过tpcds工具生成,tpcds工具在官网下载,下载后上传到服务器解压,在当前目录执行如下命令(-s表示数据大小500G,-d表示数据在hdfs上存放的路径):
# Generate the data set into HDFS: -s 500 is the data size (500G), -d /500g/ is the HDFS output path.
hadoop jar ./target/tpcds-gen-1.1.jar -d /500g/ -s 500
登录hive cli,首先创建数据库tpc:
hive> create database if not exists tpc;
然后创建表,本次测试只需创建10张外表,表结构如下:
/*
catalog_sales
warehouse
ship_mode
call_center
date_dim
item
inventory
store_sales
store_returns
store
*/
-- External table tpc.catalog_sales: reads the pipe-delimited text files
-- stored under /500g/catalog_sales.
CREATE EXTERNAL TABLE IF NOT EXISTS tpc.catalog_sales (
    cs_sold_date_sk           INT,
    cs_sold_time_sk           INT,
    cs_ship_date_sk           INT,
    cs_bill_customer_sk       INT,
    cs_bill_cdemo_sk          INT,
    cs_bill_hdemo_sk          INT,
    cs_bill_addr_sk           INT,
    cs_ship_customer_sk       INT,
    cs_ship_cdemo_sk          INT,
    cs_ship_hdemo_sk          INT,
    cs_ship_addr_sk           INT,
    cs_call_center_sk         INT,
    cs_catalog_page_sk        INT,
    cs_ship_mode_sk           INT,
    cs_warehouse_sk           INT,
    cs_item_sk                INT,
    cs_promo_sk               INT,
    cs_order_number           INT,
    cs_quantity               INT,
    cs_wholesale_cost         DECIMAL(7,2),
    cs_list_price             DECIMAL(7,2),
    cs_sales_price            DECIMAL(7,2),
    cs_ext_discount_amt       DECIMAL(7,2),
    cs_ext_sales_price        DECIMAL(7,2),
    cs_ext_wholesale_cost     DECIMAL(7,2),
    cs_ext_list_price         DECIMAL(7,2),
    cs_ext_tax                DECIMAL(7,2),
    cs_coupon_amt             DECIMAL(7,2),
    cs_ext_ship_cost          DECIMAL(7,2),
    cs_net_paid               DECIMAL(7,2),
    cs_net_paid_inc_tax       DECIMAL(7,2),
    cs_net_paid_inc_ship      DECIMAL(7,2),
    cs_net_paid_inc_ship_tax  DECIMAL(7,2),
    cs_net_profit             DECIMAL(7,2)
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '|'
LOCATION '/500g/catalog_sales';
-- External table tpc.warehouse: reads the pipe-delimited text files
-- stored under /500g/warehouse.
CREATE EXTERNAL TABLE IF NOT EXISTS tpc.warehouse (
    w_warehouse_sk     INT,
    w_warehouse_id     CHAR(16),
    w_warehouse_name   VARCHAR(20),
    w_warehouse_sq_ft  INT,
    w_street_number    CHAR(10),
    w_street_name      VARCHAR(60),
    w_street_type      CHAR(15),
    w_suite_number     CHAR(10),
    w_city             VARCHAR(60),
    w_county           VARCHAR(30),
    w_state            CHAR(2),
    w_zip              CHAR(10),
    w_country          VARCHAR(20),
    w_gmt_offset       DECIMAL(5,2)
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '|'
LOCATION '/500g/warehouse';
-- External table tpc.ship_mode: reads the pipe-delimited text files
-- stored under /500g/ship_mode.
CREATE EXTERNAL TABLE IF NOT EXISTS tpc.ship_mode (
    sm_ship_mode_sk  INT,
    sm_ship_mode_id  CHAR(16),
    sm_type          CHAR(30),
    sm_code          CHAR(10),
    sm_carrier       CHAR(20),
    sm_contract      CHAR(20)
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '|'
LOCATION '/500g/ship_mode';
-- External table tpc.call_center: reads the pipe-delimited text files
-- stored under /500g/call_center.
CREATE EXTERNAL TABLE IF NOT EXISTS tpc.call_center (
    cc_call_center_sk  INT,
    cc_call_center_id  CHAR(16),
    cc_rec_start_date  DATE,
    cc_rec_end_date    DATE,
    cc_closed_date_sk  INT,
    cc_open_date_sk    INT,
    cc_name            VARCHAR(50),
    cc_class           VARCHAR(50),
    cc_employees       INT,
    cc_sq_ft           INT,
    cc_hours           CHAR(20),
    cc_manager         VARCHAR(40),
    cc_mkt_id          INT,
    cc_mkt_class       CHAR(50),
    cc_mkt_desc        VARCHAR(100),
    cc_market_manager  VARCHAR(40),
    cc_division        INT,
    cc_division_name   VARCHAR(50),
    cc_company         INT,
    cc_company_name    CHAR(50),
    cc_street_number   CHAR(10),
    cc_street_name     VARCHAR(60),
    cc_street_type     CHAR(15),
    cc_suite_number    CHAR(10),
    cc_city            VARCHAR(60),
    cc_county          VARCHAR(30),
    cc_state           CHAR(2),
    cc_zip             CHAR(10),
    cc_country         VARCHAR(20),
    cc_gmt_offset      DECIMAL(5,2),
    cc_tax_percentage  DECIMAL(5,2)
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '|'
LOCATION '/500g/call_center';
-- External table tpc.date_dim: reads the pipe-delimited text files
-- stored under /500g/date_dim.
-- d_date is declared DATE rather than TIMESTAMP. Hive only parses TIMESTAMP text
-- fields written as yyyy-mm-dd hh:mm:ss[.f...], so a date-only field would be
-- read as NULL and the BETWEEN ... CAST(... AS DATE) filter in the join query
-- would match nothing.
-- NOTE(review): assumes the generated files carry d_date as yyyy-mm-dd - confirm against the data.
CREATE EXTERNAL TABLE IF NOT EXISTS tpc.date_dim (
    d_date_sk            INT,
    d_date_id            CHAR(16),
    d_date               DATE,
    d_month_seq          INT,
    d_week_seq           INT,
    d_quarter_seq        INT,
    d_year               INT,
    d_dow                INT,
    d_moy                INT,
    d_dom                INT,
    d_qoy                INT,
    d_fy_year            INT,
    d_fy_quarter_seq     INT,
    d_fy_week_seq        INT,
    d_day_name           CHAR(9),
    d_quarter_name       CHAR(6),
    d_holiday            CHAR(1),
    d_weekend            CHAR(1),
    d_following_holiday  CHAR(1),
    d_first_dom          INT,
    d_last_dom           INT,
    d_same_day_ly        INT,
    d_same_day_lq        INT,
    d_current_day        CHAR(1),
    d_current_week       CHAR(1),
    d_current_month      CHAR(1),
    d_current_quarter    CHAR(1),
    d_current_year       CHAR(1)
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '|'
LOCATION '/500g/date_dim';
-- External table tpc.item: reads the pipe-delimited text files
-- stored under /500g/item.
-- i_rec_start_date and i_rec_end_date are declared DATE, matching the
-- *_rec_start_date / *_rec_end_date columns of call_center and store. Hive only
-- parses TIMESTAMP text fields written as yyyy-mm-dd hh:mm:ss[.f...], so
-- date-only fields declared TIMESTAMP would be read as NULL.
-- NOTE(review): assumes the generated files carry these fields as yyyy-mm-dd - confirm against the data.
CREATE EXTERNAL TABLE IF NOT EXISTS tpc.item (
    i_item_sk         INT,
    i_item_id         CHAR(16),
    i_rec_start_date  DATE,
    i_rec_end_date    DATE,
    i_item_desc       VARCHAR(200),
    i_current_price   DECIMAL(7,2),
    i_wholesale_cost  DECIMAL(7,2),
    i_brand_id        INT,
    i_brand           CHAR(50),
    i_class_id        INT,
    i_class           CHAR(50),
    i_category_id     INT,
    i_category        CHAR(50),
    i_manufact_id     INT,
    i_manufact        CHAR(50),
    i_size            CHAR(20),
    i_formulation     CHAR(20),
    i_color           CHAR(20),
    i_units           CHAR(10),
    i_container       CHAR(10),
    i_manager_id      INT,
    i_product_name    CHAR(50)
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '|'
LOCATION '/500g/item';
-- External table tpc.inventory: reads the pipe-delimited text files
-- stored under /500g/inventory.
CREATE EXTERNAL TABLE IF NOT EXISTS tpc.inventory (
    inv_date_sk           INT,
    inv_item_sk           INT,
    inv_warehouse_sk      INT,
    inv_quantity_on_hand  INT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '|'
LOCATION '/500g/inventory';
-- External table tpc.store_sales: reads the pipe-delimited text files
-- stored under /500g/store_sales.
CREATE EXTERNAL TABLE IF NOT EXISTS tpc.store_sales (
    ss_sold_date_sk        INT,
    ss_sold_time_sk        INT,
    ss_item_sk             INT,
    ss_customer_sk         INT,
    ss_cdemo_sk            INT,
    ss_hdemo_sk            INT,
    ss_addr_sk             INT,
    ss_store_sk            INT,
    ss_promo_sk            INT,
    ss_ticket_number       INT,
    ss_quantity            INT,
    ss_wholesale_cost      DECIMAL(7,2),
    ss_list_price          DECIMAL(7,2),
    ss_sales_price         DECIMAL(7,2),
    ss_ext_discount_amt    DECIMAL(7,2),
    ss_ext_sales_price     DECIMAL(7,2),
    ss_ext_wholesale_cost  DECIMAL(7,2),
    ss_ext_list_price      DECIMAL(7,2),
    ss_ext_tax             DECIMAL(7,2),
    ss_coupon_amt          DECIMAL(7,2),
    ss_net_paid            DECIMAL(7,2),
    ss_net_paid_inc_tax    DECIMAL(7,2),
    ss_net_profit          DECIMAL(7,2)
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '|'
LOCATION '/500g/store_sales';
-- External table tpc.store_returns: reads the pipe-delimited text files
-- stored under /500g/store_returns.
CREATE EXTERNAL TABLE IF NOT EXISTS tpc.store_returns (
    sr_returned_date_sk    INT,
    sr_return_time_sk      INT,
    sr_item_sk             INT,
    sr_customer_sk         INT,
    sr_cdemo_sk            INT,
    sr_hdemo_sk            INT,
    sr_addr_sk             INT,
    sr_store_sk            INT,
    sr_reason_sk           INT,
    sr_ticket_number       INT,
    sr_return_quantity     INT,
    sr_return_amt          DECIMAL(7,2),
    sr_return_tax          DECIMAL(7,2),
    sr_return_amt_inc_tax  DECIMAL(7,2),
    sr_fee                 DECIMAL(7,2),
    sr_return_ship_cost    DECIMAL(7,2),
    sr_refunded_cash       DECIMAL(7,2),
    sr_reversed_charge     DECIMAL(7,2),
    sr_store_credit        DECIMAL(7,2),
    sr_net_loss            DECIMAL(7,2)
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '|'
LOCATION '/500g/store_returns';
-- External table tpc.store: reads the pipe-delimited text files
-- stored under /500g/store.
CREATE EXTERNAL TABLE IF NOT EXISTS tpc.store (
    s_store_sk          INT,
    s_store_id          CHAR(16),
    s_rec_start_date    DATE,
    s_rec_end_date      DATE,
    s_closed_date_sk    INT,
    s_store_name        VARCHAR(50),
    s_number_employees  INT,
    s_floor_space       INT,
    s_hours             CHAR(20),
    s_manager           VARCHAR(40),
    s_market_id         INT,
    s_geography_class   VARCHAR(100),
    s_market_desc       VARCHAR(100),
    s_market_manager    VARCHAR(40),
    s_division_id       INT,
    s_division_name     VARCHAR(50),
    s_company_id        INT,
    s_company_name      VARCHAR(50),
    s_street_number     VARCHAR(10),
    s_street_name       VARCHAR(60),
    s_street_type       CHAR(15),
    s_suite_number      CHAR(10),
    s_city              VARCHAR(60),
    s_county            VARCHAR(30),
    s_state             CHAR(2),
    s_zip               CHAR(10),
    s_country           VARCHAR(20),
    s_gmt_offset        DECIMAL(5,2),
    s_tax_precentage    DECIMAL(5,2)
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '|'
LOCATION '/500g/store';
首先清空缓存:
# Flush dirty pages first: drop_caches only releases clean cache entries.
sync
# Writing 3 drops the page cache plus dentries and inodes, which covers what
# writing 1 and then 2 would do.
echo 3 > /proc/sys/vm/drop_caches
测试项 | hive |
单表查询14.39亿条数据(select count(*) from store_sales;),MR引擎 | 634.38 seconds |
单表查询14.39亿条数据(select count(*) from store_sales;),TEZ引擎 | 592.691 seconds |
hdfs数据下载到本地
# Copy everything under /500g from HDFS into the current local directory.
# NOTE(review): the load statements read from /data/lhy/data/<table>/ - presumably this is run from /data/lhy/data, confirm.
hadoop fs -get /500g/* ./
先在hive中创建数据库tpc_in(create database if not exists tpc_in;),然后创建表结构,导入数据。
/*
catalog_sales
warehouse
ship_mode
call_center
date_dim
item
inventory
store_sales
store_returns
store
*/
-- Managed table tpc_in.catalog_sales, filled from the local copy of the data.
-- The LOAD target is qualified with tpc_in so the OVERWRITE always hits the table
-- created here and never a same-named table in whatever database is current.
CREATE TABLE IF NOT EXISTS tpc_in.catalog_sales (
    cs_sold_date_sk           INT,
    cs_sold_time_sk           INT,
    cs_ship_date_sk           INT,
    cs_bill_customer_sk       INT,
    cs_bill_cdemo_sk          INT,
    cs_bill_hdemo_sk          INT,
    cs_bill_addr_sk           INT,
    cs_ship_customer_sk       INT,
    cs_ship_cdemo_sk          INT,
    cs_ship_hdemo_sk          INT,
    cs_ship_addr_sk           INT,
    cs_call_center_sk         INT,
    cs_catalog_page_sk        INT,
    cs_ship_mode_sk           INT,
    cs_warehouse_sk           INT,
    cs_item_sk                INT,
    cs_promo_sk               INT,
    cs_order_number           INT,
    cs_quantity               INT,
    cs_wholesale_cost         DECIMAL(7,2),
    cs_list_price             DECIMAL(7,2),
    cs_sales_price            DECIMAL(7,2),
    cs_ext_discount_amt       DECIMAL(7,2),
    cs_ext_sales_price        DECIMAL(7,2),
    cs_ext_wholesale_cost     DECIMAL(7,2),
    cs_ext_list_price         DECIMAL(7,2),
    cs_ext_tax                DECIMAL(7,2),
    cs_coupon_amt             DECIMAL(7,2),
    cs_ext_ship_cost          DECIMAL(7,2),
    cs_net_paid               DECIMAL(7,2),
    cs_net_paid_inc_tax       DECIMAL(7,2),
    cs_net_paid_inc_ship      DECIMAL(7,2),
    cs_net_paid_inc_ship_tax  DECIMAL(7,2),
    cs_net_profit             DECIMAL(7,2)
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '|';
LOAD DATA LOCAL INPATH '/data/lhy/data/catalog_sales/*' OVERWRITE INTO TABLE tpc_in.catalog_sales;
-- Managed table tpc_in.warehouse, filled from the local copy of the data.
-- The LOAD target is qualified with tpc_in so the OVERWRITE always hits the table
-- created here and never a same-named table in whatever database is current.
CREATE TABLE IF NOT EXISTS tpc_in.warehouse (
    w_warehouse_sk     INT,
    w_warehouse_id     CHAR(16),
    w_warehouse_name   VARCHAR(20),
    w_warehouse_sq_ft  INT,
    w_street_number    CHAR(10),
    w_street_name      VARCHAR(60),
    w_street_type      CHAR(15),
    w_suite_number     CHAR(10),
    w_city             VARCHAR(60),
    w_county           VARCHAR(30),
    w_state            CHAR(2),
    w_zip              CHAR(10),
    w_country          VARCHAR(20),
    w_gmt_offset       DECIMAL(5,2)
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '|';
LOAD DATA LOCAL INPATH '/data/lhy/data/warehouse/*' OVERWRITE INTO TABLE tpc_in.warehouse;
-- Managed table tpc_in.ship_mode, filled from the local copy of the data.
-- The LOAD target is qualified with tpc_in so the OVERWRITE always hits the table
-- created here and never a same-named table in whatever database is current.
CREATE TABLE IF NOT EXISTS tpc_in.ship_mode (
    sm_ship_mode_sk  INT,
    sm_ship_mode_id  CHAR(16),
    sm_type          CHAR(30),
    sm_code          CHAR(10),
    sm_carrier       CHAR(20),
    sm_contract      CHAR(20)
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '|';
LOAD DATA LOCAL INPATH '/data/lhy/data/ship_mode/*' OVERWRITE INTO TABLE tpc_in.ship_mode;
-- Managed table tpc_in.call_center, filled from the local copy of the data.
-- The LOAD target is qualified with tpc_in so the OVERWRITE always hits the table
-- created here and never a same-named table in whatever database is current.
CREATE TABLE IF NOT EXISTS tpc_in.call_center (
    cc_call_center_sk  INT,
    cc_call_center_id  CHAR(16),
    cc_rec_start_date  DATE,
    cc_rec_end_date    DATE,
    cc_closed_date_sk  INT,
    cc_open_date_sk    INT,
    cc_name            VARCHAR(50),
    cc_class           VARCHAR(50),
    cc_employees       INT,
    cc_sq_ft           INT,
    cc_hours           CHAR(20),
    cc_manager         VARCHAR(40),
    cc_mkt_id          INT,
    cc_mkt_class       CHAR(50),
    cc_mkt_desc        VARCHAR(100),
    cc_market_manager  VARCHAR(40),
    cc_division        INT,
    cc_division_name   VARCHAR(50),
    cc_company         INT,
    cc_company_name    CHAR(50),
    cc_street_number   CHAR(10),
    cc_street_name     VARCHAR(60),
    cc_street_type     CHAR(15),
    cc_suite_number    CHAR(10),
    cc_city            VARCHAR(60),
    cc_county          VARCHAR(30),
    cc_state           CHAR(2),
    cc_zip             CHAR(10),
    cc_country         VARCHAR(20),
    cc_gmt_offset      DECIMAL(5,2),
    cc_tax_percentage  DECIMAL(5,2)
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '|';
LOAD DATA LOCAL INPATH '/data/lhy/data/call_center/*' OVERWRITE INTO TABLE tpc_in.call_center;
-- Managed table tpc_in.date_dim, filled from the local copy of the data.
-- The LOAD target is qualified with tpc_in so the OVERWRITE always hits the table
-- created here and never a same-named table in whatever database is current.
-- d_date is declared DATE rather than TIMESTAMP. Hive only parses TIMESTAMP text
-- fields written as yyyy-mm-dd hh:mm:ss[.f...], so a date-only field would be
-- read as NULL and the BETWEEN ... CAST(... AS DATE) filter in the join query
-- would match nothing.
-- NOTE(review): assumes the generated files carry d_date as yyyy-mm-dd - confirm against the data.
CREATE TABLE IF NOT EXISTS tpc_in.date_dim (
    d_date_sk            INT,
    d_date_id            CHAR(16),
    d_date               DATE,
    d_month_seq          INT,
    d_week_seq           INT,
    d_quarter_seq        INT,
    d_year               INT,
    d_dow                INT,
    d_moy                INT,
    d_dom                INT,
    d_qoy                INT,
    d_fy_year            INT,
    d_fy_quarter_seq     INT,
    d_fy_week_seq        INT,
    d_day_name           CHAR(9),
    d_quarter_name       CHAR(6),
    d_holiday            CHAR(1),
    d_weekend            CHAR(1),
    d_following_holiday  CHAR(1),
    d_first_dom          INT,
    d_last_dom           INT,
    d_same_day_ly        INT,
    d_same_day_lq        INT,
    d_current_day        CHAR(1),
    d_current_week       CHAR(1),
    d_current_month      CHAR(1),
    d_current_quarter    CHAR(1),
    d_current_year       CHAR(1)
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '|';
LOAD DATA LOCAL INPATH '/data/lhy/data/date_dim/*' OVERWRITE INTO TABLE tpc_in.date_dim;
-- Managed table tpc_in.item, filled from the local copy of the data.
-- The LOAD target is qualified with tpc_in so the OVERWRITE always hits the table
-- created here and never a same-named table in whatever database is current.
-- i_rec_start_date and i_rec_end_date are declared DATE, matching the
-- *_rec_start_date / *_rec_end_date columns of call_center and store. Hive only
-- parses TIMESTAMP text fields written as yyyy-mm-dd hh:mm:ss[.f...], so
-- date-only fields declared TIMESTAMP would be read as NULL.
-- NOTE(review): assumes the generated files carry these fields as yyyy-mm-dd - confirm against the data.
CREATE TABLE IF NOT EXISTS tpc_in.item (
    i_item_sk         INT,
    i_item_id         CHAR(16),
    i_rec_start_date  DATE,
    i_rec_end_date    DATE,
    i_item_desc       VARCHAR(200),
    i_current_price   DECIMAL(7,2),
    i_wholesale_cost  DECIMAL(7,2),
    i_brand_id        INT,
    i_brand           CHAR(50),
    i_class_id        INT,
    i_class           CHAR(50),
    i_category_id     INT,
    i_category        CHAR(50),
    i_manufact_id     INT,
    i_manufact        CHAR(50),
    i_size            CHAR(20),
    i_formulation     CHAR(20),
    i_color           CHAR(20),
    i_units           CHAR(10),
    i_container       CHAR(10),
    i_manager_id      INT,
    i_product_name    CHAR(50)
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '|';
LOAD DATA LOCAL INPATH '/data/lhy/data/item/*' OVERWRITE INTO TABLE tpc_in.item;
-- Managed table tpc_in.inventory, filled from the local copy of the data.
-- The LOAD target is qualified with tpc_in so the OVERWRITE always hits the table
-- created here and never a same-named table in whatever database is current.
CREATE TABLE IF NOT EXISTS tpc_in.inventory (
    inv_date_sk           INT,
    inv_item_sk           INT,
    inv_warehouse_sk      INT,
    inv_quantity_on_hand  INT
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '|';
LOAD DATA LOCAL INPATH '/data/lhy/data/inventory/*' OVERWRITE INTO TABLE tpc_in.inventory;
-- Managed table tpc_in.store_sales, filled from the local copy of the data.
-- The LOAD target is qualified with tpc_in so the OVERWRITE always hits the table
-- created here and never a same-named table in whatever database is current.
CREATE TABLE IF NOT EXISTS tpc_in.store_sales (
    ss_sold_date_sk        INT,
    ss_sold_time_sk        INT,
    ss_item_sk             INT,
    ss_customer_sk         INT,
    ss_cdemo_sk            INT,
    ss_hdemo_sk            INT,
    ss_addr_sk             INT,
    ss_store_sk            INT,
    ss_promo_sk            INT,
    ss_ticket_number       INT,
    ss_quantity            INT,
    ss_wholesale_cost      DECIMAL(7,2),
    ss_list_price          DECIMAL(7,2),
    ss_sales_price         DECIMAL(7,2),
    ss_ext_discount_amt    DECIMAL(7,2),
    ss_ext_sales_price     DECIMAL(7,2),
    ss_ext_wholesale_cost  DECIMAL(7,2),
    ss_ext_list_price      DECIMAL(7,2),
    ss_ext_tax             DECIMAL(7,2),
    ss_coupon_amt          DECIMAL(7,2),
    ss_net_paid            DECIMAL(7,2),
    ss_net_paid_inc_tax    DECIMAL(7,2),
    ss_net_profit          DECIMAL(7,2)
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '|';
LOAD DATA LOCAL INPATH '/data/lhy/data/store_sales/*' OVERWRITE INTO TABLE tpc_in.store_sales;
-- Managed table tpc_in.store_returns, filled from the local copy of the data.
-- The LOAD target is qualified with tpc_in so the OVERWRITE always hits the table
-- created here and never a same-named table in whatever database is current.
CREATE TABLE IF NOT EXISTS tpc_in.store_returns (
    sr_returned_date_sk    INT,
    sr_return_time_sk      INT,
    sr_item_sk             INT,
    sr_customer_sk         INT,
    sr_cdemo_sk            INT,
    sr_hdemo_sk            INT,
    sr_addr_sk             INT,
    sr_store_sk            INT,
    sr_reason_sk           INT,
    sr_ticket_number       INT,
    sr_return_quantity     INT,
    sr_return_amt          DECIMAL(7,2),
    sr_return_tax          DECIMAL(7,2),
    sr_return_amt_inc_tax  DECIMAL(7,2),
    sr_fee                 DECIMAL(7,2),
    sr_return_ship_cost    DECIMAL(7,2),
    sr_refunded_cash       DECIMAL(7,2),
    sr_reversed_charge     DECIMAL(7,2),
    sr_store_credit        DECIMAL(7,2),
    sr_net_loss            DECIMAL(7,2)
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '|';
LOAD DATA LOCAL INPATH '/data/lhy/data/store_returns/*' OVERWRITE INTO TABLE tpc_in.store_returns;
-- Managed table tpc_in.store, filled from the local copy of the data.
-- The LOAD target is qualified with tpc_in so the OVERWRITE always hits the table
-- created here and never a same-named table in whatever database is current.
CREATE TABLE IF NOT EXISTS tpc_in.store (
    s_store_sk          INT,
    s_store_id          CHAR(16),
    s_rec_start_date    DATE,
    s_rec_end_date      DATE,
    s_closed_date_sk    INT,
    s_store_name        VARCHAR(50),
    s_number_employees  INT,
    s_floor_space       INT,
    s_hours             CHAR(20),
    s_manager           VARCHAR(40),
    s_market_id         INT,
    s_geography_class   VARCHAR(100),
    s_market_desc       VARCHAR(100),
    s_market_manager    VARCHAR(40),
    s_division_id       INT,
    s_division_name     VARCHAR(50),
    s_company_id        INT,
    s_company_name      VARCHAR(50),
    s_street_number     VARCHAR(10),
    s_street_name       VARCHAR(60),
    s_street_type       CHAR(15),
    s_suite_number      CHAR(10),
    s_city              VARCHAR(60),
    s_county            VARCHAR(30),
    s_state             CHAR(2),
    s_zip               CHAR(10),
    s_country           VARCHAR(20),
    s_gmt_offset        DECIMAL(5,2),
    s_tax_precentage    DECIMAL(5,2)
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '|';
LOAD DATA LOCAL INPATH '/data/lhy/data/store/*' OVERWRITE INTO TABLE tpc_in.store;
首先清空缓存:
# Flush dirty pages first: drop_caches only releases clean cache entries.
sync
# Writing 3 drops the page cache plus dentries and inodes, which covers what
# writing 1 and then 2 would do.
echo 3 > /proc/sys/vm/drop_caches
测试项 | hive |
单表查询14.39亿条数据(select count(*) from store_sales;),TEZ引擎 | 698.035 seconds |
查询语句
-- Four-table join: items in a price band from two manufacturers that have
-- inventory rows dated inside the window and at least one store sale.
-- Returns the first 100 distinct (id, description, price) rows by item id.
-- Tables are joined in the same order as the original comma-separated FROM list.
SELECT
    i_item_id,
    i_item_desc,
    i_current_price
FROM item
INNER JOIN inventory
    ON inv_item_sk = i_item_sk
INNER JOIN date_dim
    ON d_date_sk = inv_date_sk
INNER JOIN store_sales
    ON inv_item_sk = ss_item_sk
WHERE i_current_price BETWEEN 76.00 AND 106.00
  AND d_date BETWEEN CAST('1998-06-29' AS DATE) AND CAST('1998-08-29' AS DATE)
  AND i_manufact_id IN (512, 409)
GROUP BY
    i_item_id,
    i_item_desc,
    i_current_price
ORDER BY i_item_id
LIMIT 100;
测试项 | hive用时 |
关联查询 | 1532.872 seconds |