问题:使用beeline不能truncate table表
和/etc/hive/conf.cloudera.hive/hive-site.xml修改参数在beeline里不起作用
在truncate table 表时报下面的错误:
0: jdbc:hive2://172.16.29.10:10000/dw_test> truncate table customer_dim;
INFO : Compiling command(queryId=hive_20180522103030_f8107b37-4f71-4868-9f9b-61aa01657e0e): truncate table customer_dim
INFO : Semantic Analysis Completed
INFO : Returning Hive schema: Schema(fieldSchemas:null, properties:null)
INFO : Completed compiling command(queryId=hive_20180522103030_f8107b37-4f71-4868-9f9b-61aa01657e0e); Time taken: 0.019 seconds
INFO : Executing command(queryId=hive_20180522103030_f8107b37-4f71-4868-9f9b-61aa01657e0e): truncate table customer_dim
INFO : Starting task [Stage-0:DDL] in serial mode
ERROR : FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Exception while processing
INFO : Completed executing command(queryId=hive_20180522103030_f8107b37-4f71-4868-9f9b-61aa01657e0e); Time taken: 0.06 seconds
Error: Error while processing statement: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. Exception while processing (state=08S01,code=1)
原因:
用beeline命令执行在hive命令行创建的库和表时,beeline的修改事务的参数不起作用
解决:
用beelin命令执行事务的操作 beeline创建的表。
如:
首先,建库表用beeline
beeline -n hdfs -p hive -u jdbc:hive2://172.16.29.10:10000 -f dw.sql
然后,数据初始化时也应该用相同的命令
beeline -n hdfs -p hive -u jdbc:hive2://172.16.29.10:10000/dw_test -f init_etl.sql
dw.sql语句内容:
drop database if exists dw_test cascade;
create database dw_test;
use dw_test;
-- 建立日期维度表
create table date_dim (
date_sk int comment 'surrogate key',
date date comment 'date,yyyy-mm-dd',
month tinyint comment 'month',
month_name varchar(9) comment 'month name',
quarter tinyint comment 'quarter',
year smallint comment 'year'
)
comment 'date dimension table'
row format delimited fields terminated by ','
stored as textfile;
-- 建立客户维度表
create table customer_dim (
customer_sk int comment 'surrogate key',
customer_number int comment 'number',
customer_name varchar(50) comment 'name',
customer_street_address varchar(50) comment 'address',
customer_zip_code int comment 'zipcode',
customer_city varchar(30) comment 'city',
customer_state varchar(2) comment 'state',
version int comment 'version',
effective_date date comment 'effective date',
expiry_date date comment 'expiry date'
)
clustered by (customer_sk) into 8 buckets
stored as orc tblproperties ('transactional'='true');
-- 建立产品维度表
create table product_dim (
product_sk int comment 'surrogate key',
product_code int comment 'code',
product_name varchar(30) comment 'name',
product_category varchar(30) comment 'category',
version int comment 'version',
effective_date date comment 'effective date',
expiry_date date comment 'expiry date'
)
clustered by (product_sk) into 8 buckets
stored as orc tblproperties ('transactional'='true');
-- 建立订单维度表
create table order_dim (
order_sk int comment 'surrogate key',
order_number int comment 'number',
version int comment 'version',
effective_date date comment 'effective date',
expiry_date date comment 'expiry date'
)
clustered by (order_sk) into 8 buckets
stored as orc tblproperties ('transactional'='true');
-- 建立销售订单事实表
create table sales_order_fact (
order_sk int comment 'order surrogate key',
customer_sk int comment 'customer surrogate key',
product_sk int comment 'product surrogate key',
order_date_sk int comment 'date surrogate key',
order_amount decimal(10 , 2 ) comment 'order amount'
)
clustered by (order_sk) into 8 buckets
stored as orc tblproperties ('transactional'='true');
set hive.support.concurrency = true;
set hive.enforce.bucketing = true;
set hive.exec.dynamic.partition.mode = nonstrict;
set hive.txn.manager = org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.compactor.initiator.on = true;
set hive.compactor.worker.threads = 1;
init_etl.sql语句内容:
set hive.support.concurrency=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
set hive.compactor.initiator.on=true;
set hive.compactor.worker.threads=1;
use dw_test;
-- 清空表
truncate table customer_dim;
truncate table product_dim;
truncate table order_dim;
truncate table sales_order_fact;
-- 装载客户维度表
insert into customer_dim
select
row_number() over (order by t1.customer_number) + t2.sk_max,
t1.customer_number, t1.customer_name, t1.customer_street_address,
t1.customer_zip_code, t1.customer_city, t1.customer_state, 1,
'2016-03-01', '2200-01-01'
from rds.customer t1
cross join (select coalesce(max(customer_sk),0) sk_max
from customer_dim) t2;
-- 装载产品维度表
insert into product_dim
select
row_number() over (order by t1.product_code) + t2.sk_max,
product_code, product_name, product_category, 1,
'2016-03-01', '2200-01-01'
from rds.product t1
cross join (select coalesce(max(product_sk),0) sk_max
from product_dim) t2;
-- 装载订单维度表
insert into order_dim
select
row_number() over (order by t1.order_number) + t2.sk_max,
order_number, 1, order_date, '2200-01-01'
from rds.sales_order t1
cross join (select coalesce(max(order_sk),0) sk_max from order_dim) t2;
-- 装载销售订单事实表
insert into sales_order_fact
select order_sk, customer_sk, product_sk, date_sk, order_amount
from rds.sales_order a, order_dim b, customer_dim c,
product_dim d, date_dim e
where a.order_number = b.order_number
and a.customer_number = c.customer_number
and a.product_code = d.product_code
and to_date(a.order_date) = e.date;