一、概述
二、静态分区
1、创建表并指定分区(PARTITIONED BY)
-- Static-partition demo table. Each row carries a scalar id/name plus two
-- collection columns; age and sex are partition columns, not data columns.
-- NOTE(review): the element types of likes/address were missing in the original
-- text (bare `array` / `map` do not parse in Hive); string elements are assumed
-- here -- confirm against the layout of /data/test.log.
CREATE TABLE psn (
    id      int,
    name    string,
    likes   array<string>,
    address map<string,string>
)
PARTITIONED BY (age int, sex string)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','            -- separates the top-level columns
    COLLECTION ITEMS TERMINATED BY '-'  -- separates array elements and map entries
    MAP KEYS TERMINATED BY ':'          -- separates a map key from its value
    LINES TERMINATED BY '\n';
2、加载数据到指定分区(分区值的类型需与建表时定义的分区字段类型一致,例如 age=10, sex='boy')
-- Load the same local file into two static partitions of psn. Both partition
-- columns get a literal value (a number for age, a quoted string for sex).
-- INTO TABLE (without OVERWRITE) appends to whatever the partition already holds.
LOAD DATA LOCAL INPATH '/data/test.log'
INTO TABLE psn
PARTITION (age = 10, sex = 'boy');

LOAD DATA LOCAL INPATH '/data/test.log'
INTO TABLE psn
PARTITION (age = 20, sex = 'boy');
3、添加分区(新增加的分区里是没有内容的)
-- Register two empty partitions by hand; they hold no rows until data is
-- loaded into them. IF NOT EXISTS keeps the statements safe to re-run.
ALTER TABLE psn ADD IF NOT EXISTS PARTITION (age = 10, sex = 'man');
ALTER TABLE psn ADD IF NOT EXISTS PARTITION (age = 20, sex = 'man');
4、删除分区
-- Partial partition spec: drops every partition whose sex is 'man',
-- whatever its age value.
ALTER TABLE psn DROP IF EXISTS PARTITION (sex = 'man');

-- Full partition spec: drops only the sex=man partition under age=10.
-- IF EXISTS matters here because the statement above has already removed it
-- when both are run in sequence.
ALTER TABLE psn DROP IF EXISTS PARTITION (age = 10, sex = 'man');
三、动态分区
1、设置参数:
-- Turn dynamic partitioning on for this session. It is already on by default
-- since Hive 0.9.0; earlier releases default to false.
SET hive.exec.dynamic.partition=true;

-- nonstrict allows every partition column to be dynamic. In strict mode at
-- least one partition column must be given a static value.
SET hive.exec.dynamic.partition.mode=nonstrict;
2、创建临时表(普通表,仅用于中转数据,并非 TEMPORARY 表)
-- Staging table for the task log. It is an ordinary, non-partitioned table in
-- which oid and inputdate are plain data columns. Fields are separated by the
-- '\001' delimiter and rows by a newline.
CREATE TABLE temp_hz_task_log2 (
    oid       STRING,
    inputdate STRING,
    pno       STRING,
    im15      STRING,
    url_tag   STRING,
    uid       STRING,
    at        STRING,
    tc        STRING,
    ac        STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    LINES TERMINATED BY '\n';
3、创建动态分区表(先按oid分区,再按inputdate分区)
-- Partitioned target table for the task log. oid and inputdate are declared as
-- partition columns (oid first, then inputdate) and therefore do not appear in
-- the data column list. Row format matches the staging table: '\001' between
-- fields, newline between rows.
CREATE TABLE hz_task_log2 (
    pno     STRING,
    im15    STRING,
    url_tag STRING,
    uid     STRING,
    at      STRING,
    tc      STRING,
    ac      STRING
)
PARTITIONED BY (
    oid       STRING,
    inputdate STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\001'
    LINES TERMINATED BY '\n';
4、将临时表(temp_hz_task_log2)中的数据插入到动态分区表(hz_task_log2),插入完成后临时表可删除
-- Copy all staging rows into the partitioned table, letting Hive derive the
-- target partition of each row from its oid and inputdate values.
-- Columns are matched by position, not by name: the data columns come first in
-- the table's column order, and the dynamic partition columns come last, in the
-- same order as in the PARTITION clause.
-- Since no partition column has a static value, this needs
-- hive.exec.dynamic.partition.mode=nonstrict.
INSERT OVERWRITE TABLE hz_task_log2 PARTITION (oid, inputdate)
SELECT
    pno,
    im15,
    url_tag,
    uid,
    at,
    tc,
    ac,
    oid,
    inputdate
FROM temp_hz_task_log2;