Hive 常用 SQL 整理,方便快速查找使用。
1.创建Hive表
-- Create an ORC-format table partitioned by dt.
-- The column list must not end with a trailing comma; Hive rejects
-- "insert_timestamp timestamp, )" with a parse error.
-- No "row format delimited ... fields terminated by" clause is given:
-- ORC is a binary columnar format, so a text field delimiter does not apply to it.
create table if not exists edw_applications.dws_test_table (
    cid                   string,
    event_code            int,
    event_date            string,
    house_id              bigint,
    house_project_id      int,
    event_interval        int,
    event_weight_score    double,
    interval_decay_factor double,
    event_score           double,
    event_times           bigint,
    load_job_number       string,
    load_job_name         string,
    insert_timestamp      timestamp
)
partitioned by (dt string)
stored as orc;
-- Create a new table whose structure is copied from an existing table
-- (only the definition is copied, not the data).
create table edw_applications.dws_test_table_002
    like edw_applications.dws_test_table;
-- Drop the table; "if exists" avoids an error when the table is absent.
drop table if exists
    edw_applications.dws_test_table;
2.数据表导入导出
-- Export the table's rows to a local directory as delimited text,
-- using \001 as the field separator.
-- "overwrite" replaces whatever is already in the target directory.
insert overwrite local directory '/data/hadoop/test/dws_test_table'
    row format delimited
    fields terminated by '\001'
select *
from edw_applications.dws_test_table;
-- Load files from an HDFS path into the Hive table.
-- Without "local", the source files are moved (not copied) into the table's location.
-- The table name is database-qualified like the other statements in this file,
-- and the target partition is named explicitly because the table is partitioned by dt.
-- NOTE(review): "load data" does not convert file formats; the source files
-- presumably must already match the table's storage format -- confirm.
load data inpath '/src/dws_test_table/*'
into table edw_applications.dws_test_table
partition (dt = '${dt}');
-- Load files from a local filesystem path into the Hive table.
-- With "local", the source files are copied into the table's location.
-- The table name is database-qualified like the other statements in this file,
-- and the target partition is named explicitly because the table is partitioned by dt.
-- NOTE(review): "load data" does not convert file formats; the source files
-- presumably must already match the table's storage format -- confirm.
load data local inpath '/home/xubc/dws_test_table/*'
into table edw_applications.dws_test_table
partition (dt = '${dt}');
3.分区操作
-- Add the ${dt} partition; "if not exists" skips it when already present.
alter table edw_applications.dws_test_table
    add if not exists
    partition (dt = '${dt}');
-- Drop the ${dt} partition; "if exists" avoids an error when it is absent.
alter table edw_applications.dws_test_table
    drop if exists
    partition (dt = '${dt}');
-- Remove all rows from the ${dt} partition while keeping the partition itself.
truncate table edw_applications.dws_test_table
    partition (dt = '${dt}');
-- Insert data into a single partition.

-- Overwrite: replaces the existing contents of the partition.
insert overwrite table edw_applications.dws_test_table
    partition (dt = '${dt}')
select *
from edw_applications.dws_test_table_001;

-- Append: adds rows to the partition and keeps what is already there.
insert into table edw_applications.dws_test_table
    partition (dt = '${dt}')
select *
from edw_applications.dws_test_table_001;
4.添加udf函数
-- Register the jar that contains the UDF class.
-- The jar can be given as a local path ...
add jar /home/xubc/hive-contrib-1.2.0.jar;
-- ... or as an HDFS URI.
add jar hdfs://localhost:8010/user/data_user/hive-contrib-1.2.0.jar;

-- Expose the UDF class under the name row_sequence as a temporary function.
create temporary function row_sequence
    as 'org.apache.hadoop.hive.contrib.udf.UDFRowSequence';

-- Example use: CTAS that puts a row_sequence() id in front of every column
-- of the rows in the dt = '20161218' partition.
create table edw_applications.tmp_dws_test_table_20161218_local as
select
    row_sequence() as id,
    src.*
from edw_applications.dws_test_table src
where dt = '20161218';
5.insert插入多条数据
-- Insert several rows at once with SELECT ... UNION ALL
-- (the form used here for rows that contain Chinese text).
insert into table ic_edw_applications.ic_dim_edw_tag_init (
    tag_type,
    tag_name,
    data_source
)
select 'room_tag', '1房', 'manual import'
union all
select 'room_tag', '2房', 'manual import'
union all
select 'room_tag', '3房', 'manual import'
union all
select 'room_tag', '4房', 'manual import'
union all
select 'room_tag', '5房', 'manual import'
union all
select 'room_tag', '6房', 'manual import';
-- Insert several rows at once with INSERT ... VALUES.
-- Per the original note, this form is not prone to garbled characters
-- when the inserted text is non-Chinese.
-- NOTE(review): the rows below do contain Chinese literals -- verify the
-- stored result, or use a SELECT ... UNION ALL insert for such data.
insert into table ic_edw_applications.ic_dim_edw_tag_init (
    tag_type,
    tag_name,
    data_source
)
values
    ('room_tag', '1房', 'manual import'),
    ('room_tag', '2房', 'manual import'),
    ('room_tag', '3房', 'manual import'),
    ('room_tag', '4房', 'manual import'),
    ('room_tag', '5房', 'manual import'),
    ('room_tag', '6房', 'manual import');
-- Replace the contents of up.dim_event_code with rows built from inline literals.
-- stack(4, ...) spreads the 40 values that follow over 4 rows of 10 columns each.
insert overwrite table up.dim_event_code
select codes.*
from (
    select stack(
        4,
        1, '浏览', 10001, '详情_PV',       '文章浏览', '', 0.1, 4, 1, current_timestamp,
        1, '浏览', 10002, '详情_下方点赞', '文章点赞', '', 0.8, 4, 1, current_timestamp,
        1, '浏览', 10003, '详情_分享成功', '文章分享', '', 1.0, 4, 1, current_timestamp,
        1, '浏览', 10004, 'H5分享按钮',    '文章分享', '', 1.0, 4, 1, current_timestamp
    )
) codes;