Hive 常用 SQL 整理

整理 Hive 常用的 SQL 语句,方便快速查找使用。

1.创建Hive表

-- Create a partitioned table stored as ORC (partition column: dt).
-- NOTE(review): ORC is a binary columnar format, so the ROW FORMAT DELIMITED /
-- FIELDS TERMINATED BY clause presumably has no effect on how data is stored;
-- it is kept as in the original -- confirm and consider dropping it.
CREATE TABLE IF NOT EXISTS edw_applications.dws_test_table (
  cid                    string,
  event_code             int,
  event_date             string,
  house_id               bigint,
  house_project_id       int,
  event_interval         int,
  event_weight_score     double,
  interval_decay_factor  double,
  event_score            double,
  event_times            bigint,
  load_job_number        string,
  load_job_name          string,
  insert_timestamp       timestamp   -- last column: no trailing comma, Hive rejects one before ")"
) PARTITIONED BY (dt string)
  ROW FORMAT DELIMITED
  FIELDS TERMINATED BY '\001'
  STORED AS ORC;

-- Create a new table by copying the structure of an existing table.
CREATE TABLE edw_applications.dws_test_table_002
  LIKE edw_applications.dws_test_table;

-- Drop a table.
DROP TABLE IF EXISTS edw_applications.dws_test_table;

2.数据表导入导出

-- Export the table's data to a directory on the local filesystem,
-- written as delimited text with '\001' as the field separator.
INSERT OVERWRITE LOCAL DIRECTORY '/data/hadoop/test/dws_test_table'
  ROW FORMAT DELIMITED
  FIELDS TERMINATED BY '\001'
SELECT *
FROM edw_applications.dws_test_table;

-- Load files that are already on HDFS into the Hive table.
-- The source is an HDFS path; the files are moved, not copied.
-- The target table is partitioned by dt, so the destination partition is named
-- explicitly, and the table name is database-qualified like the other statements.
-- NOTE(review): LOAD DATA does not convert file formats; because the table is
-- STORED AS ORC the source files presumably must already be ORC -- confirm.
LOAD DATA INPATH '/src/dws_test_table/*'
  INTO TABLE edw_applications.dws_test_table
  PARTITION (dt = '${dt}');

-- Load files from the local filesystem into the Hive table (local path).
LOAD DATA LOCAL INPATH '/home/xubc/dws_test_table/*'
  INTO TABLE edw_applications.dws_test_table
  PARTITION (dt = '${dt}');

3.分区操作

-- Add a partition.
ALTER TABLE edw_applications.dws_test_table
  ADD IF NOT EXISTS PARTITION (dt = '${dt}');

-- Drop a partition.
ALTER TABLE edw_applications.dws_test_table
  DROP IF EXISTS PARTITION (dt = '${dt}');

-- Remove all data from a partition.
TRUNCATE TABLE edw_applications.dws_test_table
  PARTITION (dt = '${dt}');

-- Insert data into a partition.

-- Overwrite: replaces the existing contents of the partition.
INSERT OVERWRITE TABLE edw_applications.dws_test_table
  PARTITION (dt = '${dt}')
SELECT *
FROM edw_applications.dws_test_table_001;

-- Append: adds rows to the partition.
INSERT INTO edw_applications.dws_test_table
  PARTITION (dt = '${dt}')
SELECT *
FROM edw_applications.dws_test_table_001;

4.添加UDF函数

-- Register the jar that contains the UDF class.
ADD JAR /home/xubc/hive-contrib-1.2.0.jar;                              -- jar on the local filesystem
ADD JAR hdfs://localhost:8010/user/data_user/hive-contrib-1.2.0.jar;    -- jar stored on HDFS

-- Bind a temporary function name to the UDF class.
CREATE TEMPORARY FUNCTION row_sequence
  AS 'org.apache.hadoop.hive.contrib.udf.UDFRowSequence';

-- Copy the rows of one dt partition into a new table, adding an id column
-- produced by row_sequence().
CREATE TABLE edw_applications.tmp_dws_test_table_20161218_local AS
SELECT
  row_sequence() AS id,
  src.*
FROM edw_applications.dws_test_table src
WHERE src.dt = '20161218';

5.insert插入多条数据

-- Insert several rows containing Chinese text by chaining SELECTs with UNION ALL.
INSERT INTO ic_edw_applications.ic_dim_edw_tag_init (tag_type, tag_name, data_source)
SELECT 'room_tag', '1房', 'manual import'
UNION ALL
SELECT 'room_tag', '2房', 'manual import'
UNION ALL
SELECT 'room_tag', '3房', 'manual import'
UNION ALL
SELECT 'room_tag', '4房', 'manual import'
UNION ALL
SELECT 'room_tag', '5房', 'manual import'
UNION ALL
SELECT 'room_tag', '6房', 'manual import';

-- Insert several rows with INSERT ... VALUES.
-- Per the original note, this form is less prone to garbled characters when
-- the inserted data is non-Chinese.
INSERT INTO ic_edw_applications.ic_dim_edw_tag_init (tag_type, tag_name, data_source)
VALUES
  ('room_tag', '1房', 'manual import'),
  ('room_tag', '2房', 'manual import'),
  ('room_tag', '3房', 'manual import'),
  ('room_tag', '4房', 'manual import'),
  ('room_tag', '5房', 'manual import'),
  ('room_tag', '6房', 'manual import');

-- Overwrite up.dim_event_code with literal rows generated by STACK.
-- The first argument (4) is the number of rows; the 40 values that follow
-- are split evenly into 4 rows of 10 columns each.
INSERT OVERWRITE TABLE up.dim_event_code
SELECT seed.*
FROM (
  SELECT STACK(
    4,
    1, '浏览', 10001, '详情_PV',       '文章浏览', '', 0.1, 4, 1, current_timestamp,
    1, '浏览', 10002, '详情_下方点赞', '文章点赞', '', 0.8, 4, 1, current_timestamp,
    1, '浏览', 10003, '详情_分享成功', '文章分享', '', 1.0, 4, 1, current_timestamp,
    1, '浏览', 10004, 'H5分享按钮',    '文章分享', '', 1.0, 4, 1, current_timestamp
  )
) seed;


你可能感兴趣的:(hive)