hive实战开发-2(建表、写数、加载数据到hdfs、动态分区、列转行)

hive实战开发-2

  • 1、建表
  • 2、写数
  • 3、加载数据到hdfs
  • 4、动态分区
  • 5、利用group by + collect_list命令按照mac聚合数据

1、建表

--定义新表
create EXTERNAL table if not exists hdw_psi${g_he_dbpostfix}.psi_nginx_access_log_cnt(
statistics_dt string comment'统计日期'
,data_dt string comment'数据日期'
,d_cnt bigint comment'日调用次数'
,m_cnt bigint comment'月调用次数'
)
partitioned by (pdate string comment'分区')
stored as orc--表结构采用orc
tblproperties('creater'='22033799');

2、写数

insert into hdw_psi_dev.test1(mac_id, start_time, end_time, wind_speed) 
select '001', '07:20', '07:49', '2,3' union all
select '001', '11:25', '12:30', '1,2,3' union all
select '002', '10:35', '11:13', '2' union all
select '002', '13:30', '14:15', '3,4' union all
select '002', '20:45', '21:00', '1,2,3';

3、加载数据到hdfs

--将本地数据加载到hdfs
load data inpath '/user/udhuangj/spark/test_data_less.csv' into table hdw_psi_dev.opn_learn_day partition(pdate='20230314');

4、动态分区

create table hdw_psi_dev.opn_learn_day(
    statistics_dt              string COMMENT '使用日期 yyyy-MM-dd',
    mac_id                   string COMMENT 'macid',
    wifi_type                string COMMENT 'WiFitypeId',
    warmup_starttime         string COMMENT '预热开始时间 HH:mm',
    warmup_endtime           string COMMENT '预热结束时间 HH:mm',
    target_temperature       string COMMENT '目标温度'
) comment '用水明细表测试'
PARTITIONED BY (pdate string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\,' STORED AS TEXTFILE;

create table hdw_psi_dev.opn_learn_day_test(
    statistics_dt              string COMMENT '使用日期 yyyy-MM-dd',
    mac_id                   string COMMENT 'macid',
    wifi_type                string COMMENT 'WiFitypeId',
    warmup_starttime         string COMMENT '预热开始时间 HH:mm',
    warmup_endtime           string COMMENT '预热结束时间 HH:mm',
    target_temperature       string COMMENT '目标温度'
) comment '用水明细表测试'
PARTITIONED BY (wifi string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\,' STORED AS TEXTFILE;

--按照挑选出来的变量传递给分区
SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;
insert into hdw_psi_dev.opn_learn_day_test partition(wifi) 
select 
    statistics_dt,
    mac_id,
    wifi_type,
    warmup_starttime,
    warmup_endtime,
    target_temperature,
    wifi_type
from hdw_psi_dev.opn_learn_day
where pdate='20230313';

5、利用group by + collect_list命令按照mac聚合数据

小批量的话不需要concat_ws,若数据量大需要配合加入concat_ws

--小批量
insert overwrite table hdw_psi_dev.opn_learn_day partition(pdate='20230330')
select 
    '20230330' as statistics_dt,
    t1.mac_id,
    t1.wifi_type,
    t1.warmup_starttime,
    t1.warmup_endtime,
    t1.target_temperature
from (
    select 
        mac_id,
        collect_list(distinct wifi_type) as wifi_type,
        collect_list(warmup_starttime) as warmup_starttime,
        collect_list(warmup_endtime) as warmup_endtime,
        collect_list(target_temperature) as target_temperature
    from hdw_psi_dev.opn_learn_day
    where pdate = '20230313'
    group by mac_id
    ) as t1;

--大批量
insert overwrite table hdw_psi_dev.opn_learn_day partition(pdate='20230330')
select 
    '20230330' as statistics_dt,
    t1.mac_id,
    t1.wifi_type,
    t1.warmup_starttime,
    t1.warmup_endtime,
    t1.target_temperature
from (
    select 
        mac_id,
        concat_ws('-', collect_list(distinct wifi_type)) as wifi_type,
        concat_ws('-', collect_list(warmup_starttime)) as warmup_starttime,
        concat_ws('-', collect_list(warmup_endtime)) as warmup_endtime,
        concat_ws('-', collect_list(target_temperature)) as target_temperature
    from hdw_psi_dev.opn_learn_day
    where pdate = '20230313'
    group by mac_id
    ) as t1;

你可能感兴趣的:(hive,hive,hdfs,大数据)