--定义新表
create EXTERNAL table if not exists hdw_psi${g_he_dbpostfix}.psi_nginx_access_log_cnt(
statistics_dt string comment'统计日期'
,data_dt string comment'数据日期'
,d_cnt bigint comment'日调用次数'
,m_cnt bigint comment'月调用次数'
)
partitioned by (pdate string comment'分区')
stored as orc--表结构采用orc
tblproperties('creater'='22033799');
insert into hdw_psi_dev.test1(mac_id, start_time, end_time, wind_speed)
select '001', '07:20', '07:49', '2,3' union all
select '001', '11:25', '12:30', '1,2,3' union all
select '002', '10:35', '11:13', '2' union all
select '002', '13:30', '14:15', '3,4' union all
select '002', '20:45', '21:00', '1,2,3';
--将本地数据加载到hdfs
load data inpath '/user/udhuangj/spark/test_data_less.csv' into table hdw_psi_dev.opn_learn_day partition(pdate='20230314');
create table hdw_psi_dev.opn_learn_day(
statistics_dt string COMMENT '使用日期 yyyy-MM-dd',
mac_id string COMMENT 'macid',
wifi_type string COMMENT 'WiFitypeId',
warmup_starttime string COMMENT '预热开始时间 HH:mm',
warmup_endtime string COMMENT '预热结束时间 HH:mm',
target_temperature string COMMENT '目标温度'
) comment '用水明细表测试'
PARTITIONED BY (pdate string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\,' STORED AS TEXTFILE;
create table hdw_psi_dev.opn_learn_day_test(
statistics_dt string COMMENT '使用日期 yyyy-MM-dd',
mac_id string COMMENT 'macid',
wifi_type string COMMENT 'WiFitypeId',
warmup_starttime string COMMENT '预热开始时间 HH:mm',
warmup_endtime string COMMENT '预热结束时间 HH:mm',
target_temperature string COMMENT '目标温度'
) comment '用水明细表测试'
PARTITIONED BY (wifi string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\,' STORED AS TEXTFILE;
--按照挑选出来的变量传递给分区
SET hive.exec.dynamic.partition=true;
SET hive.exec.dynamic.partition.mode=nonstrict;
insert into hdw_psi_dev.opn_learn_day_test partition(wifi)
select
statistics_dt,
mac_id,
wifi_type,
warmup_starttime,
warmup_endtime,
target_temperature,
wifi_type
from hdw_psi_dev.opn_learn_day
where pdate='20230313';
小批量的话不需要concat_ws,若数据量大需要配合加入concat_ws
--小批量
insert overwrite table hdw_psi_dev.opn_learn_day partition(pdate='20230330')
select
'20230330' as statistics_dt,
t1.mac_id,
t1.wifi_type,
t1.warmup_starttime,
t1.warmup_endtime,
t1.target_temperature
from (
select
mac_id,
collect_list(distinct wifi_type) as wifi_type,
collect_list(warmup_starttime) as warmup_starttime,
collect_list(warmup_endtime) as warmup_endtime,
collect_list(target_temperature) as target_temperature
from hdw_psi_dev.opn_learn_day
where pdate = '20230313'
group by mac_id
) as t1;
--大批量
insert overwrite table hdw_psi_dev.opn_learn_day partition(pdate='20230330')
select
'20230330' as statistics_dt,
t1.mac_id,
t1.wifi_type,
t1.warmup_starttime,
t1.warmup_endtime,
t1.target_temperature
from (
select
mac_id,
concat_ws('-', collect_list(distinct wifi_type)) as wifi_type,
concat_ws('-', collect_list(warmup_starttime)) as warmup_starttime,
concat_ws('-', collect_list(warmup_endtime)) as warmup_endtime,
concat_ws('-', collect_list(target_temperature)) as target_temperature
from hdw_psi_dev.opn_learn_day
where pdate = '20230313'
group by mac_id
) as t1;