行转列实现:
表信息:
test_rowcol(dt_m,vaild_num, unvaild_num)
场景一:使用concat_ws和collect_set函数
select dt_m, 'vaild_num', concat_ws(',', collect_set(vaild_num)) from test_rowcol group by dt_m
union all
select dt_m, 'unvaild_num', concat_ws(',', collect_set(unvaild_num)) from test_rowcol group by dt_m;
说明:collect_set函数可以返回一个Array类型。
concat_ws函数可以拼接数组,如下:
select concat_ws('.', 'www', array('baidu', 'com')) from dual limit 1;
'www.baidu.com'
场景二:有时候如果需要对指标字段求和,则上述sql改写成如下:
select dt_m,'vaild_num',sum(vaild_num) from test_rowcol group by dt_m
union all
select dt_m,'unvaild_num',sum(unvaild_num) from test_rowcol group by dt_m;
场景三:使用str_to_map和explode函数以及lateral view
select dt_m, adt.data_type, adt.data
from test_rowcol
lateral view
explode(str_to_map(concat('vaild_num=',vaild_num,'&unvaild_num=',unvaild_num),'&','=')) adt as data_type, data;
说明:str_to_map可以将字符串转换为map,如下:
select str_to_map(concat('vaild_num=',vaild_num,'&unvaild_num=',unvaild_num),'&','=') from test_rowcol;
explode是表生成函数(UDTF),入参是一个数组或者是map,将其迭代,返回多行结果,如下:
select explode(str_to_map(concat('vaild_num=',vaild_num,'&unvaild_num=',unvaild_num),'&','=')) from test_rowcol;
lateral view可以将一列拆分成多行数据。
列转行实现:
利用上面的sql,创建一张表:
create table test_colrow as
select dt_m, adt.data_type, adt.data from test_rowcol lateral view
explode(str_to_map(concat('vaild_num=',vaild_num,'&unvaild_num=',unvaild_num),'&','=')) adt as data_type, data;
场景一:
select dt_m,
sum(case when data_type='vaild_num' then data end) as vaild_num,
sum(case when data_type='unvaild_num' then data end) as unvaild_num
from test_colrow
group by dt_m;
场景二:恢复到test_rowcol表样(费了九牛二虎之力,大神们有高招请多指教)
--创建一组临时表
create table temp_vaild
as select explode(collect_set(case when data_type='vaild_num' then concat(dt_m,'&',data) end)) as vaild_col from test_colrow;
create table temp_unvaild as
select explode(collect_set(case when data_type='unvaild_num' then concat(dt_m,'&',data) end)) as unvaild_col from
test_colrow;
--利用split函数和row_number()over()且再次利用临时表
select aa.dt_m, aa.data, bb.data
from
(select split(vaild_col,'&')[0] as dt_m, split(vaild_col,'&')[1] as data, row_number()over() as num from temp_vaild) aa ,
(select split(unvaild_col,'&')[0] as dt_m, split(unvaild_col,'&')[1] as data, row_number()over() as num from temp_unvaild) bb
where aa.num=bb.num and aa.dt_m=bb.dt_m