hive 行列转化

用SQL判断字段串中连续出现两个以上的数字,并返回对应的数字
with a as (
select explode(split(regexp_replace(‘sdfsd1232sdf324’,‘[0-9]+’,‘,’),‘,’)) col
) select * from a where LENGTH(col)>1
结果为sdfsd
sdf
select explode(split(regexp_replace(‘sdfsd1232sdf324’,‘[0-9]+’,‘,’),‘,’))
union all
select explode(split(regexp_replace(‘sdfsd1232sdf324’,‘[a-z]+’,‘,’),‘,’))

select regexp_extract(‘honeymoon’, ‘hon(.*?)(moon)’,2);

select explode(array(‘A’,‘B’,‘C’))
select explode(map(‘a’,1,‘b’,2))

select posexplode(array(‘A’,‘B’,‘C’));

with t2 as ( select ‘1<’ as client_rights,‘a’ as cust_num
union all
select ‘CWe:’ as client_rights,‘a’ as cust_num
)
select
collect_set(t2.client_rights),
– concat_ws(‘’, collect_set(t2.client_rights),
instr(concat_ws(‘’, collect_set(t2.client_rights)), ‘<’),
if(instr(concat_ws(‘’, collect_set(t2.client_rights)), ‘<’) > 0,1,0)
from t2
group by cust_num

create table myhive.pageAds(
pageid string ,
adid_list array
)

create table dual(id string);

select 1+1 from dual;
insert into myhive.pageAds select ‘front_page’ as pageid,array(1,2,3) as adid_list from dual
insert into myhive.pageAds select ‘contact_page’ as pageid,array(3,4,5) as adid_list from dual
hive 行列转化_第1张图片

hdfs dfs -cat /user/hive/warehouse/myhive.db/pageads/000* | hdfs dfs -copyFromLocal - /user/hive/warehouse/myhive.db/pageads/pages; 小文件合并

hive 行列转化_第2张图片

select * from myhive.pageAds;
在这里插入图片描述

select pageid,adid from myhive.pageAds lateral view explode(adid_list) a as adid;
hive 行列转化_第3张图片
select pageid,adid_list,adid_list[0],adid_list[1],adid_list[2] from myhive.pageAds;
hive 行列转化_第4张图片

create table myhive.t_afan_test
(
info1 array,
info2 array
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ‘\t’
COLLECTION ITEMS TERMINATED BY ‘,’;

insert into myhive.t_afan_test select array(12,23,23,34) as info1,array(“what”,“are”,“this”) as info2 from dual;
insert into myhive.t_afan_test select array(56,78) as info1,array(“this”,“is”,“book”) as info2 from dual;
insert into myhive.t_afan_test select array(56,78) as info1,array(“this”,“is”,“book”) as info2 from dual;

select * from myhive.t_afan_test

insert overwrite table myhive.t_afan_test
select a.info1 info1,a.info2 info2 from ( select info1,info2,ROW_NUMBER() over(partition by info1 order by info2) as num from myhive.t_afan_test ) a where a.num=1; 去重

select * from myhive.t_afan_test

select a,b from myhive.t_afan_test lateral view explode(info1) ad as a lateral view explode(info2) ad as b;

select posexplode(info2) from myhive.t_afan_test;

select a.,b. from
(select ina,inb from myhive.t_afan_test
lateral view explode(info1) tmpTable1 as ina
lateral view explode(info2) tmpTable2 as inb) a
left join (select id,inb from myhive.t_afan_test lateral view posexplode(info2) ad as id,inb) b
on a.inb=b.inb

select h.inb,concat_ws(‘,’,collect_list(cast(h.ina as string ))) from (
select a.ina as ina,b.inb as inb,b.id as id from
(select ina,inb from myhive.t_afan_test
lateral view explode(info1) tmpTable1 as ina
lateral view explode(info2) tmpTable2 as inb) a
left join (
select inb,id from myhive.t_afan_test lateral view posexplode(info2) ac as id,inb
) b
on a.inb=b.inb
) h group by h.inb

select info1,info2,ina,inb,id1,id2 from myhive.t_afan_test
lateral view posexplode(info1) tmpTable1 as id1,ina
lateral view posexplode(info2) tmpTable2 as id2,inb
where id1=id2

select h.inb,concat_ws(‘,’,collect_list(cast(h.ina as string ))) from (
select a.ina as ina,b.inb as inb from
(select ina,inb from myhive.t_afan_test
lateral view explode(info1) tmpTable1 as ina
lateral view explode(info2) tmpTable2 as inb) a
left join (
select distinct inb from myhive.t_afan_test lateral view explode(info2) c as inb
) b
on a.inb=b.inb
) h group by h.inb

select c.deep,concat_ws(‘,’,collect_list(cast(c.info as string))) from (
select info,b.hobby hobby,b.deep deep from myhive.t_afan_test a lateral view posexplode(info2) b as hobby,deep
lateral view explode(a.info1) c as info) c
group by c.deep

–行列转换

create table myhive.row2col1(
col1 string,
col2 string,
col3 int
)row format delimited fields terminated by ‘,’;

insert into myhive.row2col1 values(‘a’,‘c’,1),
(‘a’,‘d’,2),
(‘a’,‘e’,3),
(‘b’,‘c’,4),
(‘b’,‘d’,5),
(‘b’,‘e’,6);

select * from myhive.row2col1;

select
col1 as col1,
max(case col2 when ‘c’ then col3 else 0 end) as c,
max(case col2 when ‘d’ then col3 else 0 end) as d,
max(case col2 when ‘e’ then col3 else 0 end) as e
from
myhive.row2col1 group by col1;
hive 行列转化_第5张图片

–多行转单列
create table myhive.row2col2(
col1 string,
col2 string,
col3 int
)row format delimited fields terminated by ‘,’;

insert into myhive.row2col2 values
(‘a’,‘b’,1),
(‘a’,‘b’,2),
(‘a’,‘b’,3),
(‘c’,‘d’,4),
(‘c’,‘d’,5),
(‘a’,‘b’,1),
(‘a’,‘b’,2),
(‘a’,‘b’,3),
(‘c’,‘d’,4),
(‘c’,‘d’,5),
(‘c’,‘d’,6);
select * from myhive.row2col2;
hive 行列转化_第6张图片

select col1,col2,concat_ws(‘,’,collect_set(cast(col3 as string))) from myhive.row2col2 group by col1,col2;–去重
hive 行列转化_第7张图片
select col1,col2,collect_set(col3),collect_set(col3)[0],collect_set(col3)[1],collect_set(col3)[2] from myhive.row2col2 group by col1,col2;
hive 行列转化_第8张图片
select col1,col2,collect_set(col3),collect_set(col3)[0],collect_set(col3)[1],collect_set(col3)[2],collect_set(col3)[3] from myhive.row2col2 group by col1,col2;
hive 行列转化_第9张图片

select col1,col2,concat_ws(‘,’,collect_list(cast(col3 as string))) from myhive.row2col2 group by col1,col2;–不去重

–多列转多行
create table myhive.col2row1(
col1 string,
col2 int,
col3 int,
col4 int
)row format delimited fields terminated by ‘,’;

insert into myhive.col2row1 values
(‘a’,1,2,3),
(‘b’,4,5,6);

select * from myhive.col2row1;
hive 行列转化_第10张图片

select col1, ‘c’ as col2, col2 as col3 from myhive.col2row1
UNION ALL
select col1, ‘d’ as col2, col3 as col3 from myhive.col2row1
UNION ALL
select col1, ‘e’ as col2, col4 as col3 from myhive.col2row1;
hive 行列转化_第11张图片

–单列转多行
create table col2row2(
col1 string,
col2 string,
col3 string
)row format delimited fields terminated by ‘,’; --建表用’,‘隔开,插入用’,'会少数据

create table myhive.col2row2(
col1 string,
col2 string,
col3 string
);

insert into myhive.col2row2 values(‘a’,‘b’,“1,2,3”),
(‘c’,‘d’,“4,5,6”);

select * from myhive.col2row2;
在这里插入图片描述

select col1,col2,col4 from myhive.col2row2 lateral view explode(split(col3,‘,’)) ad as col4

hive 行列转化_第12张图片

hive 行列转化_第13张图片

你可能感兴趣的:(hive,sql)