-- 1. 删除 (Delete)
-- Delete by rewrite: replace the contents of tmp with every row of tmp
-- except those whose id is '666'.
-- Hive's INSERT OVERWRITE syntax requires the TABLE keyword.
-- Note: rows whose id is NULL are dropped as well, because `id != '666'`
-- evaluates to NULL (not true) for them.
insert overwrite table tmp
select *
from tmp
where id != '666';
-- 2. 更新 (Update)
-- Update by rewrite: replace the contents of tmp with its own rows, setting
-- value to '25' on the rows where id = '1' and label = 'grade' and leaving
-- value untouched everywhere else.
-- Hive's INSERT OVERWRITE syntax requires the TABLE keyword.
-- NOTE(review): the WHERE clause also removes rows with id = '666' (and rows
-- with a NULL id); confirm that this delete is intended as part of the update.
insert overwrite table tmp
select
    id,
    label,
    if(id = '1' and label = 'grade', '25', value) as value
from tmp
where id != '666';
-- 3. 列转行 (Columns to rows)
-- Columns to rows: turn the height / weight / age columns of tmp into one
-- (id, label, value) row per attribute.
with packed as (
    -- Step 1: pack the attributes into a single string shaped like
    -- "heit:180,weit:60,age:26".
    select
        id,
        concat('heit', ':', height, ',', 'weit', ':', weight, ',', 'age', ':', age) as value
    from tmp
)
-- Step 3: split each "label:value" pair on ':' into its two parts.
select
    id,
    split(pair, ':')[0] as label,
    split(pair, ':')[1] as value
from packed
-- Step 2: explode the packed string on ',' so each pair becomes its own row.
lateral view explode(split(value, ',')) pairs as pair;
-- 4. 行转列1 (Rows to columns, method 1: self-joins)
-- Rows to columns via self-joins: read tmp2 once per label ('heit', 'weit',
-- 'age') and join the three slices on id, producing one row per id with
-- height, weight and age columns.
-- These are inner joins, so an id that lacks any of the three labels is
-- absent from the result.
select
    h.id    as id,
    h.value as height,
    w.value as weight,
    a.value as age
from (select id, value from tmp2 where label = 'heit') as h
join (select id, value from tmp2 where label = 'weit') as w
    on h.id = w.id
join (select id, value from tmp2 where label = 'age') as a
    on h.id = a.id;
-- 5. 行转列2 (Rows to columns, method 2: map)
-- Rows to columns via a map: for each id, collect its "label:value" pairs,
-- join them into one comma-separated string, parse that string into a map
-- with str_to_map, then read one map entry per output column.
-- The map keys are the label values themselves. The join-based pivot in this
-- file filters tmp2 on 'heit' / 'weit' / 'age', so those are the keys looked
-- up here; looking up 'height' / 'weight' would return NULL.
-- NOTE(review): confirm the label spellings against the data in tmp2.
select
    id,
    tmpmap['heit'] as height,
    tmpmap['weit'] as weight,
    tmpmap['age']  as age
from (
    select
        id,
        str_to_map(concat_ws(',', collect_set(concat(label, ':', value))), ',', ':') as tmpmap
    from tmp2
    group by id
) as tmp1;
-- 6. 分析函数1 (Analytic functions 1: lead / lag / first_value / last_value)
-- Offset and boundary window functions, evaluated per id in label order:
--   lead        value of the next row, 0 when there is no next row
--   lag         value of the previous row, 999 when there is no previous row
--   first_value value of the first row in the window frame
--   last_value  value of the last row in the window frame
-- With an ORDER BY and no explicit frame, the frame ends at the current row
-- (and its peers), so last_value does not reach the end of the partition.
-- NOTE(review): add `rows between unbounded preceding and unbounded following`
-- to last_value if the partition-wide last value is what is wanted.
select
    id,
    label,
    value,
    lead(value, 1, 0)   over (partition by id order by label) as lead,
    lag(value, 1, 999)  over (partition by id order by label) as lag,
    first_value(value)  over (partition by id order by label) as first_value,
    last_value(value)   over (partition by id order by label) as last_value
from tmp;
-- 7. 分析函数2 (Analytic functions 2: row_number / rank / dense_rank)
-- Ranking window functions, evaluated per id in value order.
select
    id,
    label,
    value,
    row_number() over (partition by id order by value) as row_number, -- sequential numbering, unique per row
    rank()       over (partition by id order by value) as rank,       -- ties share a rank, gaps follow the ties
    dense_rank() over (partition by id order by value) as dense_rank  -- ties share a rank, no gaps
from tmp;
-- 8. 多维分析1 (Multidimensional analysis 1: grouping sets)
-- Row counts for an explicit list of grouping combinations over col1, col2
-- and col3: each column alone, each pair of columns, and the grand total ().
-- grouping__id identifies which grouping set produced each output row.
select
    col1,
    col2,
    col3,
    count(1),
    grouping__id
from tmp
group by col1, col2, col3
grouping sets (
    col1,
    col2,
    col3,
    (col1, col2),
    (col1, col3),
    (col2, col3),
    ()
);
-- 9. 多维分析2 (Multidimensional analysis 2: cube)
-- Row counts for every combination of col1, col2 and col3: `with cube`
-- aggregates over all subsets of the group-by columns, including the grand
-- total. grouping__id identifies which combination produced each output row.
select
    col1,
    col2,
    col3,
    count(1),
    grouping__id
from tmp
group by col1, col2, col3
with cube;
-- 10. 数据倾斜 group by (Data skew: group by)
-- Two-stage count per label, meant to spread a skewed label over many groups:
--   1. tag every row with a random salt rd (rand() rounded to 2 decimals);
--   2. count rows per (rd, label), so one heavy label is split across salts;
--   3. add the partial counts up per label.
-- The innermost select must expose label because both outer levels group by it.
-- `all` is a reserved word, so the output alias is backtick-quoted.
select
    label,
    sum(cnt) as `all`
from (
    select
        rd,
        label,
        sum(1) as cnt
    from (
        select
            label,
            round(rand(), 2) as rd
        from tmp1
    ) as salted
    group by rd, label
) as partial_counts
group by label;
-- 11. 数据倾斜 join (Data skew: join)
-- Salted join of tmp1 and tmp2 on label, followed by a two-stage sum of
-- tmp1.value * tmp2.value per label:
--   1. every tmp1 row gets a random integer salt rd in 0..9;
--   2. every tmp2 row is replicated ten times, once per salt 0..9, so each
--      salted tmp1 row still meets all of its tmp2 matches exactly once;
--   3. the join key is (rd, label), which splits one heavy label ten ways;
--   4. products are summed per (rd, label), then per label.
-- The salt is floor(rand() * 10) rather than round(rand(), 1): rounding can
-- produce 1.0, which has no counterpart among ten replicas, and the inner
-- join would silently drop those rows.
-- The outer level sums cnt, the only measure the subquery exposes.
-- `all` is a reserved word, so the output alias is backtick-quoted.
select
    label,
    sum(cnt) as `all`
from (
    select
        rd,
        label,
        sum(value) as cnt
    from (
        select
            salted.rd                       as rd,
            salted.label                    as label,
            salted.value * replicated.value as value
        from (
            select
                label,
                value,
                cast(floor(rand() * 10) as int) as rd
            from tmp1
        ) as salted
        join (
            select
                label,
                value,
                cast(salt as int) as rd
            from tmp2
            lateral view explode(split('0,1,2,3,4,5,6,7,8,9', ',')) salts as salt
        ) as replicated
            on salted.rd = replicated.rd
            and salted.label = replicated.label
    ) as products
    group by rd, label
) as partial_sums
group by label;