3.6 转换成key,value的方式
-- Count how many times each user acted on each keyword and render the pair
-- as a single "kw:count" string.
-- articles.kws is exploded so that every keyword of an article becomes its own row.
-- NOTE(review): the join is a left join, so actions whose article has no keyword
-- row keep kw = NULL; concat_ws skips NULL arguments, so such a group presumably
-- comes out as a bare count without the "kw:" prefix -- confirm against the data.
select
    ua.user_id,
    concat_ws(':', ak.kw, cast(count(*) as string)) as kw_w
from user_actions ua
left join (
    select
        article_id,
        kw
    from articles
    lateral view explode(kws) kw_rows as kw
) ak
    on ua.article_id = ak.article_id
group by
    ua.user_id,
    ak.kw;
结果
11 kw1:4
11 kw4:1
11 kw5:1
11 kw8:3
11 kw9:1
22 kw1:1
22 kw3:1
22 kw4:1
22 kw5:1
22 kw6:1
22 kw7:2
22 kw9:1
33 kw1:1
33 kw3:1
33 kw6:1
33 kw7:1
3.7 再次按用户id进行分组,把每个用户的key:value生成一个集合,用","号进行拼接
-- Fold every user's "kw:count" entries into one comma-separated string.
-- The inner query produces one "kw:count" row per (user, keyword); the outer
-- query collects the distinct entries of each user and joins them with ",".
-- NOTE(review): because the inner join is a left join, a group with kw = NULL
-- yields a bare count (e.g. "1") instead of "kw:count"; that entry ends up in
-- the joined string as well.
select
    per_kw.user_id,
    concat_ws(",", collect_set(per_kw.kw_w))
from (
    select
        ua.user_id,
        concat_ws(':', ak.kw, cast(count(*) as string)) as kw_w
    from user_actions ua
    left join (
        select
            article_id,
            kw
        from articles
        lateral view explode(kws) kw_rows as kw
    ) ak
        on ua.article_id = ak.article_id
    group by
        ua.user_id,
        ak.kw
) per_kw
group by
    per_kw.user_id;
结果
11 kw1:4,kw4:1,kw5:1,kw8:3,kw9:1
22 kw1:1,kw3:1,kw4:1,kw5:1,kw6:1,kw7:2,kw9:1
33 kw1:1,kw3:1,kw6:1,kw7:1,kw8:1
35 1,kw3:1,kw6:1
77 kw1:1,kw4:1,kw5:1,kw7:1,kw9:1
99 1,kw3:1,kw6:1
3.8 因kw_w字段是拼接的,是字符串格式,把它转换成map类型
-- Turn each user's "kw:count,kw:count,..." string into a map<string,string>.
-- str_to_map is called with its default delimiters: "," between entries and
-- ":" between key and value, which matches how the string is assembled here.
-- NOTE(review): a bare count produced for kw = NULL (left join, concat_ws skips
-- NULL) has no ":" in it, so it becomes a map key with a NULL value.
select
    per_kw.user_id,
    str_to_map(concat_ws(",", collect_set(per_kw.kw_w)))
from (
    select
        ua.user_id,
        concat_ws(':', ak.kw, cast(count(*) as string)) as kw_w
    from user_actions ua
    left join (
        select
            article_id,
            kw
        from articles
        lateral view explode(kws) kw_rows as kw
    ) ak
        on ua.article_id = ak.article_id
    group by
        ua.user_id,
        ak.kw
) per_kw
group by
    per_kw.user_id;
结果
OK
11 {"kw1":"4","kw4":"1","kw5":"1","kw8":"3","kw9":"1"}
22 {"kw1":"1","kw3":"1","kw4":"1","kw5":"1","kw6":"1","kw7":"2","kw9":"1"}
33 {"kw1":"1","kw3":"1","kw6":"1","kw7":"1","kw8":"1"}
35 {"1":null,"kw3":"1","kw6":"1"}
77 {"kw1":"1","kw4":"1","kw5":"1","kw7":"1","kw9":"1"}
99 {"1":null,"kw3":"1","kw6":"1"}
3.9 最终把结果放到一个表里
-- Persist one row per user with wm = map of keyword -> action count
-- (both key and value are strings, as produced by str_to_map).
--
-- Only real keywords are counted: the actions are inner-joined to the exploded
-- keyword rows and NULL keywords are filtered out. With a left join, an action
-- whose article has no keyword keeps kw = NULL; concat_ws skips NULL arguments,
-- so the entry degenerates to a bare count such as "1", which str_to_map then
-- stores as the bogus key "1" with a NULL value.
-- Consequence: a user none of whose actions match a keyword gets no row here.
create table user_kws as
select
    per_kw.user_id,
    -- default str_to_map delimiters: "," between entries, ":" between key and value
    str_to_map(concat_ws(",", collect_set(per_kw.kw_w))) as wm
from (
    select
        ua.user_id,
        concat_ws(':', ak.kw, cast(count(*) as string)) as kw_w
    from user_actions ua
    inner join (
        select
            article_id,
            kw
        from articles
        lateral view explode(kws) kw_rows as kw
    ) ak
        on ua.article_id = ak.article_id
    where ak.kw is not null
    group by
        ua.user_id,
        ak.kw
) per_kw
group by
    per_kw.user_id;
3.10 查询包括kw1偏好的用户
-- Look up every user's weight for the keyword 'kw1' in the wm map.
-- The key has to be written with plain ASCII quotes; typographic quotes
-- are not string delimiters in HiveQL and make the statement fail to parse.
-- Users whose map has no 'kw1' entry get NULL in the second column.
select
    user_id,
    wm['kw1']
from user_kws;
11 4
22 1
33 1
3.11 把每行map的key和value分别提取出来,返回两个数组
-- Split each user's wm map into two arrays: one holding the keys (keywords)
-- and one holding the values (counts, stored as strings).
select
    user_id,
    map_keys(wm),
    map_values(wm)
from user_kws;
11 ["kw1","kw4","kw5","kw8","kw9"] ["4","1","1","3","1"]
22 ["kw1","kw3","kw4","kw5","kw6","kw7","kw9"] ["1","1","1","1","1","2","1"]
3.12 把map结果拆开,每个key、value各占一行,列名可以自己命名
-- Unpack the wm map into rows: exploding a map yields one row per entry with
-- two columns, named here keyword (the map key) and weight (the map value).
select
    user_id,
    keyword,
    weight
from user_kws
lateral view explode(wm) kw_entries as keyword, weight;
11 kw1 4
11 kw4 1
11 kw5 1
11 kw8 3
11 kw9 1
22 kw1 1
22 kw3 1
22 kw4 1
22 kw5 1
22 kw6 1
完结。。。。。。