实战:Hive在内容推荐系统中的应用(二)

3.6 转换成key,value的方式

-- For every (user_id, keyword) pair, count the user's actions on articles
-- carrying that keyword and render the pair as a "keyword:count" string.
--
-- The lateral-view alias is `t` rather than `to`: TO is a reserved keyword
-- in Hive 1.2+ and cannot be used as an unquoted identifier there.
--
-- NOTE(review): this is a LEFT OUTER JOIN, so an action with no matching
-- keyword row keeps b.kw = NULL; concat_ws skips NULL arguments, which
-- would make kw_w a bare count such as "1" for those rows.
select
    a.user_id,
    concat_ws(':', b.kw, cast(count(1) as string)) as kw_w
from user_actions as a
left outer join (
    -- one output row per element of articles.kws
    select
        article_id,
        kw
    from articles
    lateral view explode(kws) t as kw
) b
    on (a.article_id = b.article_id)
group by
    a.user_id,
    b.kw;

结果

11      kw1:4

11      kw4:1

11      kw5:1

11      kw8:3

11      kw9:1

22      kw1:1

22      kw3:1

22      kw4:1

22      kw5:1

22      kw6:1

22      kw7:2

22      kw9:1

33      kw1:1

33      kw3:1

33      kw6:1

33      kw7:1

3.7 再次根据用户id进行分组,把每个用户的 key:value 收集成一个集合,用","号进行拼接

-- Collapse the per-(user_id, keyword) "keyword:count" strings into a single
-- comma-separated string per user.
--
-- The lateral-view alias is `t` rather than `to`: TO is a reserved keyword
-- in Hive 1.2+ and cannot be used as an unquoted identifier there.
--
-- NOTE(review): the LEFT OUTER JOIN keeps actions without a matching
-- keyword row (b.kw = NULL); concat_ws skips NULL, so such rows contribute
-- a bare count ("1") to the list, as seen for users 35 and 99 in the output.
select
    cc.user_id,
    concat_ws(',', collect_set(cc.kw_w))
from (
    -- inner step: one "keyword:count" string per (user_id, keyword)
    select
        a.user_id,
        concat_ws(':', b.kw, cast(count(1) as string)) as kw_w
    from user_actions as a
    left outer join (
        -- one output row per element of articles.kws
        select
            article_id,
            kw
        from articles
        lateral view explode(kws) t as kw
    ) b
        on (a.article_id = b.article_id)
    group by
        a.user_id,
        b.kw
) as cc
group by
    cc.user_id;

结果

11      kw1:4,kw4:1,kw5:1,kw8:3,kw9:1

22      kw1:1,kw3:1,kw4:1,kw5:1,kw6:1,kw7:2,kw9:1

33      kw1:1,kw3:1,kw6:1,kw7:1,kw8:1

35      1,kw3:1,kw6:1

77      kw1:1,kw4:1,kw5:1,kw7:1,kw9:1

99      1,kw3:1,kw6:1

注:用户35、99结果开头单独的"1",很可能是因为 left outer join 后有行为记录没有匹配到关键词(kw 为 NULL),concat_ws 会跳过 NULL,只留下了计数值。

3.8 因kw_w字段是拼接出来的字符串,需要把它转换成map类型

-- Build one map<string,string> per user (keyword -> action count) by joining
-- the "keyword:count" strings with ',' and parsing them with str_to_map.
-- The delimiters are passed explicitly: ',' separates entries, ':' separates
-- key from value.
--
-- The lateral-view alias is `t` rather than `to`: TO is a reserved keyword
-- in Hive 1.2+ and cannot be used as an unquoted identifier there.
--
-- NOTE(review): the LEFT OUTER JOIN keeps actions without a matching
-- keyword row (b.kw = NULL); concat_ws skips NULL, so such rows become a
-- bare "1", which str_to_map turns into the entry {"1":null} (users 35, 99).
select
    cc.user_id,
    str_to_map(concat_ws(',', collect_set(cc.kw_w)), ',', ':')
from (
    -- inner step: one "keyword:count" string per (user_id, keyword)
    select
        a.user_id,
        concat_ws(':', b.kw, cast(count(1) as string)) as kw_w
    from user_actions as a
    left outer join (
        -- one output row per element of articles.kws
        select
            article_id,
            kw
        from articles
        lateral view explode(kws) t as kw
    ) b
        on (a.article_id = b.article_id)
    group by
        a.user_id,
        b.kw
) as cc
group by
    cc.user_id;

结果

OK

11      {"kw1":"4","kw4":"1","kw5":"1","kw8":"3","kw9":"1"}

22      {"kw1":"1","kw3":"1","kw4":"1","kw5":"1","kw6":"1","kw7":"2","kw9":"1"}

33      {"kw1":"1","kw3":"1","kw6":"1","kw7":"1","kw8":"1"}

35      {"1":null,"kw3":"1","kw6":"1"}

77      {"kw1":"1","kw4":"1","kw5":"1","kw7":"1","kw9":"1"}

99      {"1":null,"kw3":"1","kw6":"1"}

注:用户35、99中的 "1":null 来自上一步结果里单独的"1"(没有关键词,只有计数),str_to_map 把它当成了没有value的key。

3.9 最终把结果放到一个表里

-- Materialize the per-user keyword profile as table user_kws:
--   user_id : taken from user_actions
--   wm      : map<string,string>, keyword -> action count (count kept as string)
-- The str_to_map delimiters are passed explicitly: ',' separates entries,
-- ':' separates key from value.
--
-- The lateral-view alias is `t` rather than `to`: TO is a reserved keyword
-- in Hive 1.2+ and cannot be used as an unquoted identifier there.
--
-- NOTE(review): the LEFT OUTER JOIN keeps actions without a matching
-- keyword row (b.kw = NULL), which ends up in wm as the entry {"1":null}.
-- Add a filter on b.kw (or use an inner join) if those entries are unwanted.
create table user_kws as
select
    cc.user_id,
    str_to_map(concat_ws(',', collect_set(cc.kw_w)), ',', ':') as wm
from (
    -- inner step: one "keyword:count" string per (user_id, keyword)
    select
        a.user_id,
        concat_ws(':', b.kw, cast(count(1) as string)) as kw_w
    from user_actions as a
    left outer join (
        -- one output row per element of articles.kws
        select
            article_id,
            kw
        from articles
        lateral view explode(kws) t as kw
    ) b
        on (a.article_id = b.article_id)
    group by
        a.user_id,
        b.kw
) as cc
group by
    cc.user_id;

3.10 查询包括kw1偏好的用户

-- Look up each user's count for keyword 'kw1' in the wm map.
-- The key must be quoted with ASCII single quotes; typographic quotes
-- (‘ ’) are not valid string delimiters and make the statement fail to parse.
-- Users whose map has no 'kw1' key get NULL in the second column.
select
    user_id,
    wm['kw1']
from user_kws;

11    4

22    1

33    1

3.11 把每行map的key和value分别提取出来,返回两个数组

-- Split each user's wm map into two arrays: its keys and its values.
select
    user_id,
    map_keys(wm),
    map_values(wm)
from user_kws;

11      ["kw1","kw4","kw5","kw8","kw9"] ["4","1","1","3","1"]

22      ["kw1","kw3","kw4","kw5","kw6","kw7","kw9"]     ["1","1","1","1","1","2","1"]

3.12 把map结果拆开成多行,每个键值对一行,key和value两列的列名可以自己命名

-- Flatten user_kws to one row per (user_id, map entry): exploding the wm map
-- yields two columns per entry, named here keyword (key) and weight (value).
select
    user_id,
    keyword,
    weight
from user_kws
lateral view explode(wm) kv as keyword, weight;

11      kw1     4

11      kw4     1

11      kw5     1

11      kw8     3

11      kw9     1

22      kw1     1

22      kw3     1

22      kw4     1

22      kw5     1

22      kw6     1

 

完结。。。。。。

 

你可能感兴趣的:(Hadoop)