[实例]ETL之广告行业-用户行为归类sql


hive ETL之电商零售行业-推荐系统sql - jethai的博客 - 博客频道 - CSDN.NET
http://blog.csdn.net/jethai/article/details/52345349
--========== f_orders ==========--
/*
11 2014-05-01 06:01:12.334+01 10703007267488 item8:2|item1:1
22 2014-05-01 07:28:12.342+01 10101043505096 item6:3|item3:2
33 2014-05-01 07:50:12.33+01 10103043509747 item7:7
11 2014-05-01 09:27:12.33+01 10103043501575 item5:5|item1:1|item4:1|item9:1
22 2014-05-01 09:03:12.324+01 10104043514061 item1:3
33 2014-05-02 19:10:12.343+01 11003002067594 item4:2|item1:1
11 2014-05-02 09:07:12.344+01 10101043497459 item9:1
35 2014-05-03 11:07:12.339+01 10203019269975 item5:1|item1:1
789 2014-05-03 12:59:12.743+01 10401003346256 item7:3|item8:2|item9:1
77 2014-05-03 18:04:12.355+01 10203019262235 item5:2|item1:1
99 2014-05-04 00:36:39.713+01 10103044681799 item9:3|item1:1
33 2014-05-04 19:10:12.343+01 12345678901234 item5:1|item1:1
11 2014-05-05 09:07:12.344+01 12345678901235 item6:1|item1:1
35 2014-05-05 11:07:12.339+01 12345678901236 item5:2|item1:1
22 2014-05-05 12:59:12.743+01 12345678901237 item9:3|item1:1
77 2014-05-05 18:04:12.355+01 12345678901238 item8:3|item1:1
99 2014-05-05 20:36:39.713+01 12345678901239 item9:3|item1:1
*/
CREATE EXTERNAL TABLE f_orders (
user_id STRING
, ts STRING
, order_id STRING
, items map
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
COLLECTION ITEMS TERMINATED BY '|'
MAP KEYS TERMINATED BY ':'
LOCATION '/tmp/db_case3/f_orders';

select * from f_orders where array_contains(map_keys(items), 'item8');

select user_id, order_id, item, amount from f_orders LATERAL VIEW explode(items) t AS item, amount;

--========== d_items ==========--
/*
item1 100.2 catalogA|catalogD|catalogX
item2 200.3 catalogA
item3 300.4 catalogA|catalogX
item4 400.5 catalogB
item5 500.6 catalogB|catalogX
item6 600.7 catalogB
item7 700.8 catalogC
item8 800.9 catalogC|catalogD
item9 899.99 catalogC|catalogA
*/


hive ETL之业绩报表sql - jethai的博客 - 博客频道 - CSDN.NET
http://blog.csdn.net/jethai/article/details/52345350


hive ETL之物流行业-订单跟踪SLA sql - jethai的博客 - 博客频道 - CSDN.NET
http://blog.csdn.net/jethai/article/details/52345346


hive ETL之广告行业-用户行为归类sql - jethai的博客 - 博客频道 - CSDN.NET
http://blog.csdn.net/jethai/article/details/52345348

--========== click_log ==========--
/*
11 ad_101 2014-05-01 06:01:12.334+01
22 ad_102 2014-05-01 07:28:12.342+01
33 ad_103 2014-05-01 07:50:12.33+01
11 ad_104 2014-05-01 09:27:12.33+01
22 ad_103 2014-05-01 09:03:12.324+01
33 ad_102 2014-05-02 19:10:12.343+01
11 ad_101 2014-05-02 09:07:12.344+01
35 ad_105 2014-05-03 11:07:12.339+01
22 ad_104 2014-05-03 12:59:12.743+01
77 ad_103 2014-05-03 18:04:12.355+01
99 ad_102 2014-05-04 00:36:39.713+01
33 ad_101 2014-05-04 19:10:12.343+01
11 ad_101 2014-05-05 09:07:12.344+01
35 ad_102 2014-05-05 11:07:12.339+01
22 ad_103 2014-05-05 12:59:12.743+01
77 ad_104 2014-05-05 18:04:12.355+01
99 ad_105 2014-05-05 20:36:39.713+01
*/
CREATE EXTERNAL TABLE click_log (
cookie_id STRING
, ad_id STRING
, ts STRING
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/tmp/db_case2/click_log';

select cookie_id, collect_set(ad_id) as orders
from click_log
--where ts > '2014-05-02'
group by cookie_id;

select cookie_id, group_concat(ad_id, '|') as orders
from click_log
--where ts > '2014-05-02'
group by cookie_id;

你可能感兴趣的:([实例]ETL之广告行业-用户行为归类sql)