需求
2017-09-15号的数据:
192.168.33.6,hunter,2017-09-15 10:30:20,/a
192.168.33.7,hunter,2017-09-15 10:30:26,/b
192.168.33.6,jack,2017-09-15 10:30:27,/a
192.168.33.8,tom,2017-09-15 10:30:28,/b
192.168.33.9,rose,2017-09-15 10:30:30,/b
192.168.33.10,julia,2017-09-15 10:30:40,/c
2017-09-16号的数据:
192.168.33.16,hunter,2017-09-16 10:30:20,/a
192.168.33.18,jerry,2017-09-16 10:30:30,/b
192.168.33.26,jack,2017-09-16 10:30:40,/a
192.168.33.18,polo,2017-09-16 10:30:50,/b
192.168.33.39,nissan,2017-09-16 10:30:53,/b
192.168.33.39,nissan,2017-09-16 10:30:55,/a
192.168.33.39,nissan,2017-09-16 10:30:58,/c
192.168.33.20,ford,2017-09-16 10:30:54,/c
新增:jerry,polo,nissan,ford
2017-09-17号的数据:
192.168.33.46,hunter,2017-09-17 10:30:21,/a
192.168.43.18,jerry,2017-09-17 10:30:22,/b
192.168.43.26,tom,2017-09-17 10:30:23,/a
192.168.53.18,bmw,2017-09-17 10:30:24,/b
192.168.63.39,benz,2017-09-17 10:30:25,/b
192.168.33.25,haval,2017-09-17 10:30:30,/c
192.168.33.10,julia,2017-09-17 10:30:40,/c
-- 创建日志表
create table t_web_log(ip string,uname string,access_time string,url string)
partitioned by (day string)
row format delimited fields terminated by ',';
-- 创建日活跃用户表
create table t_user_active_day like t_web_log;
-- 创建日新增用户表
create table t_user_new_day like t_user_active_day;
-- 创建历史用户表
create table t_user_history(uname string)
--》》》》》》》》》》》》1、统计日活跃用户《《《《《《《《《《《《《《《
insert into table t_user_active_day partition(day='2017-09-15')
select tmp.ip,tmp.uname,tmp.access_time,tmp.url
from
(select
ip,uname,access_time,url,row_number() over(partition by uname order by access_time) rn
from
t_web_log where day='2017-09-15') tmp
where rn <2;
--》》》》》》》》》》》》2、统计日新增用户《《《《《《《《《《《《《《《
insert into table t_user_new_day partition(day='2017-09-15')
select
tua.ip,tua.uname,tua.access_time,tua.url
from t_user_active_day tua
left join t_user_history tuh on tua.uname = tuh.uname
where tua.day = '2017-09-15' and tuh.uname IS NULL;
--》》》》》》》》》》》》3、记录历史用户《《《《《《《《《《《《《《《《《
insert into table t_user_history
select uname from t_user_new_day where day='2017-09-15';
--》》》》》》》》》》》》第二天重复操作《《《《《《《《《《《《《《《《《--
--1、统计日活跃用户
insert into table t_user_active_day partition(day='2017-09-16')
select tmp.ip,tmp.uname,tmp.access_time,tmp.url
from
(select
ip,uname,access_time,url,row_number() over(partition by uname order by access_time) rn
from
t_web_log where day='2017-09-16') tmp
where rn <2;
--2、统计日新增用户
insert into table t_user_new_day partition(day='2017-09-16')
select
tua.ip,tua.uname,tua.access_time,tua.url
from t_user_active_day tua
left join t_user_history tuh on tua.uname = tuh.uname
where tua.day = '2017-09-16' and tuh.uname IS NULL;
--3、记录历史用户
insert into table t_user_history
select uname from t_user_new_day where day='2017-09-16';