目录
一、前提
二、用户留存
1、一天留存,直到多天留存,留存率
2、创表
3、sql
三、沉默用户
1、沉默用户
2、创表
3、sql
四、本周回流用户数
1、概念
2、创表
3、sql
五、连续三周活跃用户
1、概念
2、创表
3、sql
六、一周内连续三天的活跃用户
1、概念
2、创表
3、sql
前提:dws 层的用户日、周、月活跃明细以及每日新增用户数据已经准备完毕。在此基础上,本文详细讲解用户留存、沉默用户、本周回流用户、流失用户、最近连续三周活跃用户、一周内连续三天活跃用户等指标的分析方法。
留存用户=前一天新增 join 今天活跃
用户留存率=留存用户/前一天新增
-- External Parquet table holding per-device retention records
-- (device id, creation date, retention day count), partitioned by `dt`.
CREATE EXTERNAL TABLE dws_user_retention_day (
    `mid_id`        STRING COMMENT '设备唯一标识',
    `create_date`   STRING COMMENT '设备新增时间',
    `retention_day` INT    COMMENT '截止当前日期留存天数'
)
COMMENT '每日用户留存情况'
PARTITIONED BY (`dt` STRING)
STORED AS PARQUET
LOCATION '/warehouse/gmall/dws/dws_user_retention_day/';
-- One-day retention for 2019-02-11: devices whose create_date is the previous
-- day and that also appear in the 2019-02-11 partition of dws_uv_detail_day.
SELECT
    nm.mid_id,
    nm.create_date,
    1 AS retention_day
FROM dws_uv_detail_day AS ud
INNER JOIN dws_new_mid_day AS nm
    ON ud.mid_id = nm.mid_id
WHERE ud.dt = '2019-02-11'
  AND nm.create_date = date_add('2019-02-11', -1);
沉默用户:指的是只在安装当天启动过,且该次启动时间在一周之前的用户。
-- External Parquet table storing the silent-device count per statistics date.
CREATE EXTERNAL TABLE ads_slient_count (
    `dt`           STRING COMMENT '统计日期',
    `slient_count` BIGINT COMMENT '沉默设备数'
)
STORED AS PARQUET
LOCATION '/warehouse/gmall/ads/ads_slient_count';
-- Silent-device count as of 2019-02-20: devices that appear in exactly one
-- daily-active row up to the statistics date, where that single day lies more
-- than 7 days before the statistics date.
SELECT
    '2019-02-20' AS dt,
    count(*)     AS slient_count
FROM (
    SELECT mid_id
    FROM dws_uv_detail_day
    WHERE dt <= '2019-02-20'
    GROUP BY mid_id
    -- count(*) = 1 presumes dws_uv_detail_day holds one row per device per dt,
    -- i.e. the device was active on a single day only — TODO confirm.
    -- NOTE(review): the original predicate was cut off after min(dt); the
    -- "< statistics date - 7 days" bound is reconstructed from the definition
    -- "only launched once, more than a week ago" — confirm the boundary.
    HAVING count(*) = 1
       AND min(dt) < date_add('2019-02-20', -7)
) AS silent_devices;
本周回流=本周活跃-本周新增-上周活跃
-- External Parquet table storing the returning-device count per statistics
-- date and week.
-- NOTE(review): the column is named `wastage_count` although its comment
-- describes returning devices; the name is kept because writers/readers may
-- depend on it.
CREATE EXTERNAL TABLE ads_back_count (
    `dt`            STRING COMMENT '统计日期',
    `wk_dt`         STRING COMMENT '统计日期所在周',
    `wastage_count` BIGINT COMMENT '回流设备数'
)
STORED AS PARQUET
LOCATION '/warehouse/gmall/ads/ads_back_count';
-- Returning devices for the week containing 2019-02-20:
-- active this week, minus devices created this week, minus devices active last week.
-- next_day(d, 'MO') is the Monday after d, so -7 / -1 give this week's
-- Monday / Sunday and -14 / -8 give last week's.
WITH this_week_active AS (
    SELECT mid_id
    FROM dws_uv_detail_wk
    WHERE wk_dt = concat(
        date_add(next_day('2019-02-20', 'MO'), -7),
        '_',
        date_add(next_day('2019-02-20', 'MO'), -1)
    )
),
this_week_new AS (
    SELECT mid_id
    FROM dws_new_mid_day
    WHERE create_date >= date_add(next_day('2019-02-20', 'MO'), -7)
      AND create_date <= date_add(next_day('2019-02-20', 'MO'), -1)
),
last_week_active AS (
    SELECT mid_id
    FROM dws_uv_detail_wk
    WHERE wk_dt = concat(
        date_add(next_day('2019-02-20', 'MO'), -7 * 2),
        '_',
        date_add(next_day('2019-02-20', 'MO'), -7 - 1)
    )
)
SELECT
    '2019-02-20' AS dt,
    concat(
        date_add(next_day('2019-02-20', 'MO'), -7),
        '_',
        date_add(next_day('2019-02-20', 'MO'), -1)
    ) AS wk_dt,
    count(*)
FROM this_week_active AS cur
LEFT JOIN this_week_new AS fresh
    ON cur.mid_id = fresh.mid_id
LEFT JOIN last_week_active AS prev
    ON cur.mid_id = prev.mid_id
-- Anti-join: keep only devices with no match on either side.
WHERE fresh.mid_id IS NULL
  AND prev.mid_id IS NULL;
通常是周一对前3周的数据做统计,该数据一周计算一次。
-- External Parquet table storing, per statistics date and week span, a
-- `continuity_count` value (the column itself carries no comment).
CREATE EXTERNAL TABLE ads_continuity_wk_count (
    `dt`               STRING COMMENT '统计日期,一般用结束周周日日期,如果每天计算一次,可用当天日期',
    `wk_dt`            STRING COMMENT '持续时间',
    `continuity_count` BIGINT
)
STORED AS PARQUET
LOCATION '/warehouse/gmall/ads/ads_continuity_wk_count';
-- Devices present in all three weekly partitions ending with the week that
-- contains 2019-02-20. The wk_dt bounds are compared as strings, from the
-- earliest week (Monday-21 .. Monday-15) to the current week (Monday-7 .. Monday-1).
SELECT
    '2019-02-20',
    concat(
        date_add(next_day('2019-02-20', 'MO'), -7 * 3),
        '_',
        date_add(next_day('2019-02-20', 'MO'), -1)
    ),
    count(*)
FROM (
    SELECT mid_id
    FROM dws_uv_detail_wk
    WHERE wk_dt >= concat(
              date_add(next_day('2019-02-20', 'MO'), -7 * 3),
              '_',
              date_add(next_day('2019-02-20', 'MO'), -7 * 2 - 1)
          )
      AND wk_dt <= concat(
              date_add(next_day('2019-02-20', 'MO'), -7),
              '_',
              date_add(next_day('2019-02-20', 'MO'), -1)
          )
    GROUP BY mid_id
    -- Three rows in the range means the device appears in each of the three
    -- weeks, presuming one row per device per wk_dt — TODO confirm.
    HAVING count(*) = 3
) AS three_week_devices;
最近7天内连续3天活跃用户数
-- External Parquet table storing, per statistics date and trailing 7-day
-- range, the number of continuously active devices.
CREATE EXTERNAL TABLE ads_continuity_uv_count (
    `dt`               STRING COMMENT '统计日期',
    `wk_dt`            STRING COMMENT '最近7天日期',
    `continuity_count` BIGINT
)
COMMENT '连续活跃设备数'
STORED AS PARQUET
LOCATION '/warehouse/gmall/ads/ads_continuity_uv_count';
-- Number of devices active on at least 3 consecutive days within the 7 days
-- ending 2019-02-20. Output columns are named after ads_continuity_uv_count
-- (dt, wk_dt, continuity_count).
WITH ranked_days AS (
    -- Rank each device's active days inside the 7-day window.
    SELECT
        mid_id,
        dt,
        rank() OVER (PARTITION BY mid_id ORDER BY dt) AS rk
    FROM dws_uv_detail_day
    WHERE dt >= date_add('2019-02-20', -6)
      AND dt <= '2019-02-20'
),
anchored_days AS (
    -- Consecutive days share the same (dt - rank) value, so it identifies a streak.
    -- Presumes one row per device per dt (no rank ties) — TODO confirm.
    SELECT
        mid_id,
        date_sub(dt, rk) AS streak_anchor
    FROM ranked_days
),
streaks AS (
    -- Keep streaks of 3 or more days.
    SELECT mid_id
    FROM anchored_days
    GROUP BY mid_id, streak_anchor
    HAVING count(*) >= 3
),
streak_devices AS (
    -- A device can have two qualifying streaks in 7 days; count it once.
    SELECT mid_id
    FROM streaks
    GROUP BY mid_id
)
SELECT
    '2019-02-20' AS dt,
    concat(date_add('2019-02-20', -6), '_', '2019-02-20') AS wk_dt,
    count(*) AS continuity_count
FROM streak_devices;