实际业务-用户流失模型(附:python代码)

记录在工作中建立的用户流失模型

一、数据监控

通过日报、周报监控用户留存率及流失率

二、发现问题

用户流失率在行业内处于较高水平

三、分析目的

通过现有数据搭建用户流失模型,并据此采取针对性手段,提高留存、降低流失

四、数据分析

1. sql取数

代码见文章末尾


image.png

附:sql逻辑

select i.uid,i.userid,i.daudays,i.recentdate,i.playdate,i.download,i.likes,k.uid,k.songlist,k.artist,k.scene,k.album,k.song
from
(select g.uid,e.userid,g.daudays,g.recentdate,g.playdate,g.download,h.likes
from
(select e.uid,e.userid,e.daudays,e.recentdate,e.playdate,f.download
from
(select c.uid,c.userid,c.daudays,c.recentdate,d.playdate
from
(select a.uid,a.userid,a.daudays,b.recentdate
from
(select uid,userid,count(distinct cdate) as daudays
from logformat.log_format_app_startup 
where cdate>='20200215' and cdate<='20200317'  
and version like '8%'
and producttype='apk'
group by 1,2) as a--近30天设备活跃天数

join

(select t.uid,t.cdate as recentdate
from
(select uid,cdate,
row_number() over (partition by uid order by cdate desc) as rank
from logformat.log_format_app_startup 
where cdate>='20200215' and cdate<='20200317'
and version like '8%'
and producttype='apk')t
where rank=1
) as b--最近一次活跃的日期--
on a.uid = b.uid) as c

left join

(select tt.uid,tt.cdate as playdate
from
(select uid,cdate,
row_number() over (partition by uid order by cdate desc) as rank
from logformat.log_format_app_startup 
where cdate>='20200215' and cdate<='20200317'
and version like '8%'
and producttype='apk'
and caction in ('play','localplay'))tt
where rank=1
) as d--近最近一次发起播放的日期
on c.uid = d.uid) as e

left join

(select event_baiduid,count(1) as download
from music_mds_bhv_download
where event_day>='20200215' and  event_day<='20200317' 
and event_terminal_type='wiseclient' and event_baiduid<>'' and event_baiduid is not null
group by 1) as f--近30天下载次数--
on e.uid = f.event_baiduid) as g

left join 


(select event_baiduid,count(1) as likes
from music_mds_bhv_fav
where event_day>='20200215' and  event_day<='20200317' 
and event_terminal_type='wiseclient' and event_baiduid<>'' and event_baiduid is not null
and fav_type in ('android_fav','iphone_fav')
group by 1) as h --近30天收藏次数--
on g.uid = h.event_baiduid) as i

left join

(select j.uid,j.songlist,j.artist,j.scene,j.album,j.song
from
(select event_urlparams ['cuid'] as uid,
sum(case when event_urlparams['pathtype']='songlist' then 1 else 0 end) as songlist ,
sum(case when event_urlparams['pathtype']='artist' then 1 else 0 end) as artist ,
sum(case when event_urlparams['pathtype']='scene' then 1 else 0 end) as scene ,
sum(case when event_urlparams['pathtype']='album' then 1 else 0 end) as album ,
sum(case when event_urlparams['pathtype']='' then 1 else 0 end) as song 
from udw.udw_event
where event_day>='20200215' and  event_day<='20200317' 
and event_pid='121' and event_action in ('music_mobile_app_play','music_mobile_app_other')
and event_urlparams['action'] in ('play','localplay')
and event_urlparams ['mod'] = 'ios'
group by 1

union all

select event_urlparams ['cuid'] as uid,
sum(case when (size(split(event_urlparams['from'],'_'))>2 and regexp_extract(event_urlparams['from'],'^(.*)\\_(.*?)\\_([0-9,]+)$',3) <> '')='songlist') then 1 else 0) as songlist,
sum(case when (size(split(event_urlparams['from'],'_'))>2 and regexp_extract(event_urlparams['from'],'^(.*)\\_(.*?)\\_([0-9,]+)$',3) <> '')='artist') then 1 else 0) as artist,
sum(case when (size(split(event_urlparams['from'],'_'))>2 and regexp_extract(event_urlparams['from'],'^(.*)\\_(.*?)\\_([0-9,]+)$',3) <> '')='scene') then 1 else 0) as scene,
sum(case when (size(split(event_urlparams['from'],'_'))>2 and regexp_extract(event_urlparams['from'],'^(.*)\\_(.*?)\\_([0-9,]+)$',3) <> '')='album') then 1 else 0) as album,
sum(case when (size(split(event_urlparams['from'],'_'))>2 and regexp_extract(event_urlparams['from'],'^(.*)\\_(.*?)\\_([0-9,]+)$',3) <> '')='') then 1 else 0) as song
from udw.udw_event
where event_day>='20200215' and  event_day<='20200317' 
and event_pid='121' and event_action in ('music_mobile_app_play','music_mobile_app_other')
and event_urlparams['action'] in ('play','localplay')
and event_urlparams ['mod'] = 'android'
group by 1) as j ) as k--是否在仅30天内从歌单、场景、艺人、专辑、单曲维度发起过播放--
on i.uid = k.uid

你可能感兴趣的:(实际业务-用户流失模型(附:python代码))