本项目以淘宝电商用户真实行为数据为数据源,运用Navicat 12 for MySQL对其进行数据清洗,利用AARRR模型和RFM模型对其展开数据分析,利用PowerBI制作可视化图像。
FROM UserBehavior;
# Derive two text columns from `timestamps`, which FROM_UNIXTIME() interprets
# as a Unix epoch value:
#   dates - calendar date formatted as YYYY-MM-DD
#   hours - time of day formatted as HH:MM:SS
# NOTE(review): FROM_UNIXTIME() renders in the session time zone - confirm the
# session time zone matches the one the data is meant to be analysed in.
ALTER TABLE UserBehavior ADD dates varchar(255);
# Whole-table update is intentional: every row gets its derived date.
UPDATE UserBehavior SET dates = FROM_UNIXTIME(timestamps, '%Y-%m-%d');
ALTER TABLE UserBehavior ADD hours varchar(255);
# Fix: minutes are '%i'. The previous '%m' is the month specifier, so the
# month number was written into the minutes position.
UPDATE UserBehavior SET hours = FROM_UNIXTIME(timestamps, '%H:%i:%s');
# Check the date range covered by the data, preview the rows dated before
# 2017-11-25, delete them, then check the range again.
# NOTE(review): only the lower bound is enforced here; later scoring uses
# '2017-12-03' as its reference date, so rows dated after that day (if any)
# are kept - confirm whether an upper bound is also wanted.
SELECT min(dates), max(dates)
FROM userbehavior;

SELECT *
FROM userbehavior
WHERE dates < '2017-11-25';

DELETE FROM userbehavior
WHERE dates < '2017-11-25';

SELECT min(dates), max(dates)
FROM userbehavior;
# Dataset overview: number of distinct users, items, categories and
# behavior types present in the table.
SELECT
    COUNT(DISTINCT userid)     AS 'customer',
    COUNT(DISTINCT itemid)     AS 'item',
    COUNT(DISTINCT categoryid) AS 'category',
    COUNT(DISTINCT behavior)   AS 'behaviortype'
FROM UserBehavior;
# Traffic summary: unique visitors (UV), page views (PV = rows whose behavior
# is 'pv') and the average number of page views per visitor.
SELECT
    COUNT(DISTINCT userid) AS 'UV',
    SUM(CASE behavior WHEN 'pv' THEN 1 ELSE 0 END) AS 'PV',
    SUM(CASE behavior WHEN 'pv' THEN 1 ELSE 0 END) / COUNT(DISTINCT userid) AS '人均浏览次数'
FROM userbehavior;
# Add the `onlyhours` column first: the two-digit hour of day (00-23)
# extracted from `timestamps`.
ALTER TABLE userbehavior
    ADD onlyhours VARCHAR(255);

# Whole-table update is intentional: every row gets its hour bucket.
UPDATE userbehavior
SET onlyhours = FROM_UNIXTIME(timestamps, '%H');
# User behavior per day and per hour: one row for each (dates, onlyhours)
# pair with the count of each behavior type, the total number of events and
# the number of distinct users active in that hour.
SELECT
    dates,
    onlyhours,
    SUM(CASE behavior WHEN 'pv'   THEN 1 ELSE 0 END) AS pv,
    SUM(CASE behavior WHEN 'fav'  THEN 1 ELSE 0 END) AS fav,
    SUM(CASE behavior WHEN 'cart' THEN 1 ELSE 0 END) AS cart,
    SUM(CASE behavior WHEN 'buy'  THEN 1 ELSE 0 END) AS buy,
    COUNT(behavior)        AS all_click,
    COUNT(DISTINCT userid) AS all_user
FROM userbehavior
GROUP BY dates, onlyhours
ORDER BY dates, onlyhours;
# User behavior per weekday: events are bucketed by the weekday name of
# `dates` and listed Monday through Sunday.
# NOTE(review): '%W' yields names in the session's lc_time_names locale; the
# FIELD() ordering below lists English names - confirm the locale is English.
SELECT
    date_format(dates, '%W') AS weeks,
    SUM(CASE behavior WHEN 'pv'   THEN 1 ELSE 0 END) AS pv,
    SUM(CASE behavior WHEN 'fav'  THEN 1 ELSE 0 END) AS fav,
    SUM(CASE behavior WHEN 'cart' THEN 1 ELSE 0 END) AS cart,
    SUM(CASE behavior WHEN 'buy'  THEN 1 ELSE 0 END) AS buy,
    COUNT(behavior)        AS all_click,
    COUNT(DISTINCT userid) AS all_user
FROM userbehavior
GROUP BY weeks
ORDER BY FIELD(weeks,
               'Monday', 'Tuesday', 'Wednesday', 'Thursday',
               'Friday', 'Saturday', 'Sunday');
浏览页跳失率=仅有点击行为的用户数 / 总UV,为7.02%。具体是指用户仅仅有pv行为,没有其它的收藏、加购、购买行为。较低的浏览页跳失率表明用户对目标页面和推荐商品有一定兴趣。
关键页跳失率=有收藏或加购行为但无购买的用户数 / 总UV,为51.88%。结合前面按日期分布的用户行为特征分析,由于临近双十二,较多用户会先收藏或加购商品,等到优惠力度更大时再购买。另一方面,也可能是由于商品库存不足、尺码或颜色缺货等问题。
# Landing-page bounce rate: users that have no 'fav', 'cart' or 'buy' row,
# as a share of all distinct users.
# Output columns: total users, bounced users, and the rate formatted as a
# percentage string with two decimals.
# Fixes:
#   - 'creat' was a typo for CREATE, so the statement did not parse.
#   - The three NOT IN subqueries are folded into one NOT EXISTS, which
#     stays correct if a userid in the table is NULL (NOT IN would then
#     match no rows at all).
CREATE VIEW bounce_rate AS
SELECT
    (SELECT COUNT(DISTINCT userid) FROM userbehavior) AS 总用户,
    COUNT(DISTINCT u.userid) AS 仅pv用户,
    CONCAT(
        FORMAT(
            COUNT(DISTINCT u.userid)
                / (SELECT COUNT(DISTINCT userid) FROM userbehavior) * 100,
            2),
        '%') AS 浏览页跳失率
FROM userbehavior AS u
WHERE NOT EXISTS (
    SELECT 1
    FROM userbehavior AS x
    WHERE x.userid = u.userid
      AND x.behavior IN ('fav', 'cart', 'buy')
);
# Key-page bounce rate: users that favourited or added to cart but never
# bought, as a share of all distinct users.
# Output columns: total users, such users, and the rate formatted as a
# percentage string with two decimals.
# Fixes:
#   - 'creat' was a typo for CREATE, so the statement did not parse.
#   - AND binds tighter than OR, so the old filter read as
#     fav OR (cart AND NOT buy) and counted every user with a 'fav' row even
#     when they had bought. The "no purchase" condition now applies to both
#     the 'fav' and the 'cart' branch.
CREATE VIEW key_rate AS
SELECT
    (SELECT COUNT(DISTINCT userid) FROM userbehavior) AS 总用户,
    COUNT(DISTINCT u.userid) AS 收藏加购用户,
    CONCAT(
        FORMAT(
            COUNT(DISTINCT u.userid)
                / (SELECT COUNT(DISTINCT userid) FROM userbehavior) * 100,
            2),
        '%') AS 关键页跳失率
FROM userbehavior AS u
WHERE EXISTS (
          SELECT 1
          FROM userbehavior AS x
          WHERE x.userid = u.userid
            AND x.behavior IN ('fav', 'cart')
      )
  AND NOT EXISTS (
          SELECT 1
          FROM userbehavior AS y
          WHERE y.userid = u.userid
            AND y.behavior = 'buy'
      );
# One row per (user, active day) together with that user's first active day
# and the number of days between the two (day_diff = 0 on the first day).
CREATE VIEW time_inter AS
SELECT
    a.*,
    b.firstday,
    DATEDIFF(a.dates, b.firstday) AS day_diff
FROM (
    # distinct days on which each user has at least one event
    SELECT DISTINCT userid, dates
    FROM userbehavior
) AS a
INNER JOIN (
    # earliest day seen for each user
    SELECT userid, MIN(dates) AS firstday
    FROM userbehavior
    GROUP BY userid
) AS b
    ON a.userid = b.userid
ORDER BY a.userid, a.dates;
# Retention counts: for each first-active day (cohort), the number of users
# that were active N days later, for N = 0..8. day_0 is the cohort size.
CREATE VIEW retention_day AS
SELECT
    firstday,
    SUM(CASE day_diff WHEN 0 THEN 1 ELSE 0 END) AS day_0,
    SUM(CASE day_diff WHEN 1 THEN 1 ELSE 0 END) AS day_1,
    SUM(CASE day_diff WHEN 2 THEN 1 ELSE 0 END) AS day_2,
    SUM(CASE day_diff WHEN 3 THEN 1 ELSE 0 END) AS day_3,
    SUM(CASE day_diff WHEN 4 THEN 1 ELSE 0 END) AS day_4,
    SUM(CASE day_diff WHEN 5 THEN 1 ELSE 0 END) AS day_5,
    SUM(CASE day_diff WHEN 6 THEN 1 ELSE 0 END) AS day_6,
    SUM(CASE day_diff WHEN 7 THEN 1 ELSE 0 END) AS day_7,
    SUM(CASE day_diff WHEN 8 THEN 1 ELSE 0 END) AS day_8
FROM time_inter
GROUP BY firstday
ORDER BY firstday;
# Retention-rate model: each day_N count from retention_day divided by the
# cohort size day_0, shown as a percentage string with two decimals.
# The arithmetic order (divide, then multiply by 100) is kept as-is because
# it determines the intermediate rounding.
CREATE VIEW retention_rate AS
SELECT
    firstday,
    day_0,
    CONCAT(FORMAT(day_1 / day_0 * 100, 2), '%') AS day_1,
    CONCAT(FORMAT(day_2 / day_0 * 100, 2), '%') AS day_2,
    CONCAT(FORMAT(day_3 / day_0 * 100, 2), '%') AS day_3,
    CONCAT(FORMAT(day_4 / day_0 * 100, 2), '%') AS day_4,
    CONCAT(FORMAT(day_5 / day_0 * 100, 2), '%') AS day_5,
    CONCAT(FORMAT(day_6 / day_0 * 100, 2), '%') AS day_6,
    CONCAT(FORMAT(day_7 / day_0 * 100, 2), '%') AS day_7,
    CONCAT(FORMAT(day_8 / day_0 * 100, 2), '%') AS day_8
FROM retention_day;
# Per (user, item) pair: how many view, favourite, add-to-cart and purchase
# events were recorded. Used by the path-count queries.
CREATE VIEW c AS
SELECT
    userid,
    itemid,
    SUM(CASE behavior WHEN 'pv'   THEN 1 ELSE 0 END) AS '点击',
    SUM(CASE behavior WHEN 'fav'  THEN 1 ELSE 0 END) AS '收藏',
    SUM(CASE behavior WHEN 'cart' THEN 1 ELSE 0 END) AS '加入购物车',
    SUM(CASE behavior WHEN 'buy'  THEN 1 ELSE 0 END) AS '购买'
FROM userbehavior
GROUP BY userid, itemid;
# Conversion-path counts over view `c`. Each count is a number of
# (user, item) pairs, not of users.
# A path is defined by which of favourite / add-to-cart occurred after a
# click; the "...purchase" variant of each path additionally requires a buy.
# Fix: the click -> cart -> purchase query now also requires no favourite,
# like its click -> cart sibling. Without it, that count also included the
# pairs of the click -> favourite -> cart -> purchase path.

# click
SELECT COUNT(userid) AS '点击'
FROM c
WHERE 点击 > 0;

# click -> purchase (no favourite, no cart)
SELECT COUNT(userid) AS '点击、购买'
FROM c
WHERE 点击 > 0 AND 收藏 = 0 AND 加入购物车 = 0 AND 购买 > 0;

# click -> cart (no favourite)
SELECT COUNT(userid) AS '点击、加入购物车'
FROM c
WHERE 点击 > 0 AND 收藏 = 0 AND 加入购物车 > 0;

# click -> cart -> purchase (no favourite)
SELECT COUNT(userid) AS '点击、加入购物车、购买'
FROM c
WHERE 点击 > 0 AND 收藏 = 0 AND 加入购物车 > 0 AND 购买 > 0;

# click -> favourite (no cart)
SELECT COUNT(userid) AS '点击、收藏'
FROM c
WHERE 点击 > 0 AND 收藏 > 0 AND 加入购物车 = 0;

# click -> favourite -> purchase (no cart)
SELECT COUNT(userid) AS '点击、收藏、购买'
FROM c
WHERE 点击 > 0 AND 收藏 > 0 AND 加入购物车 = 0 AND 购买 > 0;

# click -> favourite -> cart
SELECT COUNT(userid) AS '点击、收藏、加入购物车'
FROM c
WHERE 点击 > 0 AND 收藏 > 0 AND 加入购物车 > 0;

# click -> favourite -> cart -> purchase
SELECT COUNT(userid) AS '点击、收藏、加入购物车、购买'
FROM c
WHERE 点击 > 0 AND 收藏 > 0 AND 加入购物车 > 0 AND 购买 > 0;
# Funnel model: for each behavior type, the number of events (behavior
# funnel) and the number of distinct users (unique-visitor funnel), listed
# in funnel order pv -> fav -> cart -> buy.
SELECT
    behavior,
    COUNT(behavior)        AS behavior_times,
    COUNT(DISTINCT userid) AS user_times
FROM userbehavior
GROUP BY behavior
ORDER BY FIELD(behavior, 'pv', 'fav', 'cart', 'buy');
# Per-user event counts by behavior type, plus the user's purchase rate
# (purchases / page views) both as a formatted percentage string and as the
# raw ratio `sort`, which orders the view from highest to lowest.
CREATE VIEW user_behavior_times AS
SELECT
    userid,
    SUM(CASE behavior WHEN 'pv'   THEN 1 ELSE 0 END) AS pv_times,
    SUM(CASE behavior WHEN 'fav'  THEN 1 ELSE 0 END) AS fav_times,
    SUM(CASE behavior WHEN 'cart' THEN 1 ELSE 0 END) AS cart_times,
    SUM(CASE behavior WHEN 'buy'  THEN 1 ELSE 0 END) AS buy_times,
    CONCAT(
        FORMAT(
            SUM(CASE behavior WHEN 'buy' THEN 1 ELSE 0 END)
                / SUM(CASE behavior WHEN 'pv' THEN 1 ELSE 0 END) * 100,
            2),
        '%') AS 购买率,
    SUM(CASE behavior WHEN 'buy' THEN 1 ELSE 0 END)
        / SUM(CASE behavior WHEN 'pv' THEN 1 ELSE 0 END) AS sort
FROM userbehavior
GROUP BY userid
ORDER BY sort DESC;
本文的复购率 = 购买次数>1的用户数 / 购买次数>0的用户数
# Repurchase rate: users with more than one purchase divided by users with
# at least one purchase, as a percentage string with two decimals.
CREATE VIEW repurchase_rate AS
SELECT
    CONCAT(
        FORMAT(
            (SELECT COUNT(userid) FROM user_behavior_times WHERE buy_times > 1)
                / (SELECT COUNT(userid) FROM user_behavior_times WHERE buy_times > 0)
                * 100,
            2),
        '%') AS 复购率;

SELECT *
FROM repurchase_rate;
# Recency input for RFM: the most recent purchase date of every user that
# has at least one 'buy' event.
CREATE VIEW R AS
SELECT
    userid,
    MAX(dates) AS 'recency'
FROM userbehavior
WHERE behavior = 'buy'
GROUP BY userid;

SELECT *
FROM R;
# Recency score R1 (4 = most recent): days between the last purchase and
# the reference date 2017-12-03, bucketed as
#   0-2 days -> 4, 3-4 days -> 3, 5-6 days -> 2, more than 6 days -> 1.
# DATEDIFF() returns whole days, so these disjoint ranges select exactly the
# same rows as the earlier overlapping 0-2 / 2-4 / 4-6 ranges (the first
# matching branch always won). A negative difference matches no branch and
# yields NULL, as before.
CREATE VIEW R1 AS
SELECT
    userid,
    recency,
    (CASE
        WHEN DATEDIFF('2017-12-03', recency) BETWEEN 0 AND 2 THEN 4
        WHEN DATEDIFF('2017-12-03', recency) BETWEEN 3 AND 4 THEN 3
        WHEN DATEDIFF('2017-12-03', recency) BETWEEN 5 AND 6 THEN 2
        WHEN DATEDIFF('2017-12-03', recency) > 6 THEN 1
     END) AS R1
FROM R;

SELECT *
FROM R1;

# Average recency score across all purchasing users.
SELECT AVG(R1) AS R_avg
FROM R1;
# Frequency input for RFM: number of 'buy' events per user.
# GROUP BY userid already yields one row per user, so no DISTINCT is needed.
CREATE VIEW F AS
SELECT
    userid,
    COUNT(behavior) AS 购买次数
FROM userbehavior
WHERE behavior = 'buy'
GROUP BY userid;

SELECT *
FROM F;
# Frequency score F1 (4 = most frequent) from each user's purchase count:
#   <= 2 -> 1, 3-4 -> 2, 5-8 -> 3, > 8 -> 4.
# Fix: the chained comparison `2 < x <= 4` is parsed as `(2 < x) <= 4`, i.e.
# a 0/1 truth value compared with 4, which is always true. Every user with
# more than two purchases therefore scored 2 and the scores 3 and 4 were
# never assigned. Branches are evaluated in order, so a single upper bound
# per branch expresses the intended ranges.
# NOTE: this changes F_avg; any threshold that was derived from the old
# average should be recomputed.
CREATE VIEW F1 AS
SELECT
    userid,
    购买次数,
    (CASE
        WHEN 购买次数 <= 2 THEN 1
        WHEN 购买次数 <= 4 THEN 2
        WHEN 购买次数 <= 8 THEN 3
        WHEN 购买次数 > 8 THEN 4
     END) AS F1
FROM F;

SELECT *
FROM F1;

# Average frequency score across all purchasing users.
SELECT AVG(F1) AS F_avg
FROM F1;
# RFM segmentation: joins each user's recency score (R1) and frequency
# score (F1) and labels the user by comparing both scores with the
# respective averages over all purchasing users.
# Change: the averages were hard-coded (3.2846 and 1.4352), copied from
# earlier query output, and go stale whenever the data or the scoring rules
# change. They are now computed from the score views, so the segmentation
# always matches the current scores. The implicit comma join is also written
# as an explicit INNER JOIN.
CREATE VIEW RFM AS
SELECT
    a.*,
    b.F1,
    (CASE
        WHEN a.R1 >= (SELECT AVG(r.R1) FROM R1 AS r)
         AND b.F1 >= (SELECT AVG(f.F1) FROM F1 AS f) THEN '重要价值用户'
        WHEN a.R1 >= (SELECT AVG(r.R1) FROM R1 AS r)
         AND b.F1 <  (SELECT AVG(f.F1) FROM F1 AS f) THEN '重要发展用户'
        WHEN a.R1 <  (SELECT AVG(r.R1) FROM R1 AS r)
         AND b.F1 >= (SELECT AVG(f.F1) FROM F1 AS f) THEN '重要保持用户'
        WHEN a.R1 <  (SELECT AVG(r.R1) FROM R1 AS r)
         AND b.F1 <  (SELECT AVG(f.F1) FROM F1 AS f) THEN '重要挽留用户'
     END) AS 用户分类
FROM R1 AS a
INNER JOIN F1 AS b
    ON a.userid = b.userid;

SELECT *
FROM RFM;

# Number of users in each segment.
SELECT
    用户分类,
    COUNT(用户分类) AS 用户个数
FROM RFM
GROUP BY 用户分类;
# Per (category, item): number of page views and number of purchases.
# NOTE(review): each (categoryid, itemid) pair is a single group, so the
# trailing buy_times sort key can never change the order - confirm whether
# ordering by purchases was intended.
CREATE VIEW hot_item AS
SELECT
    categoryid,
    itemid,
    SUM(CASE behavior WHEN 'pv'  THEN 1 ELSE 0 END) AS pv_times,
    SUM(CASE behavior WHEN 'buy' THEN 1 ELSE 0 END) AS buy_times
FROM userbehavior
GROUP BY categoryid, itemid
ORDER BY categoryid, itemid, buy_times;

SELECT *
FROM hot_item;