本文数据来源于《七周成为数据分析师》,可在以下网盘链接获取本次分析数据:
链接:https://pan.baidu.com/s/14CvQiDa6blA2v1L5Xdu5hA
提取码:bmzp
数据格式浏览:
表order_info_utf包括了以下五个指标:
orderId:订单id,userId:用户id,isPaid:支付状态(如'已支付'),price:购买价格,paidTime:支付时间
表user_info_utf包括了以下三个指标:
userId:用户id,sex:用户性别,birth:用户出生日期
-- Monthly revenue: summed price of paid orders, bucketed by payment month (YYYY-MM).
select
    date_format(paidtime, '%Y-%m') as month,
    sum(price)
from
    order_info_utf
where
    ispaid = '已支付'
group by
    date_format(paidtime, '%Y-%m');
-- Monthly order volume: number of paid orders per payment month (YYYY-MM).
select
    date_format(paidtime, '%Y-%m') as month,
    count(*)
from
    order_info_utf
where
    ispaid = '已支付'
group by
    date_format(paidtime, '%Y-%m');
-- Monthly paying users: distinct users with at least one paid order in each
-- payment month (YYYY-MM).
select
    date_format(paidtime, '%Y-%m') as month,
    count(distinct userid) as number
from
    order_info_utf
where
    ispaid = '已支付'
group by
    date_format(paidtime, '%Y-%m');
用户首次购买(付费)有特别的意义,尤其对于可免费体验的产品。
首次付费模式(什么情景下有人付费)可反推至其他未付费的用户,以辅助运营决策,有利于激活用户付费。
-- First paid order of every user, one row per user, ordered by user id.
--
-- Changes compared with the earlier form of this query:
--   * The outer `group by userid` is gone. It was ambiguous (both joined sides
--     expose userid) and, where accepted, collapsed `t1.*` to arbitrary values.
--   * The candidate rows are restricted to paid orders, so an unpaid order that
--     shares the first-payment timestamp can no longer be returned.
--   * When several paid orders share the first timestamp, the one with the
--     smallest orderid is returned, which keeps the result deterministic.
-- NOTE(review): assumes orderid identifies a single order row - confirm
-- against the table definition.
select o.*
from order_info_utf as o
inner join (
    -- One orderid per user: the smallest id among the paid orders placed at
    -- that user's earliest paid timestamp.
    select c.userid, min(c.orderid) as orderid
    from order_info_utf as c
    inner join (
        -- Earliest paid timestamp per user.
        select userid, min(paidtime) as paidtime
        from order_info_utf
        where ispaid = '已支付'
        group by userid
    ) as f
        on c.userid = f.userid
       and c.paidtime = f.paidtime
    where c.ispaid = '已支付'
    group by c.userid
) as pick
    on o.userid = pick.userid
   and o.orderid = pick.orderid
order by o.userid;
可以根据顾客消费高峰期进行相应的推荐
-- Paid orders per hour of day, busiest hour first.
select
    hour(paidtime),
    count(*)
from
    order_info_utf
where
    ispaid = '已支付'
group by
    hour(paidtime)
order by
    count(*) desc;
-- Average number of paid orders per user, split by sex.
--
-- Only paid orders are counted, in line with the other metrics in this file;
-- without the filter, unpaid orders inflated the per-user counts.
-- Users whose sex is blank are left out.
SELECT sex, avg(ct)
FROM (
    -- Paid-order count for each user that has a known sex.
    SELECT o.userId, u.sex, count(1) AS ct
    FROM order_info_utf o
    INNER JOIN (
        SELECT userId, sex
        FROM user_info_utf
        WHERE sex <> ''
    ) u
        ON o.userId = u.userId
    WHERE o.isPaid = '已支付'
    GROUP BY o.userId, u.sex
) t
GROUP BY sex;
-- Average paid amount per user, pivoted into one row per sex and one column
-- per age band.
--
-- Changes compared with the earlier form of this query:
--   * Rows whose age is NULL or negative are dropped, and age 0 belongs to the
--     '0-20' band. Previously all of these fell through to the '60以上' band.
--   * Age is the number of completed years (TIMESTAMPDIFF) instead of the
--     difference of calendar years, which could overstate the age by one.
--   * Column aliases are quoted as identifiers (backticks) rather than string
--     literals; the resulting column names are unchanged.
select
    sex,
    sum(case when age_group = '0-20'  then avg_p else 0 end) as `0-20`,
    sum(case when age_group = '20-40' then avg_p else 0 end) as `20-40`,
    sum(case when age_group = '40-60' then avg_p else 0 end) as `40-60`,
    sum(case when age_group = '60以上' then avg_p else 0 end) as `60以上`
from (
    -- Mean per-user spend for every (sex, age band) pair.
    select
        sex,
        case
            when age < 20 then '0-20'
            when age < 40 then '20-40'
            when age < 60 then '40-60'
            else '60以上'
        end as age_group,
        avg(price) as avg_p
    from (
        -- One row per paying user: sex, age and total paid amount.
        select
            o.userid,
            u.sex,
            timestampdiff(year, u.birth, curdate()) as age,
            o.price
        from (
            select userid, sum(price) as price
            from order_info_utf
            where ispaid = '已支付'
            group by userid
        ) o
        inner join (
            select userid, sex, birth
            from user_info_utf
            where sex <> '' and birth <> ''
        ) u
            on o.userid = u.userid
    ) t
    -- Also removes NULL ages, so the ELSE branch above only sees age >= 60.
    where age >= 0
    group by sex, age_group
) tt
group by sex;
-- Per user: last and first paid timestamps and the number of days between
-- them, restricted to users with more than one paid order.
SELECT
    userId,
    max(paidTime),
    min(paidTime),
    DATEDIFF(max(paidTime),min(paidTime))
FROM
    order_info_utf
WHERE
    isPaid = '已支付'
GROUP BY
    userId
HAVING
    count(userId) > 1;
方法一:
-- Top three revenue days of every month, ranked with user variables.
--
-- The daily aggregate groups by both the month and the day expression.
-- Grouping by the day alone while also selecting the month expression is
-- rejected when ONLY_FULL_GROUP_BY is enabled.
--
-- NOTE(review): the ranking relies on rows being visited in
-- (month, price desc) order while @pre_month / @cur_rank are updated. MySQL
-- does not define the evaluation order of user-variable assignments, so verify
-- the output on the target server version.
select month, day, price
from (
    select
        daily.*,
        -- Restart the counter at 1 whenever the month changes.
        if(@pre_month = month, @cur_rank := @cur_rank + 1, @cur_rank := 1) as ranking,
        @pre_month := month as prev_month_marker
    from (
        -- Paid revenue per calendar day, tagged with its month.
        select
            date_format(paidtime, '%Y-%m') as month,
            date_format(paidtime, '%Y-%m-%d') as day,
            sum(price) as price
        from order_info_utf
        where ispaid = '已支付'
        group by
            date_format(paidtime, '%Y-%m'),
            date_format(paidtime, '%Y-%m-%d')
    ) daily
    cross join (select @pre_month := null, @cur_rank := 0) r
    order by month, price desc
) ranked
where ranking <= 3;
方法二:
-- Top three revenue days of every month, without user variables: a day is
-- kept when fewer than three days of the same month have a strictly higher
-- revenue (days tied with the third place are all returned).
--
-- Changes compared with the earlier form of this query:
--   * Both daily aggregates group by the month and the day expression, so the
--     query is accepted when ONLY_FULL_GROUP_BY is enabled.
--   * The ORDER BY clauses inside the derived tables are removed; the sort by
--     day is applied to the final result, where it is actually guaranteed.
--   * The output columns are listed explicitly (same columns, same order).
select d.month, d.day, d.price
from (
    select
        date_format(paidtime, '%Y%m') as month,
        date_format(paidtime, '%Y%m%d') as day,
        sum(price) as price
    from order_info_utf
    where ispaid = '已支付'
    group by
        date_format(paidtime, '%Y%m'),
        date_format(paidtime, '%Y%m%d')
) d
where (
    -- Number of days in the same month that earned strictly more.
    select count(*)
    from (
        select
            date_format(paidtime, '%Y%m') as month,
            date_format(paidtime, '%Y%m%d') as day,
            sum(price) as price
        from order_info_utf
        where ispaid = '已支付'
        group by
            date_format(paidtime, '%Y%m'),
            date_format(paidtime, '%Y%m%d')
    ) better
    where better.month = d.month
      and better.price > d.price
) < 3
order by d.day;
-- Number of paying users and the percentage of them with exactly one paid order.
select
    count(ct),
    concat(count(case when ct = 1 then 1 end) / count(ct) * 100, '%') 消费一次用户占比
from (
    -- Paid-order count per user.
    select
        userId,
        count(userId) ct
    from order_info_utf
    where isPaid = '已支付'
    group by userId
) t;
复购率指当月消费次数在一次以上(即至少两次)的用户占当月消费用户总数的比例
-- Per month: paying users, users with more than one paid order in that month,
-- and the ratio of the two as a percentage string.
SELECT
    m,
    count(ct) m_user,
    count(CASE WHEN ct > 1 THEN 1 END) pur_user,
    concat(count(CASE WHEN ct > 1 THEN 1 END) / count(ct) * 100, '%') AS 复购率
FROM (
    -- Paid-order count per (month, user).
    SELECT
        date_format(paidTime, '%Y-%m') m,
        userId,
        count(userId) ct
    FROM order_info_utf
    WHERE isPaid = '已支付'
    GROUP BY date_format(paidTime, '%Y-%m'), userId
) t
GROUP BY m;
回购率指本月购买的用户中,下个月依旧购买的用户所占的比例
-- Per month: paying users, how many of them also paid in the following month,
-- and the ratio of the two as a percentage string.
SELECT
    t1.m,
    count(t1.m) m_user,
    count(t2.m) re_user,
    concat(count(t2.m) / count(t1.m) * 100, '%') 回购率
FROM (
    -- One row per (user, month with a paid order); the month is keyed by its first day.
    SELECT DISTINCT
        userId,
        date_format(paidTime, '%Y-%m-01') m
    FROM order_info_utf
    WHERE isPaid = '已支付'
) t1
LEFT JOIN (
    SELECT DISTINCT
        userId,
        date_format(paidTime, '%Y-%m-01') m
    FROM order_info_utf
    WHERE isPaid = '已支付'
) t2
    -- t2 matches only when the same user has a paid order exactly one month later.
    ON t1.userId = t2.userId
   AND t1.m = date_sub(t2.m, interval 1 month)
GROUP BY t1.m;
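知识点:求前一天或前一个月的表达(例如:今天是2020年8月25日):date_sub('2020-08-25',interval 1 day) 得到 2020-08-24,date_sub('2020-08-25',interval 1 month) 得到 2020-07-25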
统计消费的二八法则,消费的top20%用户,贡献了多少额度?
-- First, count all paying users and sum their paid amounts.
-- The per-user derived table is no longer sorted: both outer aggregates are
-- order-independent, so the ORDER BY only added a wasted sort.
select count(userId), sum(total)
from (
    select userId, sum(price) total
    from order_info_utf
    where isPaid = '已支付'
    group by userId
) t;
-- Then, sum the paid amount of the highest-spending users and express it as a
-- percentage of all paid revenue.
--
-- The denominator is computed from the table instead of the hard-coded
-- 318503081, so the percentage stays correct when the data changes.
-- NOTE(review): 17130 is presumably 20% of the paying-user count returned by
-- the preceding query - confirm. LIMIT only accepts a literal here, so the
-- value has to be recomputed by hand whenever the data changes.
SELECT
    count(userId),
    concat(
        sum(total)
            / (SELECT sum(price) FROM order_info_utf WHERE isPaid = '已支付')
            * 100,
        '%'
    ) 占比
FROM (
    SELECT userId, sum(price) total
    FROM order_info_utf
    WHERE isPaid = '已支付'
    GROUP BY userId
    ORDER BY total DESC
    LIMIT 17130
) t;