一、创建表,并将数据导入userinfo和orderinfo表中
1.导入数据:
第一步:建表
-- Order table: one row per order, keyed by orderid.
-- price is an exact DECIMAL so that sum(price) does not pick up binary
-- floating-point rounding error (assumes at most two decimal places in the
-- source data -- TODO confirm against the CSV).
-- NOTE(review): paidTime stays a string so the CSV import behaves as before;
-- the date functions applied to it later rely on MySQL's implicit
-- string-to-date conversion.
create table orderinfo (
    orderid  int primary key,   -- a primary key is implicitly not null
    userid   int,
    isPaid   varchar(10),
    price    decimal(10, 2),
    paidTime varchar(30)
);
-- User table: one row per user, keyed by userid.
create table userinfo (
    userid int,
    sex    varchar(10),
    birth  date,
    primary key (userid)
);
第二步:导数
-- Bulk-load the order CSV (comma-separated) into orderinfo.
-- The column list names every table column in table order, which is the
-- same mapping MySQL applies when no list is given.
load data local infile 'C:/Users/lan/Desktop/123/order_info_utf.csv'
into table orderinfo
fields terminated by ','
(orderid, userid, isPaid, price, paidTime);
-- Bulk-load the user CSV (comma-separated) into userinfo.
-- The column list names every table column in table order, which is the
-- same mapping MySQL applies when no list is given.
load data local infile 'C:/Users/lan/Desktop/123/user_info_utf.csv'
into table userinfo
fields terminated by ','
(userid, sex, birth);
二、mysql数据分析案例
1.首先了解两个表的信息:
userinfo 用户信息表:
userid:主键,用户id
sex:用户性别
birth:用户生日
orderinfo 订单信息表
orderid:订单id
userid:用户id
ispaid:支付状态
price:支付价格
paidtime:支付时间
2.统计不同月份的下单人数
分析要点:不同月份,使用month函数获取订单日期中的月份,下单状态为已支付状态
-- Number of distinct users with a paid order, per month number.
-- month() returns only the month, so the same month of different years
-- falls into one group.
select
    month(paidtime),
    count(distinct userid)
from orderinfo
where ispaid = '已支付'
group by month(paidtime);
3.统计用户三月份和四月份回购率和复购率
复购率:本月消费的人数有多少是重复购买的,即购买次数大于1次;
分步计算:
首先求出三月份存在消费的用户,以及每个用户的消费次数;(由于运行数据较大限制显示行数)
-- Paid-order count (ct) per user for month 3; limit keeps the preview short.
select
    userid,
    count(userid) as ct
from orderinfo
where month(paidtime) = 3
  and ispaid = '已支付'
group by userid
limit 10;
如上表所示userid列为本月存在消费的用户,ct列为消费次数,统计ct列,count(ct)为计算本月存在消费总人数,通过if函数计算出消费次数大于1的消费者数量,即复购的消费者数,相除即为复购率;
-- count(ct): users with at least one paid order in month 3.
-- nt: those users whose paid-order count exceeds 1; if() yields null for
-- the rest, and count() skips nulls.
select
    count(ct),
    count(if(ct>1,1,null)) as nt
from (
    select
        userid,
        count(userid) as ct
    from orderinfo
    where month(paidtime) = 3
      and ispaid = '已支付'
    group by userid
) march_orders;
也可使用case when方法,结果相同;
-- Same two counts as the if() variant, written with case:
-- count(ct) is the number of users with a paid order in month 3, and nt is
-- the number of those users with more than one paid order (a case with no
-- else branch yields null, which count() skips).
select
    count(ct),
    count(case when ct > 1 then 1 end) as nt
from (
    select
        userid,
        count(userid) as ct
    from orderinfo
    where month(paidtime) = 3
      and ispaid = '已支付'
    group by userid
) march_orders;
回购率:三月份购买的用户中,四月份依旧购买的用户所占的比例;
首先用userid和月份进行分组,计算用户在当月是否消费过;(由于运行数据较大限制显示行数)
-- One row per (user, month) in which the user has a paid order.
-- date_format(..., '%Y-%m-01') maps every paid time to the first day of
-- its month, so the group by collapses a month's orders into one row.
select
    userid,
    date_format(paidtime,'%Y-%m-01')
from orderinfo
where ispaid = '已支付'
group by
    userid,
    date_format(paidtime,'%Y-%m-01')
limit 10;
通过left join对表进行关联,关联出三月、四月都存在消费情况(由于运行数据较大限制显示行数)
如图,userid5 在3月和4月都存在消费,
-- Pair each (user, month) row with the same user's row for the following
-- month. The left join keeps every current-month row; the next-month
-- columns are null when the user has no paid order in the next month.
select *
from (
    select
        userid,
        date_format(paidtime,'%Y-%m-01') as m
    from orderinfo
    where ispaid = '已支付'
    group by userid, date_format(paidtime,'%Y-%m-01')
) cur_month
left join (
    select
        userid,
        date_format(paidtime,'%Y-%m-01') as m
    from orderinfo
    where ispaid = '已支付'
    group by userid, date_format(paidtime,'%Y-%m-01')
) next_month
    on cur_month.userid = next_month.userid
   and cur_month.m = date_sub(next_month.m, interval 1 month)
limit 20;
对上表进行计数,计算出当月存在消费的人数和下月依然存在消费的人数,相除即为回购率;
-- Per month: count(t1.m) is the number of users with a paid order in that
-- month, count(t2.m) is how many of them also have a paid order in the
-- following month (unmatched rows contribute null, which count() skips).
select
    t1.m,
    count(t1.m),
    count(t2.m)
from (
    select
        userid,
        date_format(paidtime,'%Y-%m-01') as m
    from orderinfo
    where ispaid = '已支付'
    group by userid, date_format(paidtime,'%Y-%m-01')
) t1
left join (
    select
        userid,
        date_format(paidtime,'%Y-%m-01') as m
    from orderinfo
    where ispaid = '已支付'
    group by userid, date_format(paidtime,'%Y-%m-01')
) t2
    on t1.userid = t2.userid
   and t1.m = date_sub(t2.m, interval 1 month)
group by t1.m;
3.统计男女用户的消费频次是否有差异;
首先性别字段存在空值,先把空值过滤掉;
-- Preview users that have a non-blank sex value.
-- trim() makes the filter independent of the column collation: comparing
-- against ' ' only removes empty strings under PAD SPACE collations, while
-- under a NO PAD collation '' <> ' ' is true and blank rows slip through.
-- Rows with a null sex are excluded as well, because the comparison is
-- then unknown.
select *
from userinfo
where trim(sex) <> ''
limit 10;
将订单表与用户表相关联;
-- Preview orders joined to users that have a non-blank sex value.
-- trim(sex) <> '' drops empty, all-space and null values regardless of
-- whether the collation pads trailing spaces; the original sex <> ' '
-- let empty strings through under NO PAD collations.
select *
from orderinfo o
inner join (
    select *
    from userinfo
    where trim(sex) <> ''
) t
    on o.userid = t.userid
limit 20;
统计每个用户的消费次数(同时带出用户性别);
-- Order count per user, together with the user's sex.
-- trim(sex) <> '' drops empty, all-space and null values regardless of
-- whether the collation pads trailing spaces; the original sex <> ' '
-- let empty strings through under NO PAD collations.
-- The derived table selects only the two columns the outer query reads.
-- NOTE(review): every order row is counted, whatever its ispaid value --
-- confirm whether unpaid orders should be excluded.
select
    o.userid,
    t.sex,
    count(1)
from orderinfo o
inner join (
    select userid, sex
    from userinfo
    where trim(sex) <> ''
) t
    on o.userid = t.userid
group by o.userid, t.sex
limit 20;
统计男女消费频次差异;
-- Average number of orders per user, split by sex.
-- Inner query: one row per user with the order count (ct).
-- Outer query: mean of ct within each sex value.
-- trim(sex) <> '' drops empty, all-space and null values regardless of
-- whether the collation pads trailing spaces; the original sex <> ' '
-- let empty strings through under NO PAD collations, which would add a
-- spurious blank group to the result.
-- NOTE(review): every order row is counted, whatever its ispaid value --
-- confirm whether unpaid orders should be excluded.
select
    sex,
    avg(ct)
from (
    select
        o.userid,
        t.sex,
        count(1) as ct
    from orderinfo o
    inner join (
        select userid, sex
        from userinfo
        where trim(sex) <> ''
    ) t
        on o.userid = t.userid
    group by o.userid, t.sex
) t2
group by sex;
4.统计多次消费的用户,第一次消费和最后一次消费的时间间隔
首先求出多次消费的用户;
-- Users with more than one paid order, with their paid-order count.
-- Only the grouping key and an aggregate are selected: "select *" together
-- with "group by userid" is rejected when ONLY_FULL_GROUP_BY is enabled,
-- and returns arbitrary per-group values for the other columns when it is
-- disabled.
select
    userid,
    count(*) as paid_orders
from orderinfo
where ispaid = '已支付'
group by userid
having count(*) > 1;
求出最大时间、最小时间和时间间隔(求分组后组内的最大值和最小值;按照userid分组并用having过滤掉了消费次数不大于1的用户,所以每个用户至少存在两次消费,可选出最大时间和最小时间)
-- For users with more than one paid order: latest and earliest paid time,
-- and mt = number of days between them (datediff of max and min).
select
    userid,
    max(paidtime),
    min(paidtime),
    datediff(max(paidtime), min(paidtime)) as mt
from orderinfo
where ispaid = '已支付'
group by userid
having count(1) > 1
limit 20;
5.统计不同年龄段,消费是否有差异:
首先统计用户的年龄段;注意年龄计算函数,过滤无效数据;
-- Age band per user: ct = ceil((current year - birth year) / 10), so band n
-- covers year differences 10*(n-1)+1 through 10*n.
-- The lower bound is written as the valid date '1901-01-01' with >=. The
-- original literal '1901-00-00' has a zero month and day, which MySQL
-- rejects or warns about under strict / NO_ZERO_IN_DATE settings; for
-- valid dates the two predicates select the same rows.
select
    userid,
    ceil((year(now()) - year(birth)) / 10) as ct
from userinfo
where birth >= '1901-01-01'
limit 10;
订单表和用户表关联,查看不同年龄段消费情况;
-- Preview orders joined to each user's age band.
-- ct = ceil((current year - birth year) / 10).
-- The birth filter uses the valid date '1901-01-01' with >=. The original
-- literal '1901-00-00' has a zero month and day, which MySQL rejects or
-- warns about under strict / NO_ZERO_IN_DATE settings; for valid dates the
-- two predicates select the same rows.
select *
from orderinfo o
inner join (
    select
        userid,
        ceil((year(now()) - year(birth)) / 10) as ct
    from userinfo
    where birth >= '1901-01-01'
) t
    on o.userid = t.userid
limit 10;
统计不同年龄段消费频次;
-- Order count (mt) per user, together with the user's age band (ct).
-- ct = ceil((current year - birth year) / 10).
-- The birth filter uses the valid date '1901-01-01' with >=. The original
-- literal '1901-00-00' has a zero month and day, which MySQL rejects or
-- warns about under strict / NO_ZERO_IN_DATE settings; for valid dates the
-- two predicates select the same rows.
-- NOTE(review): every order row is counted, whatever its ispaid value --
-- confirm whether unpaid orders should be excluded.
select
    o.userid,
    t.ct,
    count(t.ct) as mt
from orderinfo o
inner join (
    select
        userid,
        ceil((year(now()) - year(birth)) / 10) as ct
    from userinfo
    where birth >= '1901-01-01'
) t
    on o.userid = t.userid
group by o.userid, t.ct
limit 10;
6.统计消费二八法则:消费金额排名前20%的用户,贡献了多少额度;
首先计算用户的消费总金额;
-- Total paid amount per user (preview of 10 rows).
select
    userid,
    sum(price)
from orderinfo
where ispaid = '已支付'
group by userid
limit 10;
-- Total paid amount per user, with the sum exposed as "total"
-- (preview of 10 rows).
select
    userid,
    sum(price) as total
from orderinfo
where ispaid = '已支付'
group by userid
limit 10;
计算出前20%用户的人数(用户总数×0.2),以及全部用户的消费总额;
-- First column: 20% of the number of users with a paid order, i.e. the
-- size of the "top 20%" group.
-- Second column: paid amount summed over all of those users.
-- The derived table carries no order by: both outer aggregates are
-- independent of row order, so sorting the per-user totals had no effect
-- on the result.
select
    count(userid) *0.2,
    sum(total)
from (
    select
        userid,
        sum(price) as total
    from orderinfo
    where ispaid = '已支付'
    group by userid
) t;
由于上一步计算出前20%的用户数约为17000,所以将内层查询的limit设为17000;
-- Head count and combined paid amount of the 17000 users with the largest
-- per-user paid totals (inner query sorts totals descending and keeps the
-- first 17000 rows).
select
    count(userid),
    sum(total)
from (
    select
        userid,
        sum(price) as total
    from orderinfo
    where ispaid = '已支付'
    group by userid
    order by total desc
    limit 17000
) t;