在 MySQL 中导入 order_info_utf.csv 和 user_info_utf.csv 两个文件
1. 统计不同月份的下单人数
SELECT MONTH(paidTime),COUNT(DISTINCT userId) # 用DISTINCT 对 userId 去重
FROM data.orderinfo
WHERE isPaid = '已支付' # 过滤脏数据
GROUP BY MONTH(paidTime);
2. 三月份的回购率(次月也有购买 )和复购(当月购买次数在一次以上)率
a. 统计用户三月份的复购率
# 三月已支付的用户
SELECT * FROM data.orderinfo
WHERE isPaid = '已支付'
AND MONTH(paidTime) = 3;
# 不同用户 id 购买次数
SELECT userId, count(userId) FROM data.orderinfo
WHERE isPaid = '已支付'
AND MONTH(paidTime) = 3
GROUP BY userId;
# 用户复购率
SELECT COUNT(ct), count(if(ct > 1, 1, null)), count(if(ct > 1, 1, null)) / COUNT(ct)
FROM(
SELECT userId, count(userId) AS ct FROM data.orderinfo
WHERE isPaid = '已支付'
AND MONTH(paidTime) = 3
GROUP BY userId) t;
b. 用户回购率
# 每个 useId 在每月的消费情况
SELECT userId, DATE_FORMAT(paidTime, '%Y-%m-01') # 按月统计是否有消费
FROM data.orderinfo
GROUP BY userId, DATE_FORMAT(paidTime, '%Y-%m-01');
# 回购率
SELECT t1.m, COUNT(t1.m), COUNT(t2.m) FROM( # 以月份,t1 月购买总量,t2 月购买总量为三个字段
SELECT userId, DATE_FORMAT(paidTime, '%Y-%m-01') m FROM data.orderinfo
WHERE isPaid = '已支付'
GROUP BY userId, DATE_FORMAT(paidTime, '%Y-%m-01')) t1 # t1 月购买商品的客户 id 和购买日期
LEFT JOIN(
SELECT userId, DATE_FORMAT(paidTime, '%Y-%m-01') m FROM data.orderinfo
WHERE isPaid = '已支付'
GROUP BY userId, DATE_FORMAT(paidTime, '%Y-%m-01')) t2 # t2 月购买的客户 id 和购买日期
ON t1.userId = t2.userId # 以 t1 月和 t2 月都购买商品为条件
AND t1.m = DATE_SUB(t2.m, INTERVAL 1 MONTH) # t2 月比 t1月自增 1
GROUP BY t1.m; # 以月份的升序排列,这里的 m 为 DATE_FORMAT(paidTime, '%Y-%m-01')
# 注:此方法可以计算任意年份的回购率
3. 统计男女用户的消费频次是否有差异
# 过滤性别为空的
SELECT * FROM DATA.userinfo
WHERE sex <> '';
# 不同 userId 男性用户和女性用户的消费次数
SELECT o.userId, sex, count(1)
FROM data.orderinfo o
INNER JOIN(
SELECT * FROM DATA.userinfo
WHERE sex <> '') t
ON o.userId = t.userId
GROUP BY userId, sex;
# 男女消费平均次数
SELECT sex, AVG(ct) FROM(
SELECT o.userId, sex, count(1) AS ct
FROM data.orderinfo o
INNER JOIN(
SELECT * FROM DATA.userinfo
WHERE sex <> '') t
ON o.userId = t.userId
GROUP BY userId, sex) t2
GROUP BY sex;
4. 统计多次消费的顾客第一次和最后一次消费的间隔
# 第一次购买时间和最后一次购买时间
SELECT userId, MAX(paidTime), MIN(paidTime)
FROM data.orderinfo
WHERE isPaid = '已支付'
GROUP BY userId
HAVING COUNT(1);
# 第一次购买和最后一次购买所间隔天数
SELECT userId, MAX(paidTime), MIN(paidTime), DATEDIFF(MAX(paidTime), MIN(paidTime)) # DATEDIFF() 是计算日期差值,若直接用减号则算出来的是相差的秒数
FROM data.orderinfo
WHERE isPaid = '已支付'
GROUP BY userId
HAVING COUNT(1);
# 注:相差为不足一天的认为值是 0
5. 统计不同年龄段,消费金额是否有差异
# 计算不同用户的年龄段
SELECT userId, CEIL((YEAR(NOW()) - YEAR(birth)) / 10) # CEIL() 表示向上取整
FROM data.userinfo
WHERE birth > '1901-00-00';
# 购买商品用户的年龄等信息
SELECT * FROM data.orderinfo o
INNER JOIN(
SELECT userId, CEIL((YEAR(NOW()) - YEAR(birth)) / 10) AS age # CEIL() 表示向上取整
FROM data.userinfo
WHERE birth > '1901-00-00') t # 各个奈年龄段与 orderinfo 表中的交集
ON o.userId = t.userId;
# 各年龄段用户消费频次
SELECT o.userId, age, COUNT(o.userId) FROM data.orderinfo o
INNER JOIN(
SELECT userId, CEIL((YEAR(NOW()) - YEAR(birth)) / 10) AS age # CEIL() 表示向上取整
FROM data.userinfo
WHERE birth > '1901-00-00') t # 各个奈年龄段与 orderinfo 表中的交集
ON o.userId = t.userId
GROUP BY o.userId, age;
# 不同年龄段消费金额差异
SELECT age, AVG(ct)
FROM(
SELECT o.userId, age, COUNT(o.userId) AS ct FROM data.orderinfo o
INNER JOIN(
SELECT userId, CEIL((YEAR(NOW()) - YEAR(birth)) / 10) AS age # CEIL() 表示向上取整
FROM data.userinfo
WHERE birth > '1901-00-00') t # 各个奈年龄段与 orderinfo 表中的交集
ON o.userId = t.userId
GROUP BY o.userId, age) t2
GROUP BY age;
6. 前 20% 的用户贡献了多少额度
# 每个用户的消费总额
SELECT userId, SUM(price)
FROM data.orderinfo o
WHERE isPaid = '已支付'
GROUP BY userId;
# 降序排列
SELECT userId, SUM(price) AS tatal
FROM data.orderinfo o
WHERE isPaid = '已支付'
GROUP BY userId
ORDER BY tatal DESC;
# 统计一共有多少个用户和金额
SELECT COUNT(userId), SUM(total)
FROM(
SELECT userId, SUM(price) AS total
FROM data.orderinfo o
WHERE isPaid = '已支付'
GROUP BY userId
ORDER BY total DESC) as t;
经计算 85649 * 0.2 = 17129
#
SELECT COUNT(userId), SUM(total)
FROM(
SELECT userId, SUM(price) AS total
FROM data.orderinfo o
WHERE isPaid = '已支付'
GROUP BY userId
ORDER BY total DESC
LIMIT 17129) t;