mysql8.0
准备数据
-- Fixture for the consecutive-login exercise: rebuilds `login` from scratch
-- and loads the June 2021 sample rows for users 1-5.
SET NAMES utf8mb4;
SET FOREIGN_KEY_CHECKS = 0;

DROP TABLE IF EXISTS `login`;
CREATE TABLE `login` (
    `user_id`     int(0)      NULL DEFAULT NULL COMMENT '用户id',
    `access_time` datetime(0) NULL DEFAULT NULL COMMENT '访问时间',
    `page_id`     int(0)      NULL DEFAULT NULL COMMENT '页面id',
    `dt`          date        NULL DEFAULT NULL COMMENT '登陆日期'
) ENGINE = InnoDB CHARACTER SET = utf8mb3 COLLATE = utf8mb3_general_ci ROW_FORMAT = Dynamic;

-- ----------------------------
-- Records of login
-- ----------------------------
INSERT INTO `login` (`user_id`, `access_time`, `page_id`, `dt`) VALUES
    -- user 1: seven consecutive days, 06-01 .. 06-07
    (1, '2021-06-01 11:13:15', 10, '2021-06-01'),
    (1, '2021-06-02 11:13:15', 10, '2021-06-02'),
    (1, '2021-06-03 11:13:15', 10, '2021-06-03'),
    (1, '2021-06-04 11:13:15', 10, '2021-06-04'),
    (1, '2021-06-05 11:13:15', 10, '2021-06-05'),
    (1, '2021-06-06 11:13:15', 10, '2021-06-06'),
    (1, '2021-06-07 11:13:15', 10, '2021-06-07'),
    -- user 2: longest run is three days
    (2, '2021-06-01 11:13:15', 10, '2021-06-01'),
    (2, '2021-06-03 11:13:15', 10, '2021-06-03'),
    (2, '2021-06-04 11:13:15', 10, '2021-06-04'),
    (2, '2021-06-05 11:13:15', 10, '2021-06-05'),
    -- user 3: isolated 06-01, then seven consecutive days 06-07 .. 06-13
    (3, '2021-06-01 11:13:15', 10, '2021-06-01'),
    (3, '2021-06-07 11:13:15', 10, '2021-06-07'),
    (3, '2021-06-08 11:13:15', 10, '2021-06-08'),
    (3, '2021-06-09 11:13:15', 10, '2021-06-09'),
    (3, '2021-06-10 11:13:15', 10, '2021-06-10'),
    (3, '2021-06-11 11:13:15', 10, '2021-06-11'),
    (3, '2021-06-12 11:13:15', 10, '2021-06-12'),
    (3, '2021-06-13 11:13:15', 10, '2021-06-13'),
    -- user 4: every other day, never two days in a row
    (4, '2021-06-01 11:13:15', 10, '2021-06-01'),
    (4, '2021-06-03 11:13:15', 10, '2021-06-03'),
    (4, '2021-06-05 11:13:15', 10, '2021-06-05'),
    (4, '2021-06-07 11:13:15', 10, '2021-06-07'),
    (4, '2021-06-09 11:13:15', 10, '2021-06-09'),
    (4, '2021-06-11 11:13:15', 10, '2021-06-11'),
    -- user 5: 06-07 .. 06-13 with 06-10 missing, so two runs of three days
    (5, '2021-06-01 11:13:15', 10, '2021-06-01'),
    (5, '2021-06-07 11:13:15', 10, '2021-06-07'),
    (5, '2021-06-08 11:13:15', 10, '2021-06-08'),
    (5, '2021-06-09 11:13:15', 10, '2021-06-09'),
    (5, '2021-06-11 11:13:15', 10, '2021-06-11'),
    (5, '2021-06-12 11:13:15', 10, '2021-06-12'),
    (5, '2021-06-13 11:13:15', 10, '2021-06-13');

SET FOREIGN_KEY_CHECKS = 1;
解题思路:
①先将用户按照id进行分组,然后再用窗口函数进行排序
②将日期与排名做差 (如果用户是连续登陆,那么差值日期的结果是一样的)
③按用户和差值日期分组,统计每组的记录数;如果记录数不少于7(即连续登录7天及以上),则是想要的结果
-- Users with a login streak of at least 7 consecutive calendar days in June
-- (month(dt) = 6 matches June of any year present in the table).
-- Gaps-and-islands: within one user, consecutive dates minus their 1-based
-- row number collapse to the same anchor date, so each streak forms one group.
with june_days as (
    -- One row per user per day: several logins on the same day would otherwise
    -- get different row numbers and split a streak apart.
    select distinct user_id, dt
    from login
    where month(dt) = 6
),
numbered as (
    select user_id,
           dt,
           row_number() over (partition by user_id order by dt) as ranking
    from june_days
),
streaks as (
    select user_id,
           date_sub(dt, interval ranking day) as diff,
           count(*) as streak_days
    from numbered
    group by user_id, date_sub(dt, interval ranking day)
)
-- distinct: a user with two qualifying streaks is still listed only once.
select distinct user_id
from streaks
where streak_days >= 7;
重点在第二步
-- Fixture for the consecutive-click exercise: rebuilds `click` and loads
-- ten sample click events for users 1 and 2.
SET NAMES utf8mb4;
SET FOREIGN_KEY_CHECKS = 0;

-- ----------------------------
-- Table structure for click
-- ----------------------------
DROP TABLE IF EXISTS `click`;
CREATE TABLE `click` (
    `user_id`    int(0)      NULL DEFAULT NULL COMMENT '用户id',
    `click_time` datetime(0) NULL DEFAULT NULL COMMENT '点击时间'
) ENGINE = InnoDB CHARACTER SET = utf8mb3 COLLATE = utf8mb3_general_ci ROW_FORMAT = Dynamic;

-- ----------------------------
-- Records of click
-- ----------------------------
INSERT INTO `click` (`user_id`, `click_time`) VALUES
    -- user 1: one click every two minutes from 11:13:15 to 11:25:15
    (1, '2021-06-01 11:13:15'),
    (1, '2021-06-01 11:15:15'),
    (1, '2021-06-01 11:17:15'),
    (1, '2021-06-01 11:19:15'),
    (1, '2021-06-01 11:21:15'),
    (1, '2021-06-01 11:23:15'),
    (1, '2021-06-01 11:25:15'),
    -- user 2: three clicks; the first shares its timestamp with user 1's 11:21:15 click
    (2, '2021-06-01 11:21:15'),
    (2, '2021-06-01 11:21:25'),
    (2, '2021-06-01 11:21:35');

SET FOREIGN_KEY_CHECKS = 1;
解题思路:
表记录了点击的流水信息,包括用户id ,和点击时间
row_number() over(order by click_time) as rank_1 结果(按插入顺序列出;11:21:15 两人同时点击,此处假设用户1排在前) 1 2 3 4 5 9 10 6 7 8
row_number() over(partition by user_id order by click_time) as rank_2 结果 1 2 3 4 5 6 7 1 2 3
rank_1 - rank_2 得到 0 0 0 0 0 3 3 5 5 5
这时我们发现只需要按 user_id 和 diff 分组,计数不少于3的分组,就对应连续点击三次及以上且中间没有其他人点击的用户
-- Users who clicked at least 3 times in a row with no other user's click in between.
-- rank_1 numbers every click globally, rank_2 numbers clicks per user; the
-- difference stays constant for as long as one user's clicks are uninterrupted.
with numbered as (
    select user_id,
           -- user_id breaks ties between clicks sharing a timestamp; without it
           -- the global numbering (and therefore the result) is non-deterministic.
           row_number() over (order by click_time, user_id) as rank_1,
           row_number() over (partition by user_id order by click_time) as rank_2
    from click
),
runs as (
    select user_id,
           rank_1 - rank_2 as diff
    from numbered
)
select distinct user_id
from runs
group by diff, user_id
having count(diff) >= 3;
emp 表
id 员工 id ,deptno 部门编号,salary 工资
-- Fixture for the trimmed-average exercise: rebuilds `emp` and loads five
-- employees for each of departments 1 and 2.
SET NAMES utf8mb4;
SET FOREIGN_KEY_CHECKS = 0;

-- ----------------------------
-- Table structure for emp
-- ----------------------------
DROP TABLE IF EXISTS `emp`;
CREATE TABLE `emp` (
    `id`     int(0)         NOT NULL          COMMENT '员工 id',
    `deptno` int(0)         NULL DEFAULT NULL COMMENT '部门编号',
    `salary` decimal(10, 2) NULL DEFAULT NULL COMMENT '工资',
    PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb3 COLLATE = utf8mb3_general_ci ROW_FORMAT = Dynamic;

-- ----------------------------
-- Records of emp
-- ----------------------------
INSERT INTO `emp` (`id`, `deptno`, `salary`) VALUES
    -- department 1: lowest 1.00, highest 200.00
    (1,  1,   1.00),
    (2,  1, 101.00),
    (3,  1, 102.00),
    (4,  1, 103.00),
    (5,  1, 200.00),
    -- department 2: lowest 2.00, highest 400.00
    (6,  2,   2.00),
    (7,  2, 106.00),
    (8,  2, 107.00),
    (9,  2, 108.00),
    (10, 2, 400.00);

SET FOREIGN_KEY_CHECKS = 1;
核心是使用窗口函数降序和升序分别排一遍就取出了最高和最低。
-- Average salary per department after dropping the highest and the lowest salary.
-- rank() gives tied salaries the same rank, so every employee tied for the
-- department minimum or maximum is excluded, not just one of them.
select a.deptno, avg(a.salary)
from (
    select deptno,
           salary,
           rank() over (partition by deptno order by salary)      as rank_1,  -- 1 = lowest
           rank() over (partition by deptno order by salary desc) as rank_2   -- 1 = highest
    from emp
) a
where a.rank_1 > 1
  and a.rank_2 > 1
group by a.deptno;
问题:两支篮球队进行了激烈的篮球比赛,比分交替上升。比赛结束后,你有一张两队得分分数的明细表,记录了球队team,球员号码number,球员姓名name,得分分数score 以及得分时间score_time(timestamp)。现在球队要对比赛中表现突出的球员做出嘉奖,所以请你用sql统计出
1)连续三次(及以上)为球队得分的球员名单
2)比赛中帮助各自球队反超比分的球员姓名以及对应时间。
表结构及数据
-- Fixture for the basketball exercises: rebuilds `basketball_game_score_detail`
-- and loads 38 scoring events for teams A and B, listed in chronological order.
SET NAMES utf8mb4;
SET FOREIGN_KEY_CHECKS = 0;

-- ----------------------------
-- Table structure for basketball_game_score_detail
-- ----------------------------
DROP TABLE IF EXISTS `basketball_game_score_detail`;
CREATE TABLE `basketball_game_score_detail` (
    `team`       varchar(40)  CHARACTER SET utf8mb3 COLLATE utf8mb3_general_ci NOT NULL,
    `number`     varchar(100) CHARACTER SET utf8mb3 COLLATE utf8mb3_general_ci NOT NULL,
    `score_time` timestamp(0) NOT NULL,
    `score`      int(0)       NOT NULL,
    `name`       varchar(100) CHARACTER SET utf8mb3 COLLATE utf8mb3_general_ci NOT NULL
) ENGINE = InnoDB CHARACTER SET = utf8mb3 COLLATE = utf8mb3_general_ci ROW_FORMAT = Dynamic;

-- ----------------------------
-- Records of basketball_game_score_detail
-- ----------------------------
INSERT INTO `basketball_game_score_detail` (`team`, `number`, `score_time`, `score`, `name`) VALUES
    ('A', '1', '2020-08-28 09:01:14', 1, 'A1'),
    ('A', '5', '2020-08-28 09:02:28', 1, 'A5'),
    ('B', '4', '2020-08-28 09:03:42', 3, 'B4'),
    ('A', '4', '2020-08-28 09:04:55', 3, 'A4'),
    ('B', '1', '2020-08-28 09:06:09', 3, 'B1'),
    ('A', '3', '2020-08-28 09:07:23', 3, 'A3'),
    ('A', '4', '2020-08-28 09:08:37', 3, 'A4'),
    ('B', '1', '2020-08-28 09:09:51', 2, 'B1'),
    ('B', '2', '2020-08-28 09:11:05', 2, 'B2'),
    ('B', '4', '2020-08-28 09:12:18', 1, 'B4'),
    ('A', '1', '2020-08-28 09:13:32', 2, 'A1'),
    ('A', '1', '2020-08-28 09:14:46', 1, 'A1'),
    ('A', '4', '2020-08-28 09:16:00', 1, 'A4'),
    ('B', '3', '2020-08-28 09:17:14', 3, 'B3'),
    ('B', '2', '2020-08-28 09:18:28', 3, 'B2'),
    ('A', '2', '2020-08-28 09:19:42', 3, 'A2'),
    ('A', '1', '2020-08-28 09:20:55', 1, 'A1'),
    ('B', '3', '2020-08-28 09:22:09', 2, 'B3'),
    ('B', '3', '2020-08-28 09:23:23', 3, 'B3'),
    ('A', '5', '2020-08-28 09:24:37', 2, 'A5'),
    ('B', '1', '2020-08-28 09:25:51', 3, 'B1'),
    ('B', '2', '2020-08-28 09:27:05', 1, 'B2'),
    ('A', '3', '2020-08-28 09:28:18', 1, 'A3'),
    ('B', '4', '2020-08-28 09:29:32', 1, 'B4'),
    ('A', '1', '2020-08-28 09:30:46', 3, 'A1'),
    ('B', '1', '2020-08-28 09:32:00', 1, 'B1'),
    ('A', '4', '2020-08-28 09:33:14', 2, 'A4'),
    ('B', '1', '2020-08-28 09:34:28', 1, 'B1'),
    ('B', '5', '2020-08-28 09:35:42', 2, 'B5'),
    ('A', '1', '2020-08-28 09:36:55', 1, 'A1'),
    ('B', '1', '2020-08-28 09:38:09', 3, 'B1'),
    ('A', '1', '2020-08-28 09:39:23', 3, 'A1'),
    ('B', '2', '2020-08-28 09:40:37', 3, 'B2'),
    ('A', '3', '2020-08-28 09:41:51', 3, 'A3'),
    ('A', '1', '2020-08-28 09:43:05', 2, 'A1'),
    ('B', '3', '2020-08-28 09:44:18', 3, 'B3'),
    ('A', '5', '2020-08-28 09:45:32', 2, 'A5'),
    ('B', '5', '2020-08-28 09:46:46', 3, 'B5');

SET FOREIGN_KEY_CHECKS = 1;
1)连续三次(及以上)为球队得分的球员名单
同一队球员连续得分(包括中间穿插对手球员得分的连续得分)
-- Players who scored three or more times in a row within their own team's
-- scoring sequence (baskets by the opposing team in between do not break the run).
-- rank2 numbers a team's baskets, rank3 numbers one player's baskets; the
-- difference is constant while the same player keeps scoring for the team.
select name, rank2 - rank3 as diff, count(1)
from (
    select name,
           rank() over (partition by team order by score_time) as rank2,
           rank() over (partition by name order by score_time) as rank3
    from basketball_game_score_detail
) a
group by name, diff
-- The task asks for three or more in a row. The sample rows above contain
-- only runs of two, so this returns no rows for them; lower the threshold
-- to 2 to see the grouping at work.
having count(1) >= 3;
同一队球员连续得分(不包括中间穿插对手球员得分的连续得分)
-- Players who scored three or more times in a row with nobody else scoring in between.
-- rank1 numbers every basket of the game, rank3 numbers one player's baskets;
-- the difference is constant only while that player scores back-to-back.
-- (Using the per-team rank here instead of rank3 would also count a player
-- whose baskets are separated by a teammate's basket.)
select name, rank1 - rank3 as diff, count(1)
from (
    select name,
           rank() over (order by score_time)                   as rank1,
           rank() over (partition by name order by score_time) as rank3
    from basketball_game_score_detail
) mid
group by name, (rank1 - rank3)
-- The task asks for three or more in a row. The sample rows above contain
-- only runs of two, so this returns no rows for them; lower the threshold
-- to 2 to see the grouping at work.
having count(1) >= 3;
2)比赛中帮助各自球队反超比分的球员姓名以及对应时间。
-- Baskets that put the scorer's team ahead after it had been behind.
-- err is the running margin (team A total minus team B total) after each basket.
-- A lead change may pass through a tie, e.g. in the sample data the score is
-- 19:19 before B3's basket at 09:23:23. Comparing only with the immediately
-- preceding margin (0 at a tie) would miss that basket, so the comparison is
-- made against the most recent non-zero margin instead.
with running as (
    select name,
           score,
           score_time,
           sum(case when team = 'A' then score else 0 end) over (order by score_time) as acumsum,
           sum(case when team = 'B' then score else 0 end) over (order by score_time) as bcumsum
    from basketball_game_score_detail
),
margins as (
    select name,
           score,
           score_time,
           acumsum,
           bcumsum,
           acumsum - bcumsum as err
    from running
),
grouped as (
    select name,
           score,
           score_time,
           acumsum,
           bcumsum,
           err,
           -- margin right before this basket (0 before the first basket)
           lag(err, 1, 0) over (order by score_time) as lasterr,
           -- the counter advances on every non-zero margin, so a run of ties
           -- stays in the group of the last basket that left somebody ahead
           sum(case when err <> 0 then 1 else 0 end) over (order by score_time) as lead_grp
    from margins
),
carried as (
    select name,
           score,
           score_time,
           acumsum,
           bcumsum,
           err,
           lasterr,
           -- the single non-zero margin of the group = most recent non-tie margin
           max(case when err <> 0 then err end) over (partition by lead_grp) as lead_err
    from grouped
),
flagged as (
    select name,
           score,
           score_time,
           acumsum,
           bcumsum,
           err,
           lasterr,
           -- most recent non-tie margin before this basket; 0 before the first
           -- basket, so the opening basket is not reported as a lead change
           lag(lead_err, 1, 0) over (order by score_time) as prev_lead_err
    from carried
)
select name, score, score_time, acumsum, bcumsum, err, lasterr
from flagged
where err * prev_lead_err < 0   -- sign flipped: the other team was ahead before
order by score_time;
1,次日、7日和30日用户留存率的定义。现定为新用户第一次登录时间为第0天,新用户定义为第一次登录的用户,登录行为的代号为1。次日留存率:(第0天新增的用户中,新增日之后的第1天还登录的用户数)/第0天新增总用户数;7日留存率:(第0天新增的用户中,新增日之后的第7天还登录的用户数)/第0天新增总用户数;30日留存率:(第0天新增的用户中,新增日之后的第30天还登录的用户数)/第0天新增总用户数;
(注意:留存一般是离散的概念,不要求用户在N天内每天都登录)
2,摘选出每天的新用户
3,列出每个新用户第一次登录的日期及此日期之后仍登录的日期
4,计算列出的登录日期之间的差值,如果相差1天,说明该新用户次日仍留存,如果相差7天,说明该新用户七日仍留存,以此类推
5,统计每天新用户的留存人数以及计算留存率
(理解需求这一个题不难解答)
导入数据
-- Fixture for the retention exercise: rebuilds `登录信息` (user id, login date)
-- and loads the October 2022 sample logins for users 1-3.
SET NAMES utf8mb4;
SET FOREIGN_KEY_CHECKS = 0;

DROP TABLE IF EXISTS `登录信息`;
CREATE TABLE `登录信息` (
    `用户id`   int(0) NOT NULL,
    -- Stored as DATE rather than varchar(255): invalid values are rejected and
    -- date arithmetic no longer depends on implicit string-to-date conversion.
    `登陆时间` date   NULL DEFAULT NULL
) ENGINE = InnoDB CHARACTER SET = utf8mb3 COLLATE = utf8mb3_general_ci ROW_FORMAT = Dynamic;

-- ----------------------------
-- Records of 登录信息
-- ----------------------------
INSERT INTO `登录信息` (`用户id`, `登陆时间`) VALUES
    (1, '2022-10-11'),
    (1, '2022-10-12'),
    (1, '2022-10-14'),
    (1, '2022-10-16'),
    (1, '2022-10-17'),
    (1, '2022-10-18'),
    (1, '2022-10-19'),
    (1, '2022-10-21'),
    (2, '2022-10-11'),
    (2, '2022-10-12'),
    (2, '2022-10-13'),
    (2, '2022-10-14'),
    (2, '2022-10-15'),
    (2, '2022-10-16'),
    (2, '2022-10-17'),
    (2, '2022-10-18'),
    (3, '2022-10-11'),
    (3, '2022-10-13'),
    (3, '2022-10-14'),
    (3, '2022-10-17'),
    (3, '2022-10-19'),
    (3, '2022-10-20'),
    (3, '2022-10-21'),
    (3, '2022-10-22');

SET FOREIGN_KEY_CHECKS = 1;
具体实现
-- Next-day, 3-day and 7-day retention of new users, one row per cohort date.
-- Day 0 of a user is their first login date; only that date defines the cohort.
-- (Pairing every login date with every later one would also count returning
-- users as if each of their login days were a new day 0.)
with first_login as (
    -- cast() keeps this working whether 登陆时间 is stored as text or as a date
    select `用户id`,
           min(cast(`登陆时间` as date)) as a_t
    from `登录信息`
    group by `用户id`
),
offsets as (
    -- One row per login, with its distance in days from the user's day 0.
    -- The day-0 login itself is included (offset 0), so every new user is
    -- part of the denominator even if they never come back.
    select f.`用户id`,
           f.a_t,
           datediff(cast(l.`登陆时间` as date), f.a_t) as `时间间隔`
    from first_login as f
    inner join `登录信息` as l
        on l.`用户id` = f.`用户id`
)
select a_t,
       count(distinct case when `时间间隔` = 1 then `用户id` end) as `次日留存数`,
       count(distinct case when `时间间隔` = 1 then `用户id` end)
           / count(distinct `用户id`) as `次日留存率`,
       count(distinct case when `时间间隔` = 3 then `用户id` end) as `3日留存数`,
       count(distinct case when `时间间隔` = 3 then `用户id` end)
           / count(distinct `用户id`) as `3日留存率`,
       count(distinct case when `时间间隔` = 7 then `用户id` end) as `7日留存数`,
       count(distinct case when `时间间隔` = 7 then `用户id` end)
           / count(distinct `用户id`) as `7日留存率`
from offsets
group by a_t
order by a_t;