几个常见的SQL面试题

1、连续登录 3 天、30 天,以及最大连续登录天数等问题。

mysql8.0
准备数据

-- Fixture for question 1 (consecutive login days). Target: MySQL 8.0.
-- Rebuilds the `login` table from scratch, so the script can be re-run safely.
SET NAMES utf8mb4;
SET FOREIGN_KEY_CHECKS = 0;

-- ----------------------------
-- Table structure for login
-- ----------------------------
DROP TABLE IF EXISTS `login`;
-- Integer display widths such as int(0) are deprecated since MySQL 8.0.17,
-- so plain `int` is used; the stored type is the same.
CREATE TABLE `login`  (
  `user_id` int NULL DEFAULT NULL COMMENT '用户id',
  `access_time` datetime(0) NULL DEFAULT NULL COMMENT '访问时间',
  `page_id` int NULL DEFAULT NULL COMMENT '页面id',
  `dt` date NULL DEFAULT NULL COMMENT '登陆日期'
) ENGINE = InnoDB CHARACTER SET = utf8mb3 COLLATE = utf8mb3_general_ci ROW_FORMAT = Dynamic;

-- ----------------------------
-- Records of login
-- ----------------------------
-- Explicit column list: the load keeps working if columns are added or reordered.
INSERT INTO `login` (`user_id`, `access_time`, `page_id`, `dt`) VALUES
  -- user 1: 2021-06-01 .. 2021-06-07, no gaps
  (1, '2021-06-01 11:13:15', 10, '2021-06-01'),
  (1, '2021-06-02 11:13:15', 10, '2021-06-02'),
  (1, '2021-06-03 11:13:15', 10, '2021-06-03'),
  (1, '2021-06-04 11:13:15', 10, '2021-06-04'),
  (1, '2021-06-05 11:13:15', 10, '2021-06-05'),
  (1, '2021-06-06 11:13:15', 10, '2021-06-06'),
  (1, '2021-06-07 11:13:15', 10, '2021-06-07'),
  -- user 2: one isolated day, then three days in a row
  (2, '2021-06-01 11:13:15', 10, '2021-06-01'),
  (2, '2021-06-03 11:13:15', 10, '2021-06-03'),
  (2, '2021-06-04 11:13:15', 10, '2021-06-04'),
  (2, '2021-06-05 11:13:15', 10, '2021-06-05'),
  -- user 3: one isolated day, then 2021-06-07 .. 2021-06-13
  (3, '2021-06-01 11:13:15', 10, '2021-06-01'),
  (3, '2021-06-07 11:13:15', 10, '2021-06-07'),
  (3, '2021-06-08 11:13:15', 10, '2021-06-08'),
  (3, '2021-06-09 11:13:15', 10, '2021-06-09'),
  (3, '2021-06-10 11:13:15', 10, '2021-06-10'),
  (3, '2021-06-11 11:13:15', 10, '2021-06-11'),
  (3, '2021-06-12 11:13:15', 10, '2021-06-12'),
  (3, '2021-06-13 11:13:15', 10, '2021-06-13'),
  -- user 4: every other day, never two days in a row
  (4, '2021-06-01 11:13:15', 10, '2021-06-01'),
  (4, '2021-06-03 11:13:15', 10, '2021-06-03'),
  (4, '2021-06-05 11:13:15', 10, '2021-06-05'),
  (4, '2021-06-07 11:13:15', 10, '2021-06-07'),
  (4, '2021-06-09 11:13:15', 10, '2021-06-09'),
  (4, '2021-06-11 11:13:15', 10, '2021-06-11'),
  -- user 5: one isolated day, then two separate three-day runs
  (5, '2021-06-01 11:13:15', 10, '2021-06-01'),
  (5, '2021-06-07 11:13:15', 10, '2021-06-07'),
  (5, '2021-06-08 11:13:15', 10, '2021-06-08'),
  (5, '2021-06-09 11:13:15', 10, '2021-06-09'),
  (5, '2021-06-11 11:13:15', 10, '2021-06-11'),
  (5, '2021-06-12 11:13:15', 10, '2021-06-12'),
  (5, '2021-06-13 11:13:15', 10, '2021-06-13');

SET FOREIGN_KEY_CHECKS = 1;

解题思路:
①先将用户按照id进行分组,然后再用窗口函数进行排序
②将日期与排名做差 (如果用户是连续登陆,那么差值日期的结果是一样的)
③按用户和差值日期分组,统计每组的记录数;如果记录数达到 7 及以上(即连续登录不少于 7 天),则是想要的结果

-- Users who have at least one run of 7 or more consecutive login days in June.
--
-- Gaps-and-islands: number each user's login days in date order. Within a run
-- of consecutive calendar days, (dt - ranking days) is the same date, so every
-- (user_id, diff) group is exactly one streak and count(*) is its length.
--
-- Change the constant in HAVING to ask for 3-day, 30-day, ... streaks.
select distinct user_id              -- a user with several qualifying streaks is listed once
from (
    select
        user_id,
        date_sub(dt, interval ranking day) as diff
    from (
        select
            user_id,
            dt,
            row_number() over (partition by user_id order by dt) as ranking
        from (
            -- `login` holds one row per visit (access_time, page_id), so a day can
            -- appear more than once per user. Collapse to one row per user per
            -- day first; otherwise the duplicate day gets its own row number and
            -- splits the streak.
            select distinct user_id, dt
            from login
            where month(dt) = 6      -- matches June of any year, as in the original
        ) as login_day
    ) as t
) as t1
group by user_id, diff
having count(*) >= 7;

重点在第二步

2、求连续点击三次的用户数,而且中间不能有别人的点击

-- Fixture for question 2 (three clicks in a row). Target: MySQL 8.0.
-- Rebuilds the `click` table from scratch, so the script can be re-run safely.
SET NAMES utf8mb4;
SET FOREIGN_KEY_CHECKS = 0;

-- ----------------------------
-- Table structure for click
-- ----------------------------
DROP TABLE IF EXISTS `click`;
-- Integer display widths such as int(0) are deprecated since MySQL 8.0.17,
-- so plain `int` is used; the stored type is the same.
CREATE TABLE `click`  (
  `user_id` int NULL DEFAULT NULL COMMENT '用户id',
  `click_time` datetime(0) NULL DEFAULT NULL COMMENT '点击时间'
) ENGINE = InnoDB CHARACTER SET = utf8mb3 COLLATE = utf8mb3_general_ci ROW_FORMAT = Dynamic;

-- ----------------------------
-- Records of click
-- ----------------------------
-- Explicit column list: the load keeps working if columns are added or reordered.
-- Note that users 1 and 2 both have a click at exactly 11:21:15.
INSERT INTO `click` (`user_id`, `click_time`) VALUES
  (1, '2021-06-01 11:13:15'),
  (1, '2021-06-01 11:15:15'),
  (1, '2021-06-01 11:17:15'),
  (1, '2021-06-01 11:19:15'),
  (1, '2021-06-01 11:21:15'),
  (1, '2021-06-01 11:23:15'),
  (1, '2021-06-01 11:25:15'),
  (2, '2021-06-01 11:21:15'),
  (2, '2021-06-01 11:21:25'),
  (2, '2021-06-01 11:21:35');

SET FOREIGN_KEY_CHECKS = 1;

解题思路:

表记录了点击的流水信息,包括用户id ,和点击时间

row_number() over(order by click_time) as  rank_1 结果(按 user_id、click_time 逐行列出;11:21:15 的两条并列点击按用户 1 在前计)1 2 3 4 5 9 10 6 7 8
row_number() over(partition by user_id order by click_time) as rank_2 结果 1 2 3 4 5 6 7 1 2 3

rank_1 - rank_2 得到 0 0 0 0 0 3 3 5 5 5

这时只需要按 user_id 和 diff 分组计数,计数大于等于 3 的分组对应的就是连续点击不少于三次、且中间没有其他人点击的用户

-- Users who clicked at least three times in a row with no other user's click
-- in between.
--
-- rank_1 numbers every click in global time order; rank_2 numbers each user's
-- own clicks. While a user's clicks are uninterrupted both counters advance
-- together, so rank_1 - rank_2 is constant; another user's click bumps only
-- rank_1 and starts a new diff value. Each (user_id, diff) group is one run.
select distinct user_id
from (
    select
        user_id,
        rank_1 - rank_2 as diff
    from (
        select
            user_id,
            -- user_id is a tiebreaker: two users can click in the same second
            -- (the sample data has such a pair at 11:21:15). Without it the
            -- order of tied rows is arbitrary and the result can change
            -- between executions.
            row_number() over (order by click_time, user_id) as rank_1,
            row_number() over (partition by user_id order by click_time) as rank_2
        from click
    ) b
) c
group by user_id, diff
having count(*) >= 3;

3、计算除去部门最高工资,和最低工资的平均工资

emp 表
id 员工 id ,deptno 部门编号,salary 工资

-- Fixture for question 3 (average salary without the extremes). Target: MySQL 8.0.
-- Rebuilds the `emp` table from scratch, so the script can be re-run safely.
SET NAMES utf8mb4;
SET FOREIGN_KEY_CHECKS = 0;

-- ----------------------------
-- Table structure for emp
-- ----------------------------
DROP TABLE IF EXISTS `emp`;
-- Integer display widths such as int(0) are deprecated since MySQL 8.0.17,
-- so plain `int` is used; the stored type is the same.
CREATE TABLE `emp`  (
  `id` int NOT NULL COMMENT '员工 id',
  `deptno` int NULL DEFAULT NULL COMMENT '部门编号',
  `salary` decimal(10, 2) NULL DEFAULT NULL COMMENT '工资',
  PRIMARY KEY (`id`) USING BTREE
) ENGINE = InnoDB CHARACTER SET = utf8mb3 COLLATE = utf8mb3_general_ci ROW_FORMAT = Dynamic;

-- ----------------------------
-- Records of emp
-- ----------------------------
-- Explicit column list: the load keeps working if columns are added or reordered.
-- Each department has one clear minimum and one clear maximum salary.
INSERT INTO `emp` (`id`, `deptno`, `salary`) VALUES
  (1, 1, 1.00),
  (2, 1, 101.00),
  (3, 1, 102.00),
  (4, 1, 103.00),
  (5, 1, 200.00),
  (6, 2, 2.00),
  (7, 2, 106.00),
  (8, 2, 107.00),
  (9, 2, 108.00),
  (10, 2, 400.00);

SET FOREIGN_KEY_CHECKS = 1;

核心是使用窗口函数降序和升序分别排一遍就取出了最高和最低。

-- Average salary per department after dropping the highest and lowest salary.
--
-- Every salary is ranked twice inside its department, once ascending and once
-- descending. A row with rank 1 in either direction is an extreme and is
-- excluded. rank() gives tied values the same rank, so if several employees
-- share the minimum (or maximum) all of them are dropped.
-- A department with no rows left after trimming does not appear in the result.
select a.deptno, avg(a.salary)
from (
    select
        deptno,
        salary,
        rank() over (partition by deptno order by salary)      as rank_1,   -- 1 = lowest
        rank() over (partition by deptno order by salary desc) as rank_2    -- 1 = highest
    from emp
    -- salary is nullable and MySQL sorts NULL first in ascending order. An
    -- unfiltered NULL row would take rank_1 = 1, pushing the real minimum to
    -- rank 2 and leaving it in the average.
    where salary is not null
) a
where a.rank_1 > 1
  and a.rank_2 > 1
group by a.deptno;

4、AB球队得分流水表,得到连续三次得分的队员名字 和每次赶超对手的球员名字

问题:两支篮球队进行了激烈的篮球比赛,比分交替上升。比赛结束后,你有一张两队得分分数的明细表,记录了球队team,球员号码number,球员姓名name, 得分分数score 以及得分时间scoretime(datetime)。现在球队要对比赛中表现突出的球员做出嘉奖,所以请你用sql统计出

1)连续三次(及以上)为球队得分的球员名单
2)比赛中帮助各自球队反超比分的球员姓名以及对应时间。

表结构及数据

-- Fixture for question 4 (basketball scoring log). Target: MySQL 8.0.
-- Rebuilds `basketball_game_score_detail` from scratch, so the script can be re-run safely.
SET NAMES utf8mb4;
SET FOREIGN_KEY_CHECKS = 0;

-- ----------------------------
-- Table structure for basketball_game_score_detail
-- ----------------------------
DROP TABLE IF EXISTS `basketball_game_score_detail`;
-- Integer display widths such as int(0) are deprecated since MySQL 8.0.17,
-- so plain `int` is used; the stored type is the same.
CREATE TABLE `basketball_game_score_detail`  (
  `team` varchar(40) CHARACTER SET utf8mb3 COLLATE utf8mb3_general_ci NOT NULL,
  `number` varchar(100) CHARACTER SET utf8mb3 COLLATE utf8mb3_general_ci NOT NULL,
  `score_time` timestamp(0) NOT NULL,
  `score` int NOT NULL,
  `name` varchar(100) CHARACTER SET utf8mb3 COLLATE utf8mb3_general_ci NOT NULL
) ENGINE = InnoDB CHARACTER SET = utf8mb3 COLLATE = utf8mb3_general_ci ROW_FORMAT = Dynamic;

-- ----------------------------
-- Records of basketball_game_score_detail
-- ----------------------------
-- Explicit column list: the load keeps working if columns are added or reordered.
-- One row per basket, in chronological order; every score_time is distinct.
INSERT INTO `basketball_game_score_detail` (`team`, `number`, `score_time`, `score`, `name`) VALUES
  ('A', '1', '2020-08-28 09:01:14', 1, 'A1'),
  ('A', '5', '2020-08-28 09:02:28', 1, 'A5'),
  ('B', '4', '2020-08-28 09:03:42', 3, 'B4'),
  ('A', '4', '2020-08-28 09:04:55', 3, 'A4'),
  ('B', '1', '2020-08-28 09:06:09', 3, 'B1'),
  ('A', '3', '2020-08-28 09:07:23', 3, 'A3'),
  ('A', '4', '2020-08-28 09:08:37', 3, 'A4'),
  ('B', '1', '2020-08-28 09:09:51', 2, 'B1'),
  ('B', '2', '2020-08-28 09:11:05', 2, 'B2'),
  ('B', '4', '2020-08-28 09:12:18', 1, 'B4'),
  ('A', '1', '2020-08-28 09:13:32', 2, 'A1'),
  ('A', '1', '2020-08-28 09:14:46', 1, 'A1'),
  ('A', '4', '2020-08-28 09:16:00', 1, 'A4'),
  ('B', '3', '2020-08-28 09:17:14', 3, 'B3'),
  ('B', '2', '2020-08-28 09:18:28', 3, 'B2'),
  ('A', '2', '2020-08-28 09:19:42', 3, 'A2'),
  ('A', '1', '2020-08-28 09:20:55', 1, 'A1'),
  ('B', '3', '2020-08-28 09:22:09', 2, 'B3'),
  ('B', '3', '2020-08-28 09:23:23', 3, 'B3'),
  ('A', '5', '2020-08-28 09:24:37', 2, 'A5'),
  ('B', '1', '2020-08-28 09:25:51', 3, 'B1'),
  ('B', '2', '2020-08-28 09:27:05', 1, 'B2'),
  ('A', '3', '2020-08-28 09:28:18', 1, 'A3'),
  ('B', '4', '2020-08-28 09:29:32', 1, 'B4'),
  ('A', '1', '2020-08-28 09:30:46', 3, 'A1'),
  ('B', '1', '2020-08-28 09:32:00', 1, 'B1'),
  ('A', '4', '2020-08-28 09:33:14', 2, 'A4'),
  ('B', '1', '2020-08-28 09:34:28', 1, 'B1'),
  ('B', '5', '2020-08-28 09:35:42', 2, 'B5'),
  ('A', '1', '2020-08-28 09:36:55', 1, 'A1'),
  ('B', '1', '2020-08-28 09:38:09', 3, 'B1'),
  ('A', '1', '2020-08-28 09:39:23', 3, 'A1'),
  ('B', '2', '2020-08-28 09:40:37', 3, 'B2'),
  ('A', '3', '2020-08-28 09:41:51', 3, 'A3'),
  ('A', '1', '2020-08-28 09:43:05', 2, 'A1'),
  ('B', '3', '2020-08-28 09:44:18', 3, 'B3'),
  ('A', '5', '2020-08-28 09:45:32', 2, 'A5'),
  ('B', '5', '2020-08-28 09:46:46', 3, 'B5');

SET FOREIGN_KEY_CHECKS = 1;

1)连续三次(及以上)为球队得分的球员名单

同一队球员连续得分(包括中间穿插了对手球员得分的连续得分)
-- Players who scored three or more times in a row within their own team's
-- scoring sequence. Baskets by the opposing team in between do not break the
-- streak; a basket by a teammate does.
--
-- rank2 numbers a team's baskets in time order, rank3 numbers one player's
-- baskets. While the same player keeps scoring for the team both advance
-- together, so rank2 - rank3 is constant; a teammate's basket bumps only rank2.
-- Each (name, diff) group is one streak and count(1) is its length.
--
-- The requirement is "three times or more", so the threshold is 3. The sample
-- rows contain no streak longer than two baskets, so the result is empty for
-- them; lower the threshold to 2 to list the two-basket streaks.
select name, rank2 - rank3 as diff, count(1)
from (
    select
        name,
        rank() over (partition by team order by score_time) as rank2,
        rank() over (partition by name order by score_time) as rank3
    from basketball_game_score_detail
) a
group by name, diff
having count(1) >= 3;

同一队球员连续得分(不包括中间穿插了对手球员得分的连续得分)
-- Players who scored three or more times in a row with nobody else scoring in
-- between, neither an opponent nor a teammate.
--
-- rank1 numbers every basket of the game in time order, rank3 numbers one
-- player's baskets. While the same player keeps scoring both advance together,
-- so rank1 - rank3 is constant; any other player's basket bumps only rank1.
-- Each (name, diff) group is one streak and count(1) is its length.
--
-- The difference must be taken against the per-player rank. rank1 minus the
-- per-team rank is not broken by a teammate's basket, so it would count
-- "A1, A5, A1" as two consecutive baskets by A1.
--
-- The requirement is "three times or more", so the threshold is 3. The sample
-- rows contain no streak longer than two baskets, so the result is empty for
-- them; lower the threshold to 2 to list the two-basket streaks.
select name, rank1 - rank3 as diff, count(1)
from (
    select
        name,
        rank() over (order by score_time) as rank1,
        rank() over (partition by name order by score_time) as rank3
    from basketball_game_score_detail
) mid
group by name, diff
having count(1) >= 3;

2)比赛中帮助各自球队反超比分的球员姓名以及对应时间。

-- Players whose basket put their team ahead after the other team had been
-- leading, together with the time of that basket.
--
-- Columns: acumsum / bcumsum are the running totals of team A / team B,
-- err is the margin (A minus B) after the basket, and lasterr is the margin at
-- the most recent earlier moment when the score was not tied (0 if none).
with running_score as (
    -- Running total of each team after every basket.
    select
        name,
        score,
        score_time,
        sum(case when team = 'A' then score else 0 end) over (order by score_time) as acumsum,
        sum(case when team = 'B' then score else 0 end) over (order by score_time) as bcumsum
    from basketball_game_score_detail
),
lead_margin as (
    select
        name,
        score,
        score_time,
        acumsum,
        bcumsum,
        (acumsum - bcumsum) as err,
        lag(acumsum - bcumsum, 1, 0) over (order by score_time) as lasterr
    from running_score
    -- Tied moments are removed before lag() runs, so lasterr always refers to
    -- the last time one team was actually ahead. Comparing with the directly
    -- preceding row misses a lead change that passes through a tie: in the
    -- sample data the score goes 19-17 (A ahead), 19-19, 19-22 (B ahead), and
    -- the basket at 09:23:23 would not be reported.
    where acumsum <> bcumsum
)
select *
from lead_margin
-- Opposite signs: the leading team changed. The first basket of the game has
-- lasterr = 0 and is therefore not reported as a lead change.
where err * lasterr < 0
order by score_time;

5、留存问题

1,次日、7日和30日用户留存率的定义。现定义:新用户为第一次登录的用户,新用户第一次登录的日期为第0天,登录行为的代号为1。次日留存率:(第0天新增的用户中,新增日之后的第1天还登录的用户数)/第0天新增总用户数;7日留存率:(第0天新增的用户中,新增日之后的第7天还登录的用户数)/第0天新增总用户数;30日留存率:(第0天新增的用户中,新增日之后的第30天还登录的用户数)/第0天新增总用户数;
(注意:留存一般是离散的概念,不要求用户在N天内每天都登录)
2,摘选出每天的新用户
3,列出每个新用户第一次登录的日期及此日期之后仍登录的日期
4,计算列出的登录日期之间的差值,如果相差1天,说明该新用户次日仍留存,如果相差7天,说明该新用户七日仍留存,以此类推
5,统计每天新用户的留存人数以及计算留存率
(理解需求这一个题不难解答)

导入数据

-- Fixture for question 5 (retention). Target: MySQL 8.0.
-- Rebuilds the `登录信息` table from scratch, so the script can be re-run safely.
SET NAMES utf8mb4;
SET FOREIGN_KEY_CHECKS = 0;

-- ----------------------------
-- Table structure for 登录信息
-- ----------------------------
DROP TABLE IF EXISTS `登录信息`;
-- Integer display widths such as int(0) are deprecated since MySQL 8.0.17,
-- so plain `int` is used; the stored type is the same.
-- NOTE(review): `登陆时间` holds 'YYYY-MM-DD' text in a varchar column. The type
-- is kept because queries read it as a string; a DATE column would be the
-- natural choice if that can be changed.
CREATE TABLE `登录信息`  (
  `用户id` int NOT NULL,
  `登陆时间` varchar(255) CHARACTER SET utf8mb3 COLLATE utf8mb3_general_ci NULL DEFAULT NULL
) ENGINE = InnoDB CHARACTER SET = utf8mb3 COLLATE = utf8mb3_general_ci ROW_FORMAT = Dynamic;

-- ----------------------------
-- Records of 登录信息
-- ----------------------------
-- Explicit column list: the load keeps working if columns are added or reordered.
-- All three users log in for the first time on 2022-10-11.
INSERT INTO `登录信息` (`用户id`, `登陆时间`) VALUES
  (1, '2022-10-11'),
  (1, '2022-10-12'),
  (1, '2022-10-14'),
  (1, '2022-10-16'),
  (1, '2022-10-17'),
  (1, '2022-10-18'),
  (1, '2022-10-19'),
  (1, '2022-10-21'),
  (2, '2022-10-11'),
  (2, '2022-10-12'),
  (2, '2022-10-13'),
  (2, '2022-10-14'),
  (2, '2022-10-15'),
  (2, '2022-10-16'),
  (2, '2022-10-17'),
  (2, '2022-10-18'),
  (3, '2022-10-11'),
  (3, '2022-10-13'),
  (3, '2022-10-14'),
  (3, '2022-10-17'),
  (3, '2022-10-19'),
  (3, '2022-10-20'),
  (3, '2022-10-21'),
  (3, '2022-10-22');

SET FOREIGN_KEY_CHECKS = 1;

具体实现

-- Next-day, 3-day and 7-day retention of new users, per first-login date.
--
-- a_t is the date on which a cohort of users logged in for the first time
-- (day 0). A user is retained on day N when the same user has a login exactly
-- N days after a_t; logging in on every day in between is not required.
-- Rate = retained users / all users whose first login is a_t.
--
-- For the sample rows the single cohort 2022-10-11 has 3 users, of which
-- 2 are retained on day 1, 3 on day 3 and 2 on day 7.
select
    d.a_t,
    count(distinct case when d.`时间间隔` = 1 then d.`用户id` end) as `次日留存数`,
    count(distinct case when d.`时间间隔` = 1 then d.`用户id` end)
        / count(distinct d.`用户id`) as `次日留存率`,
    count(distinct case when d.`时间间隔` = 3 then d.`用户id` end) as `3日留存数`,
    count(distinct case when d.`时间间隔` = 3 then d.`用户id` end)
        / count(distinct d.`用户id`) as `3日留存率`,
    count(distinct case when d.`时间间隔` = 7 then d.`用户id` end) as `7日留存数`,
    count(distinct case when d.`时间间隔` = 7 then d.`用户id` end)
        / count(distinct d.`用户id`) as `7日留存率`
from (
    select
        c.`用户id`,
        c.a_t,
        timestampdiff(day, c.a_t, c.b_t) as `时间间隔`   -- days since first login
    from (
        select
            a.`用户id`,
            a.a_t,
            b.`登陆时间` as b_t
        from (
            -- Day 0 of every user: only the first login makes someone a "new
            -- user" of that date. Joining every login row against later ones
            -- would also treat returning users as new on each later date.
            -- The dates are 'YYYY-MM-DD' strings in the sample data, so min()
            -- and "<" on the text agree with chronological order.
            select `用户id`, min(`登陆时间`) as a_t
            from `登录信息`
            group by `用户id`
        ) as a
        -- left join keeps users who never came back, so they still count in
        -- the denominator.
        left join `登录信息` as b
            on a.`用户id` = b.`用户id`
           and a.a_t < b.`登陆时间`
    ) as c
) as d
group by d.a_t;

你可能感兴趣的:(SQL,sql,数据库,java)