LeeCode数据库部分SQL题目总结

176. 第二高的薪水

描述

编写一个 SQL 查询,获取 Employee 表中第二高的薪水(Salary)

Id Salary
1 100
2 200
3 300

例如上述 Employee 表,SQL查询应该返回 200 作为第二高的薪水。如果不存在第二高的薪水,那么查询应返回 null

SecondHighestSalary
200

数据准备

Create table If Not Exists Employee (Id int, Salary int);
Truncate table Employee;
insert into Employee (Id, Salary) values ('1', '100');
insert into Employee (Id, Salary) values ('2', '200');
insert into Employee (Id, Salary) values ('3', '300');

SQL语句

SELECT MAX(Salary) SecondHighestSalary
FROM Employee
WHERE Salary <
(SELECT MAX(Salary) FROM Employee)

178.分数排名

描述

编写一个 SQL 查询来实现分数排名。如果两个分数相同,则两个分数排名(Rank)相同。请注意,平分后的下一个名次应该是下一个连续的整数值。换句话说,名次之间不应该有“间隔”。

Id Score
1 3.50
2 3.65
3 4.00
4 3.85
5 4.00
6 3.65

例如,根据上述给定的 Scores 表,你的查询应该返回(按分数从高到低排列):

Score Rank
4.00 1
4.00 1
3.85 2
3.65 3
3.65 3
3.50 4

数据准备

Create table If Not Exists Scores (Id int, Score DECIMAL(3,2));
Truncate table Scores;
insert into Scores (Id, Score) values ('1', '3.5');
insert into Scores (Id, Score) values ('2', '3.65');
insert into Scores (Id, Score) values ('3', '4.0');
insert into Scores (Id, Score) values ('4', '3.85');
insert into Scores (Id, Score) values ('5', '4.0');
insert into Scores (Id, Score) values ('6', '3.65');

SQL语句

SELECT
Score,
@rank := @rank + (@prev <> (@prev := Score)) Rank
FROM
Scores,
(SELECT @rank := 0, @prev := -1) init
ORDER BY Score desc

180. 连续出现的数字

描述

编写一个 SQL 查询,查找所有至少连续出现三次的数字。

Id Num
1 1
2 1
3 1
4 2
5 1
6 2
7 2

例如,给定上面的 Logs 表, 1 是唯一连续出现至少三次的数字。

ConsecutiveNums
1

数据准备

Create table If Not Exists Logs (Id int, Num int);
Truncate table Logs;
insert into Logs (Id, Num) values ('1', '1');
insert into Logs (Id, Num) values ('2', '1');
insert into Logs (Id, Num) values ('3', '1');
insert into Logs (Id, Num) values ('4', '2');
insert into Logs (Id, Num) values ('5', '1');
insert into Logs (Id, Num) values ('6', '2');
insert into Logs (Id, Num) values ('7', '2');

SQL语句

SELECT DISTINCT l1.Num ConsecutiveNums
FROM Logs l1,
Logs l2,
Logs l3
WHERE l1.Id=l2.Id-1
AND l2.Id =l3.Id-1
AND l1.Num =l2.Num
AND l2.Num =l3.Num

181. 超过经理收入的员工

描述

Employee 表包含所有员工,他们的经理也属于员工。每个员工都有一个 Id,此外还有一列对应员工的经理的 Id。

Id Name Salary ManagerId
1 Joe 70000 3
2 Henry 80000 4
3 Sam 60000 null
4 Max 90000 null

给定 Employee 表,编写一个 SQL 查询,该查询可以获取收入超过他们经理的员工的姓名。在上面的表格中,Joe 是唯一一个收入超过他的经理的员工

Employee
Joe

数据准备

Create table If Not Exists Employee (Id int, Name varchar(255), Salary int, ManagerId int);
Truncate table Employee;
insert into Employee (Id, Name, Salary, ManagerId) values ('1', 'Joe', '70000', '3');
insert into Employee (Id, Name, Salary, ManagerId) values ('2', 'Henry', '80000', '4');
insert into Employee (Id, Name, Salary, ManagerId) values ('3', 'Sam', '60000', 'None');
insert into Employee (Id, Name, Salary, ManagerId) values ('4', 'Max', '90000', 'None');

SQL语句

SELECT
     a.NAME AS Employee
FROM Employee AS a JOIN Employee AS b
     ON a.ManagerId = b.Id
     AND a.Salary > b.Salary
;

182. 查找重复的电子邮箱

描述

编写一个 SQL 查询,查找 Person 表中所有重复的电子邮箱。示例:

Id Email
1 [email protected]
2 [email protected]
3 [email protected]

根据以上输入,你的查询应返回以下结果:

Email
[email protected]

说明:所有电子邮箱都是小写字母。

数据准备

Create table If Not Exists Person (Id int, Email varchar(255));
Truncate table Person;
insert into Person (Id, Email) values ('1', '[email protected]');
insert into Person (Id, Email) values ('2', '[email protected]');
insert into Person (Id, Email) values ('3', '[email protected]');

SQL语句

-- 方法1:
select Email from
(
  select Email, count(Email) as num
  from Person
  group by Email
) as statistic
where num > 1
;
-- 方法2
select Email
from Person
group by Email
having count(Email) > 1;
-- 方法3
select
     distinct(P1.Email) 'Email'
from
      Person P1,
      Person P2
where P1.Id <> P2.Id and P1.Email = P2.Email

183. 从不订购的客户

描述

某网站包含两个表,Customers 表和 Orders 表。编写一个 SQL 查询,找出所有从不订购任何东西的客户。

Customers 表:

Id Name
1 Joe
2 Henry
3 Sam
4 Max

Orders 表:

Id CustomerId
1 3
2 1

例如给定上述表格,你的查询应返回:

Customers
Henry
Max

数据准备

Create table If Not Exists Customers (Id int, Name varchar(255));
Create table If Not Exists Orders (Id int, CustomerId int);
Truncate table Customers;
insert into Customers (Id, Name) values ('1', 'Joe');
insert into Customers (Id, Name) values ('2', 'Henry');
insert into Customers (Id, Name) values ('3', 'Sam');
insert into Customers (Id, Name) values ('4', 'Max');
Truncate table Orders;
insert into Orders (Id, CustomerId) values ('1', '3');
insert into Orders (Id, CustomerId) values ('2', '1');

SQL语句

-- 方法1:
SELECT
    a.NAME 'Customers' 
FROM
    Customers a
    LEFT JOIN Orders b ON a.Id = b.CustomerId 
WHERE
    b.Id IS NULL
-- 方法2:
SELECT NAME
    'Customers' 
FROM
    Customers 
WHERE
    Id NOT IN ( SELECT CustomerId FROM Orders )

184. 部门工资最高的员工

描述

Employee 表包含所有员工信息,每个员工有其对应的 Id, salary 和 department Id。

Id Name Salary DepartmentId
1 Joe 70000 1
2 Henry 80000 2
3 Sam 60000 2
4 Max 90000 1

Department 表包含公司所有部门的信息。

Id Name
1 IT
2 Sales

编写一个 SQL 查询,找出每个部门工资最高的员工。例如,根据上述给定的表格,Max 在 IT 部门有最高工资,Henry 在 Sales 部门有最高工资。

Department Employee Salary
IT Max 90000
Sales Henry 80000

数据准备

Create table If Not Exists Employee (Id int, Name varchar(255), Salary int, DepartmentId int);
Create table If Not Exists Department (Id int, Name varchar(255));
Truncate table Employee;
insert into Employee (Id, Name, Salary, DepartmentId) values ('1', 'Joe', '70000', '1');
insert into Employee (Id, Name, Salary, DepartmentId) values ('2', 'Henry', '80000', '2');
insert into Employee (Id, Name, Salary, DepartmentId) values ('3', 'Sam', '60000', '2');
insert into Employee (Id, Name, Salary, DepartmentId) values ('4', 'Max', '90000', '1');
Truncate table Department;
insert into Department (Id, Name) values ('1', 'IT');
insert into Department (Id, Name) values ('2', 'Sales');

SQL语句

SELECT
    Department.name AS 'Department',
    Employee.name AS 'Employee',
    Salary
FROM
    Employee
        JOIN
    Department ON Employee.DepartmentId = Department.Id
WHERE
    (Employee.DepartmentId , Salary) IN
    (   SELECT
            DepartmentId, MAX(Salary)
        FROM
            Employee
        GROUP BY DepartmentId
    )
;

185.部门工资前三高的所有员工

描述

Employee 表包含所有员工信息,每个员工有其对应的工号 Id,姓名 Name,工资 Salary 和部门编号 DepartmentId 。

Id Name Salary DepartmentId
1 Joe 85000 1
2 Henry 80000 2
3 Sam 60000 2
4 Max 90000 1
5 Janet 69000 1
6 Randy 85000 1
7 Will 70000 1

Department 表包含公司所有部门的信息。

Id Name
1 IT
2 Sales

编写一个 SQL 查询,找出每个部门获得前三高工资的所有员工。例如,根据上述给定的表,查询结果应返回:

Department Employee Salary
IT Max 90000
IT Randy 85000
IT Joe 85000
IT Will 70000
Sales Henry 80000
Sales Sam 60000

解释:

IT 部门中,Max 获得了最高的工资,Randy 和 Joe 都拿到了第二高的工资,Will 的工资排第三。销售部门(Sales)只有两名员工,Henry 的工资最高,Sam 的工资排第二。

数据准备

Create table If Not Exists Employee (Id int, Name varchar(255), Salary int, DepartmentId int);
Create table If Not Exists Department (Id int, Name varchar(255));
Truncate table Employee;
insert into Employee (Id, Name, Salary, DepartmentId) values ('1', 'Joe', '85000', '1');
insert into Employee (Id, Name, Salary, DepartmentId) values ('2', 'Henry', '80000', '2');
insert into Employee (Id, Name, Salary, DepartmentId) values ('3', 'Sam', '60000', '2');
insert into Employee (Id, Name, Salary, DepartmentId) values ('4', 'Max', '90000', '1');
insert into Employee (Id, Name, Salary, DepartmentId) values ('5', 'Janet', '69000', '1');
insert into Employee (Id, Name, Salary, DepartmentId) values ('6', 'Randy', '85000', '1');
insert into Employee (Id, Name, Salary, DepartmentId) values ('7', 'Will', '70000', '1');
Truncate table Department;
insert into Department (Id, Name) values ('1', 'IT');
insert into Department (Id, Name) values ('2', 'Sales');

SQL语句

SELECT
d.Name AS 'Department', e1.Name AS 'Employee', e1.Salary
FROM
Employee e1
JOIN
Department d ON e1.DepartmentId = d.Id
WHERE -- 相关子查询,父查询传递一个元祖到子查询,遍历子查询的的数据,如果满足不超过3个人的工资大于传过来的工资,则保留该元祖的数据,否则就过滤掉
3 > (SELECT
COUNT(DISTINCT e2.Salary) -- 对于重复的工资,计数一次,从而保证相同的工资的排名相同
FROM
Employee e2
WHERE
e2.Salary > e1.Salary
AND e1.DepartmentId = e2.DepartmentId
)

196.删除重复的邮箱

描述

编写一个 SQL 查询,来删除 Person 表中所有重复的电子邮箱,重复的邮箱里只保留 Id 最小 的那个。

Id Email
1 [email protected]
2 [email protected]
3 [email protected]

Id 是这个表的主键。
例如,在运行你的查询语句之后,上面的 Person 表应返回以下几行:

Id Email
1 [email protected]
2 [email protected]

数据准备

Create table If Not Exists Person (Id int,Email varchar(20));
Truncate table Person;
insert into Person values ('1',  '[email protected]');
insert into Person values ('2',  '[email protected]');
insert into Person values ('3',  '[email protected]');

SQL语句

DELETE p1.* 
FROM
    Person p1,
    Person p2 
WHERE
    p1.Email = p2.Email 
    AND p1.Id > p2.Id

197.上升的温度

描述

给定一个 Weather 表,编写一个 SQL 查询,来查找与之前(昨天的)日期相比温度更高的所有日期的 Id。

Id(INT) RecordDate(DATE) Temperature(INT)
1 2015-01-01 10
2 2015-01-02 25
3 2015-01-03 20
4 2015-01-04 30

例如,根据上述给定的 Weather 表格,返回如下 Id:

id
2
4

数据准备

Create table If Not Exists Weather (Id int, Date date, Temperature int);
Truncate table Weather;
insert into Weather (Id, Date, Temperature) values ('1', '2015-01-01', '10');
insert into Weather (Id, Date, Temperature) values ('2', '2015-01-02', '25');
insert into Weather (Id, Date, Temperature) values ('3', '2015-01-03', '20');
insert into Weather (Id, Date, Temperature) values ('4', '2015-01-04', '30');

SQL语句

SELECT
    a.Id 
FROM
    Weather a
    JOIN Weather b ON DATEDIFF(a.RecordDate,b.RecordDate) = 1
WHERE
    a.Temperature > b.Temperature

262.行程与用户

题目描述

Trips 表中存所有出租车的行程信息。每段行程有唯一键 Id,Client_Id 和 Driver_Id 是 Users 表中 Users_Id 的外键。Status 是枚举类型,枚举成员为 (‘completed’, ‘cancelled_by_driver’, ‘cancelled_by_client’)。

Id Client_Id Driver_Id City_Id Status Request_at
1 1 10 1 completed 2013-10-01
2 2 11 1 cancelled_by_driver 2013-10-01
3 3 12 6 cancelled 2013-10-01
4 4 13 6 cancelled_by_client 2013-10-01
5 1 10 1 completed 2013-10-02
6 2 11 6 completed 2013-10-02
7 3 12 6 completed 2013-10-02
8 2 12 12 completed 2013-10-03
9 3 10 12 completed 2013-10-03
10 4 13 12 cancelled_by_driver 2013-10-03

Users 表存所有用户。每个用户有唯一键 Users_Id。Banned 表示这个用户是否被禁止,Role 则是一个表示(‘client’, ‘driver’, ‘partner’)的枚举类型。

Users_Id Banned Role
1 No client
2 Yes client
3 No client
4 No client
10 No driver
11 No driver
12 No driver
13 No driver

写一段 SQL 语句查出 2013年10月1日 至 2013年10月3日 期间非禁止用户的取消率。基于上表,你的 SQL 语句应返回如下结果,取消率(Cancellation Rate)保留两位小数。

取消率的计算方式如下:(被司机或乘客取消的非禁止用户生成的订单数量) / (非禁止用户生成的订单总数)

Day Cancellation Rate
2013-10-01 0.33
2013-10-02 0.00
2013-10-03 0.50

数据准备

Create table If Not Exists Trips (Id int, Client_Id int, Driver_Id int,
City_Id int, Status ENUM('completed', 'cancelled_by_driver', 'cancelled_by_client'),
Request_at varchar(50));
Create table If Not Exists Users (Users_Id int,
Banned varchar(50), Role ENUM('client', 'driver', 'partner'));
Truncate table Trips;
insert into Trips (Id, Client_Id, Driver_Id, City_Id, Status,
Request_at) values ('1', '1', '10', '1', 'completed', '2013-10-01');
insert into Trips (Id, Client_Id, Driver_Id, City_Id, Status,
Request_at) values ('2', '2', '11', '1', 'cancelled_by_driver', '2013-10-01');
insert into Trips (Id, Client_Id, Driver_Id, City_Id, Status,
Request_at) values ('3', '3', '12', '6', 'completed', '2013-10-01');
insert into Trips (Id, Client_Id, Driver_Id, City_Id, Status,
Request_at) values ('4', '4', '13', '6', 'cancelled_by_client', '2013-10-01');
insert into Trips (Id, Client_Id, Driver_Id, City_Id, Status,
Request_at) values ('5', '1', '10', '1', 'completed', '2013-10-02');
insert into Trips (Id, Client_Id, Driver_Id, City_Id, Status,
Request_at) values ('6', '2', '11', '6', 'completed', '2013-10-02');
insert into Trips (Id, Client_Id, Driver_Id, City_Id, Status,
Request_at) values ('7', '3', '12', '6', 'completed', '2013-10-02');
insert into Trips (Id, Client_Id, Driver_Id, City_Id, Status,
Request_at) values ('8', '2', '12', '12', 'completed', '2013-10-03');
insert into Trips (Id, Client_Id, Driver_Id, City_Id, Status,
Request_at) values ('9', '3', '10', '12', 'completed', '2013-10-03');
insert into Trips (Id, Client_Id, Driver_Id, City_Id, Status,
Request_at) values ('10', '4', '13', '12', 'cancelled_by_driver', '2013-10-03');
Truncate table Users;
insert into Users (Users_Id, Banned, Role) values ('1', 'No', 'client');
insert into Users (Users_Id, Banned, Role) values ('2', 'Yes', 'client');
insert into Users (Users_Id, Banned, Role) values ('3', 'No', 'client');
insert into Users (Users_Id, Banned, Role) values ('4', 'No', 'client');
insert into Users (Users_Id, Banned, Role) values ('10', 'No', 'driver');
insert into Users (Users_Id, Banned, Role) values ('11', 'No', 'driver');
insert into Users (Users_Id, Banned, Role) values ('12', 'No', 'driver');
insert into Users (Users_Id, Banned, Role) values ('13', 'No', 'driver');

SQL语句

# 方法1:
SELECT
    temp1.Request_at AS DAY,
IF
    (
    cast( ( temp2.cancelled_order / temp1.total_order ) AS DECIMAL ( 3, 2 ) ) IS NULL,
    0.00,
    cast( ( temp2.cancelled_order / temp1.total_order ) AS DECIMAL ( 3, 2 ) ) 
    ) AS 'Cancellation Rate' 
FROM
    (
SELECT
    t1.Request_at,
    count( * ) AS total_order 
FROM
    ( SELECT * FROM Trips WHERE Request_at >= '2013-10-01' AND Request_at <= '2013-10-03' ) t1
    JOIN ( SELECT * FROM Users WHERE Banned = 'NO' ) t2 ON t1.Client_Id = t2.Users_Id 
GROUP BY
    t1.Request_at 
    ) temp1
    LEFT JOIN (
SELECT
    t1.Request_at,
    count( * ) AS cancelled_order 
FROM
    ( SELECT * FROM Trips WHERE Request_at >= '2013-10-01' AND Request_at <= '2013-10-03' AND ( STATUS = 'cancelled_by_driver' OR STATUS = 'cancelled_by_client' ) ) t1
    JOIN ( SELECT * FROM Users WHERE Banned = 'NO' ) t2 ON t1.Client_Id = t2.Users_Id 
GROUP BY
    t1.Request_at 
    ) temp2 ON temp1.Request_at = temp2.Request_at
    -- ---------------------------------------------------------
    # 方法2:
    SELECT
    temp.request_at DAY,
    round( sum( CASE temp.STATUS WHEN 'completed' THEN 0 ELSE 1 END ) / count( temp.STATUS ), 2 ) 'Cancellation Rate' 
FROM
    ( SELECT STATUS, request_at FROM trips t LEFT JOIN users u ON t.client_id = u.users_id WHERE u.banned = 'no' ) temp 
WHERE
    request_at BETWEEN '2013-10-01' 
    AND '2013-10-03' 
GROUP BY
    temp.request_at

511.游戏玩家分析I

描述

找出每个玩家第一次登录的日期。Activity表如下:

player_id device_id event_date games_played
1 2 2016-03-01 5
1 2 2016-03-02 6
2 3 2017-06-25 1
3 1 2016-03-02 0
3 4 2018-07-03 5

结果Result表如下:

player_id first_login
1 2016-03-01
2 2017-06-25
3 2016-03-02

数据准备

Create table If Not Exists activity(player_id int,device_id int,event_date date,games_played int);
Truncate table activity;
insert into activity values (1,2,'2016-03-01',5);
insert into activity values (1,2,'2016-03-02',6);
insert into activity values (2,3,'2017-06-25',1);
insert into activity values (3,1,'2016-03-02',0);
insert into activity values (3,4,'2018-07-03',5);

SQL语句

select player_id,min(event_date) first_login from activity group by player_id ;

512. 游戏玩家分析II

描述

显示每个玩家首次登录的设备号(同时显示玩家ID)。

数据准备

见511题

SQL语句

SELECT
    player_id,
    device_id 
FROM
    activity 
WHERE
    ( player_id, event_date ) IN ( SELECT player_id, min( event_date ) first_login FROM activity GROUP BY player_id )

534 游戏玩家分析III

描述

编写一个 SQL 查询,同时显示每组玩家、日期以及玩家到目前为止玩了多少游戏。也就是说,在此日期之前玩家所玩的游戏总数。详细情况请查看示例。

结果为:

player_id event_date games_played_so_far
1 2016-03-01 5
1 2016-03-02 11
1 2017-06-25 1
3 2016-03-02 0
3 2018-07-03 5

数据准备

见511题

SQL语句

-- 方法一
SELECT
    B.player_id,
    B.event_date,
    SUM( A.games_played ) AS `games_played_so_far` 
FROM
    Activity AS A
    JOIN Activity AS B ON ( A.player_id = B.player_id AND A.event_date <= B.event_date ) 
GROUP BY
    B.player_id,
    B.event_date
-- 方法二
SELECT C.player_id,C.event_date,C.games_played_so_far
FROM (
SELECT 
    A.player_id,
    A.event_date,
@sum_cnt:=
        if(A.player_id = @pre_id AND A.event_date != @pre_date,
            @sum_cnt + A.games_played,
            A.games_played 
        )
        AS `games_played_so_far`,
@pre_id:=A.player_id AS `player_ids`,
@pre_date:=A.event_date AS `event_dates`

FROM 
activity AS A,(SELECT @pre_id:=NULL,@pre_date:=NULL,@sum_cnt:=0) AS B
order BY A.player_id,A.event_date
) AS C

550 游戏玩家分析IV

描述

列出首次登录后,紧接着第二天又登录的人数占总人数的比例。比如511题中的数据,只有玩家1连续两天登录了,而总玩家有3个,所以连着两天登录的用户比例为:1/3 ~0.33

数据准备

见511题

SQL语句

SELECT
    ROUND(
    (
    -- 求第二天连续登陆的用户数
SELECT
    count( DISTINCT player_id ) AS con_cnt 
FROM
    (
SELECT
    a.player_id,
    DATEDIFF( b.event_date, a.event_date ) AS diff 
FROM
    activity a
    JOIN activity b ON ( a.player_id = b.player_id AND a.event_date < b.event_date ) 
    ) t1 
WHERE
    diff = 1 
    ) / ( SELECT count( DISTINCT player_id ) total_cnt FROM activity ),-- 总用户数
    2 
    ) fraction

569 员工薪水中位数

描述

有一张员工表Employees,字段为Id,Name,Salary,其中Id为员工Id,Name为公司名称,Salary为员工工资。如下面数据所示:

Id Company Salary
1 A 2341
2 A 341
3 A 15
4 A 15314
5 A 451
6 A 513
7 B 15
8 B 13
9 B 1154
10 B 1345
11 B 1221
12 B 234
13 C 2345
14 C 2645
15 C 2645
16 C 2652
17 C 65

请编写SQL查询来查找每个公司的薪水中位数。结果如下:

Id Company Salary
5 A 451
6 A 513
12 B 234
9 B 1154
14 C 2645

数据准备

drop  table if exists employees;
Create table employees(Id int,Company varchar(2),salary int);
insert into employees values(1,'A',2341);
insert into employees values(2,'A',341);
insert into employees values(3,'A',15);
insert into employees values(4,'A',15314);
insert into employees values(5,'A',451);
insert into employees values(6,'A',513);
insert into employees values(7,'B',15);
insert into employees values(8,'B',13);
insert into employees values(9,'B',1154);
insert into employees values(10,'B',1345);
insert into employees values(11,'B',1221);
insert into employees values(12,'B',234);
insert into employees values(13,'C',2345);
insert into employees values(14,'C',2645);
insert into employees values(15,'C',2645);
insert into employees values(16,'C',2652);
insert into employees values(17,'C',65);

SQL语句

select
     t1.id,
     t1.company,
     t1.salary
from

(
-- 查询每个公司员工薪水排名
select
     id,
     company,
     salary,
     @num := if( @company =company  ,@num + 1,1) as rank,
     @company := company
from employees a ,(select @num := 0,@company:="") b
order by company,salary) t1 
join
(
-- 查询每个公司有多少个员工
select
     company,
     count(*) as cnt
from
    employees
group by company

) t2 on t1.company= t2.company and t1.rank = (t2.cnt + 1) div 2 -- (员工总数+1)/2 为中位数

公众号『大数据技术与数仓』,回复『资料』领取大数据资料包