-- 1083 方法2非常重要,透视列的思想
-- 1097 重点看方法2(可以直接得到installs), 暂时不用学习方法3
-- 1098 重新考虑一下,注意这里需要WHERE两个时间
-- 1107 方法2:subdate(date, INTERVAL 30 DAY)函数的使用;学会使用GROUP By替换DISTINCT
-- 1112 我认为补充的方法3应该会很好(正统思路,好好理解);方法1看一下,典型rank
-- 1126 注意MySQL在JOIN的时候,是可以使用范围关联的
-- 1127 SELECT 'desktop' AS platform FROM dual; (可以自己制造一个完整表)
-- 1142 看一下这个题,只需要整一个临时表:然后avg(number)即可
-- 1148 优化:一定要用GROUP By去处理Distinct问题
1075. 项目员工 I
简单题目,重点注意小数点控制方法即可
SELECT project_id, round(avg(experience_years),2) average_years
FROM
project p,employee e
WHERE p.employee_id=e.employee_id
GROUP by project_id
1076. 项目员工II
考虑并列的情况,自己对这种情况掌握还不够熟练
SELECT project_id
FROM
project p
GROUP by project_id
having count(*)=
(SELECT count(*) cnt
FROM
project
GROUP by project_id
ORDER BY cnt desc
limit 1)
# 自己的方法(我认为这个方法 最好) 但是没有跑通,好像是LeetCode排序有问题
# 该方法后续待定,好好思考一下 哪里不对
# 这个逻辑有一点问题: 假设第五名和第六名的人数相同,那么第六名也会标记为1
# 最后确定:是三个表的原因导致排序不稳定,可能方法2中多加了一个子查询是有道理的!
SELECT project_id,employee_id
FROM
(SELECT project_id,e.employee_id, (case when @pro=project_id and @year=experience_years then 1
when @pro=project_id then 0
else 1 end) cnt, @pro := project_id, @year := experience_years
FROM
project p,employee e,(SELECT @pro:=null, @year:=null) t
WHERE p.employee_id=e.employee_id
ORDER BY project_id asc, experience_years desc) t1
WHERE cnt=1;
# 方法2:0.5177 思路(确定project_id和并列的年限就可以啦!)
SELECT DISTINCT P.project_id, P.employee_id FROM project P INNER JOIN Employee E
ON P.employee_id = E.employee_id
INNER JOIN
(SELECT T1.project_id, T1.experience_years FROM(
SELECT
IF(@project = e.project_id,@rank := @rank +1,@rank := 1) AS rank,
@project := e.project_id,
e.*
FROM
(SELECT
@project := NULL,@rank := 0) r,
(SELECT P.project_id, P.employee_id, E.experience_years FROM Project P
INNER JOIN Employee E ON P.employee_id = E.employee_id) e
ORDER BY e.project_id ASC,e.experience_years DESC) T1
WHERE T1.rank=1) T2
ON E.experience_years = T2.experience_years AND P.project_id = T2.project_id
ORDER BY P.project_id, P.employee_id;
# 方法3:对方法2的改进 一个思路0.9624
SELECT p.project_id,e.employee_id
FROM
project p inner join employee e
ON
p.employee_id=e.employee_id
and
(p.project_id,e.experience_years) in
(select p.project_id,max(experience_years)
from
project p,employee e
where
p.employee_id=e.employee_id
group by p.project_id)
处理并列问题,大概就这两种方法
# 方法1 0.9536
select seller_id
from sales
group by seller_id
having sum(price)=
(
select sum(price) price
from sales
group by seller_id
order by price desc
limit 1
);
# 方法2 0.6954(可能不准)
select
seller_id
from sales
group by seller_id
having
sum(price)>=all(select sum(price) from sales group by seller_id )
# 自己的方法逻辑很差! 0.8700
select distinct buyer_id
from
product p join sales s
on p.product_id=s.product_id
where product_name='S8' and buyer_id not in (
select buyer_id
from product p join sales s
on p.product_id=s.product_id
where product_name='iPhone')
# 方法2: 数据透视表方法,逻辑清晰0.6426(虽然显示效果不好,但是我觉得不错)
SELECT a.buyer_id
FROM (
SELECT buyer_id, MAX(CASE
WHEN product_name = 'S8' THEN 1
ELSE 0
END) AS s8, MAX(CASE
WHEN product_name = 'iPhone' THEN 1
ELSE 0
END) AS iphone
FROM Sales a
LEFT JOIN Product b ON a.product_id = b.product_id
GROUP BY buyer_id
) a
WHERE s8 = 1
AND iphone=0
1084. 销售分析III
# 自己的方法 性能差 0.2394
select distinct p.product_id,p.product_name
from
product p join sales s
on p.product_id=s.product_id
where sale_date between '2019-01-01' and '2019-03-31'
and p.product_id not in
(select product_id from sales where sale_date <'2019-01-01'
union all
select product_id from sales where sale_date >'2019-03-31')
order by p.product_id;
# 方法2 94.36
select a.product_id
, Product.product_name
from
(
select distinct product_id
from Sales
where sale_date between date('2019-01-01') and date('2019-03-31')
)a
left join
(
select distinct product_id
from Sales
where sale_date<date('2019-01-01')
or sale_date>date('2019-03-31')
)b
on a.product_id = b.product_id
left join Product
on Product.product_id = a.product_id
where b.product_id is null
# 自己第一次就写对 0.7164
select t2.event_date install_dt, count(*) installs, round(count(a2.event_date)/count(*), 2) Day1_retention
from
(select player_id,event_date, (case when @player=player_id then @cnt:=@cnt+1
when @player:=player_id then @cnt:=1 end) cnt
from
activity a1,(select @cnt:=null, @player:=null) t1
order by player_id,event_date) t2
left join activity a2 on t2.player_id=a2.player_id and t2.event_date=a2.event_date-1
where t2.cnt=1
group by t2.event_date
# 方法2 其实获得最小日期 也可以这么做 0.7537
select a.install_dt,
count(a.player_id) as installs,
round(count(b.player_id)/ count(a.player_id),2) as Day1_retention
from
(
select player_id,min(event_date) as install_dt
from Activity
group by player_id
)
as a left join Activity as b
on a.player_id=b.player_id and b.event_date-a.install_dt=1
group by a.install_dt
# 牛逼的方法 直接没有使用join 0.9925 重点学习
# 最内层的临时表中,大佬比我多取了一列(上一次玩游戏的日期)
# 巧妙的把昨天 转换为 上一天!
SELECT event_date AS install_dt, COUNT(*) AS installs
, round(SUM(isCalc) / COUNT(*), 2) AS Day1_retention
FROM (
SELECT a.player_id, MIN(a.event_date) AS event_date, MAX(CASE WHEN a.rk = 2 AND DATE_ADD(lastDate, INTERVAL 1 DAY) = event_date
THEN 1 ELSE 0 END) AS isCalc
FROM
(SELECT a.player_id, a.event_date, a.lastDate, a.rk
FROM (
SELECT a.player_id, a.event_date, @preDate AS lastDate, CASE WHEN @prePid = a.player_id THEN @curRank := @curRank + 1
ELSE @curRank := 1 END AS rk, @prePid := player_id, @preDate := event_date
FROM Activity a, (SELECT @curRank := 0, @prePid := NULL, @preDate := NULL) r
ORDER BY a.player_id, a.event_date) a
WHERE a.rk = 1 OR a.rk = 2) a
GROUP BY a.player_id
) a
GROUP BY a.event_date;
1098. 小众书籍
本题遗漏了要求!是考虑最近一年内的订单!
# 0.9629 感觉逻辑很好,但是感觉not in还可以改进
select book_id,name
from Books
where book_id not in
(select b.book_id from Orders b where b.dispatch_date > '2018-06-23' and b.dispatch_date < '2019-06-23'
group by b.book_id having sum(b.quantity) >= 10)
and book_id not in
(select a.book_id from Books a where datediff('2019-06-23', a.available_from) < 30);
# 方法2 自己做的改进,但只有0.47 考虑一下为什么
select a.book_id,name
from
(select book_id, name from Books where available_from < '2019-05-23' ) a
left join
(select book_id, quantity from orders where dispatch_date>'2018-06-23' ) b
on a.book_id=b.book_id
group by a.book_id
having sum(quantity) < 10 or sum(quantity) is null
1107. 每日新用户统计
理解一个地方,这里的90天是指2019-06-30之前的90天。
# 方法1 这个逻辑本来是没问题的(如果这是用户所有的数据的话)
# 问题是 这个用户第一条数据不一定就是login(我确认过了,有的第一条数据是logout..)
# 争取完善一下这个思路
SELECT activity_date login_date, count(*) user_count
FROM
(SELECT activity, activity_date, if(@id=user_id,@cnt:=@cnt+1,@cnt:=1) cnt, @id:=user_id
FROM traffic,(SELECT @id:=0, @cnt:=0)t1
ORDER BY user_id,activity_date) t2
WHERE cnt=1 and activity_date >= Subdate('2019-06-30',INTERVAL 90 DAY)
GROUP BY activity_date
# 方法2 0.7292
SELECT login_date, COUNT(user_id) AS user_count
FROM
(SELECT user_id, MIN(activity_date) AS login_date
FROM Traffic
WHERE activity = 'login'
GROUP BY user_id) tmp
WHERE login_date >= Subdate('2019-06-30',INTERVAL 90 DAY)
GROUP BY login_date
ORDER BY login_date
# 受启发与方法2 修改方法1 0.5280
SELECT activity_date login_date, count(*) user_count
FROM
(SELECT activity, activity_date, if(@id=user_id,@cnt:=@cnt+1,@cnt:=1) cnt, @id:=user_id
FROM traffic,(SELECT @id:=0, @cnt:=0)t1
WHERE activity='login' # 增添内容
ORDER BY user_id,activity_date) t2
WHERE cnt=1 and activity_date >= Subdate('2019-06-30',INTERVAL 90 DAY)
GROUP BY activity_date
# 个人方法 100%
SELECT student_id,course_id,grade
FROM
(SELECT student_id,course_id,grade, if(@id=student_id,@cnt:=@cnt+1,@cnt:=1) cnt, @id:=student_id
FROM Enrollments,(SELECT @id:=null, @cnt:=null) t1
ORDER BY student_id,grade desc,course_id) t2
WHERE cnt=1
ORDER BY student_id
# 方法2 0.3265,注意其中的‘MIN(course_id)’ 处理并列的角度
SELECT student_id, MIN(course_id) AS course_id, grade
FROM
Enrollments
WHERE (student_id, grade) IN (SELECT student_id, MAX(grade)
FROM Enrollments
GROUP BY student_id)
GROUP BY student_id
ORDER BY student_id
2020-08-05补充方法3:
SELECT student_id, MIN(course_id) course_id, MAX(grade)
FROM Enrollments e JOIN
(SELECT student_id, MAX(grade) FROM Enrollments GROUP BY student_id) t
ON e.student_id=t.student_id AND e.grade=t.grade
GROUP BY student_id
ORDER BY student_id;
1113. Reported Posts
表没有主键,因此可能会存在重复值。列action是枚举类型,取值可能为(‘view’, ‘like’, ‘reaction’, ‘comment’, ‘report’, ‘share’)。列extra是关于action的可选信息,例如报道的原因或者行动的类型。
需要查询的是post_id不重复的个数。。没读懂英语
# 这是一道简单题,不需要过分关注
SELECT extra AS report_reason,
COUNT(DISTINCT post_id) AS report_count
FROM Actions
WHERE action_date = '2019-07-4' AND action = 'report'
GROUP BY extra
1126. Active Businesses
总结一下:这道题就是查找牛逼的企业(至少有 两项过硬的业务)
# 方法1 0.6970
SELECT business_id
FROM
events a
join
(SELECT event_type, avg(occurences) avg
FROM events group by event_type) b
ON a.event_type=b.event_type and a.occurences>b.avg
group by business_id
having count(*)>1
1127. User Purchase Platform
该表格记录了用户的在线购物网站购物的消费历史。
编写一个SQL查询来查找每个日期的用户总数和仅使用移动设备、仅使用桌面设备以及同时使用移动设备和桌面设备的总开销。
# 自己第一种方法(错),自己想想有多蠢。。。
SELECT spend_date,platform,sum(amount) tltal_amount, count(distinct user_id) total_users
FROM
spending
GROUP by spend_date,platform
# 个人第二种方法,但是没法输出:' 2019-07-02 | both | 0 | 0 '
SELECT spend_date,platform,sum(amount) tltal_amount, count(distinct user_id) total_users
FROM
(SELECT user_id,spend_date,'both' platform,sum(amount) amount
FROM
spending
GROUP by spend_date,user_id
having count(*)=2
union all
SELECT user_id,spend_date, platform,sum(amount) amount
FROM
spending
GROUP by spend_date,user_id
having count(*)=1) t
GROUP by spend_date,platform
ORDER BY platform
# 方法3 不知道哪里错了,而且思路有点混乱(不建议)
SELECT spend_date,platform,ifnull(sum(amount),0) tltal_amount, count(amount) total_users
FROM
(SELECT user_id,spend_date,'both' platform,if(count(*)=1,null,sum(amount)) amount
FROM
spending
GROUP by spend_date,user_id
union all
SELECT user_id,spend_date, platform,sum(amount) amount
FROM
spending
GROUP by spend_date,user_id
having count(*)=1) t
GROUP by spend_date,platform
# 方法4 0.97 逻辑非常清晰
# 整体上分为两个表连接,其中第二个表中嵌套两层子查询。
# 最内容等于做了一个分类,之后完成表的转换(platform由两类变为三类)
SELECT a.spend_date, a.platform, ifnull(b.total_amount, 0) AS total_amount
, ifnull(b.total_users, 0) as total_users
FROM (
SELECT *
FROM
(SELECT 'desktop' AS platform
FROM dual
UNION
SELECT 'mobile' AS platform
FROM dual
UNION
SELECT 'both' AS platform
FROM dual) a
CROSS JOIN
(SELECT DISTINCT spend_date
FROM Spending) b
) a
LEFT JOIN
(SELECT a.spend_date, a.platform, SUM(allAmount) AS total_amount, COUNT(*) AS total_users
FROM
(SELECT a.user_id, a.spend_date, allAmount, CASE WHEN mobile = 1 AND desktop = 1 THEN 'both'
WHEN desktop = 1 THEN 'desktop'
ELSE 'mobile' END AS platform
FROM
(SELECT a.user_id, a.spend_date, MAX(CASE WHEN platform = 'mobile' THEN 1 ELSE 0 END) AS mobile
, MAX(CASE WHEN platform = 'desktop' THEN 1 ELSE 0 END) AS desktop
, SUM(amount) AS allAmount
FROM Spending a
GROUP BY a.user_id, a.spend_date) a ) a
GROUP BY a.spend_date, a.platform) b
ON a.platform = b.platform AND a.spend_date = b.spend_date;
# 方法5 修改了‘转换表的方式’ 100% 但是和第一种方法应该差不多
SELECT a.spend_date, a.platform, ifnull(b.total_amount, 0) AS total_amount
, ifnull(b.total_users, 0) as total_users
FROM (
SELECT *
FROM
(SELECT 'desktop' AS platform
FROM dual
UNION
SELECT 'mobile' AS platform
FROM dual
UNION
SELECT 'both' AS platform
FROM dual) a
CROSS JOIN
(SELECT DISTINCT spend_date
FROM Spending) b
) a
LEFT JOIN
(SELECT a.spend_date, a.platform, SUM(allAmount) AS total_amount, COUNT(*) AS total_users
FROM
(SELECT a.user_id, a.spend_date, CASE WHEN COUNT(DISTINCT platform) = 2 THEN 'both'
ELSE MAX(platform) END AS platform, SUM(amount) AS allAmount
FROM Spending a
GROUP BY a.user_id, a.spend_date ) a
GROUP BY a.spend_date, a.platform) b
ON a.platform = b.platform AND a.spend_date = b.spend_date;
# 仅仅依靠下面的代码就完成了表的转换
SELECT a.user_id, a.spend_date, CASE WHEN COUNT(DISTINCT platform) = 2 THEN 'both'
ELSE MAX(platform) END AS platform, SUM(amount) AS allAmount
FROM Spending a
GROUP BY a.user_id, a.spend_date
1132. Reported Posts II
查询每天被诊断为垃圾邮件后的删除率的平均值,并保留两位小数。
# 第一次方法 0.2688
SELECT round((avg(average_daily_percent)*100),2) average_daily_percent
FROM
(SELECT count( DISTINCT b.post_id)/count(DISTINCT a.post_id) average_daily_percent
FROM
Actions a left join removals b
ON a.post_id=b.post_id
WHERE extra='spam'
GROUP by a.action_date) T
select ifnull(round(avg(number),2),0.00) average_sessions_per_user
from
(select user_id, count(distinct session_id) number
from activity
where activity_date > '2019-06-27' and activity_date <='2019-07-27'
group by user_id ) t
这是一道简单题,但是很久没做出来,因为忽略了一个重要条件:请注意,每个会话完全属于一个用户。
# 注意group by方法 也可以去掉重复值,而且比distinct更好
# 方法1 0.84
select distinct viewer_id id
from views
where viewer_id=author_id
order by
# 方法2 0.92
select viewer_id as id
from Views
where viewer_id = author_id
group by viewer_id
order by viewer_id