1.查询所有列
select
*
from user_profile
2.查询多列
select
device_id,
gender,
age,
university
from user_profile
3.查询结果去重
提供两种方式:分组和distinct函数去重
select
university
from user_profile
group by university
select
distinct(university)
from user_profile
4.查询结果限制返回行数
select
device_id
from user_profile limit 2
5.将查询后的列重新命名
select
device_id user_infos_example
from user_profile limit 2
6.查找学校是北大的学生信息
select
device_id,
university
from user_profile
where university = '北京大学'
7.查找年龄大于24岁的用户信息
select
device_id,
gender,
age,
university
from user_profile
where age > 24
8.查找某个年龄段的用户信息
select
device_id,
gender,
age
from user_profile
where age >= 20
and age <= 23
9.查找除复旦大学的用户信息
select
device_id,
gender,
age,
university
from user_profile
where university not in ("复旦大学")
10.用where过滤空值练习
select
device_id,
gender,
age,
university
from user_profile
where age != 'null'
11.高级操作符练习(1)
select
device_id,
gender,
age,
university,
gpa
from user_profile
where gender = 'male'
and gpa > 3.5
12.高级操作符练习(2)
select
device_id,
gender,
age,
university,
gpa
from user_profile
where university = "北京大学"
or gpa > "3.7"
13.Where in 和Not in
select
device_id,
gender,
age,
university,
gpa
from user_profile
where university = "北京大学"
or university = "复旦大学"
or university = "山东大学"
14.操作符混合运用
select
device_id,
gender,
age,
university,
gpa
from user_profile
where
(university = "山东大学" AND gpa > "3.5")
or
(university = "复旦大学" AND gpa > "3.8")
15.查看学校名称中含北京的用户
select
device_id,
age,
university
from user_profile
where university like "%北京%"
16.查找GPA最高值
select
max(gpa) as gpa
from user_profile
where university = "复旦大学"
17.计算男生人数以及平均GPA
select
count(gender) as male_num,
AVG(gpa) as avg_gpa
from user_profile
where
gender = "male"
18.分组计算练习题
select
gender,
university,
count(gender) as user_num,
AVG(active_days_within_30) as avg_active_day,
AVG(question_cnt) as avg_question_cnt
from user_profile
group by gender,university
19.分组过滤练习题
select
university,
AVG(question_cnt) as avg_question_cnt,
AVG(answer_cnt) as avg_answer_cnt
from user_profile
group by university
having avg_question_cnt < "5" or avg_answer_cnt < "20"
20.分组排序练习题
select
university,
AVG(question_cnt) as avg_question_cnt
from user_profile
group by university
order by avg_question_cnt
21.浙江大学用户题目回答情况
select
question_practice_detail.device_id,
question_practice_detail.question_id,
question_practice_detail.result
from question_practice_detail
left join user_profile
on question_practice_detail.device_id = user_profile.device_id
where university = "浙江大学"
order by question_id
22.统计每个学校的答过题的用户的平均答题数
select
university,
count(university) / count(distinct(user_profile.device_id)) as avg_answer_cnt
from user_profile
join question_practice_detail
on question_practice_detail.device_id = user_profile.device_id
group by university
order by university
23.统计每个学校各难度的用户平均刷题数
select
university,
difficult_level,
count(result) / count(distinct(question_practice_detail.device_id)) as avg_answer_cnt
from user_profile
inner join question_practice_detail on user_profile.device_id = question_practice_detail.device_id
inner join question_detail on question_practice_detail.question_id = question_detail.question_id
group by university,difficult_level
24.统计每个用户的平均刷题数
select
university,
difficult_level,
count(result) / count(distinct(question_practice_detail.device_id)) as avg_answer_cnt
from user_profile
inner join question_practice_detail on user_profile.device_id = question_practice_detail.device_id
inner join question_detail on question_practice_detail.question_id = question_detail.question_id
where university = "山东大学"
group by university,difficult_level
25.查找山东大学或者性别为男生的信息
select
device_id,
gender,
age,
gpa
from user_profile
where university = "山东大学"
union all
select
device_id,
gender,
age,
gpa
from user_profile
where gender = "male"
26.计算25岁以上和以下的用户数量
开始涉及case语句,建议了解简单case语句与搜索case语句
select
case
when age is null then '25岁以下'
when age < "25" then '25岁以下'
when age >= '25' then '25岁及以上'
end as age_cut,count(*)number
from user_profile
group by age_cut
27.查看不同年龄段的用户明细
select
device_id,
gender,
case
when age is null then "其他"
when age <20 then "20岁以下"
when age >= 20 and age <=24 then "20-24岁"
when age >= 25 then "25岁及以上"
end age_cut
from user_profile
28.计算用户8月每天的练题数量
开始涉及日期的相关函数,建议了解sql的DATE与DATE相关函数
select
DATE_FORMAT(date,'%e') as day,
count(result) as question_cnt
from question_practice_detail
where date between "2021-08-01" and "2021-08-31"
group by day
29.计算用户的平均次日留存率
思路:平均留存率=第二天接着访问的用户数量 / 第一天所有的访问的用户数量
1.去重,因为一个用户一天可以有多次访问记录,计算留存率只需要记录用户id即可
2.构造两个表,从第一个表中抽出去重后的所有访问的用户数量(当成第一天),用第二个表和第一个表进行连接,并且让第二张表的date+1 = 第一张表的date,从连接的表中抽出第二天接着访问的用户数量,实际上count中的抽数都是在left join后的表中,由于left join的原因,在连接后的表中,第一张表的device_id为q1的device_id,第二张表的device_id由于left join的原因,没能和第一张表连接的device_id已经变为none,而count不记录none值
select
count(q2.device_id) / count(q1.device_id) as avg_ret
from
(select distinct device_id, date from question_practice_detail) as q1
left join
(select distinct device_id, date from question_practice_detail) as q2
on q1.device_id = q2.device_id and q1.date = date_add(q2.date,interval 1 day)
30.统计每种性别的人数
select
if(profile like '%female%',"female","male") as gender,
count(*) as number
from user_submit
where 1=1
group by gender
31.提取博客URL中的用户名
select
device_id,
substring_index(blog_url,"/",-1) as user_name
from user_submit
select
substring_index(substring_index(profile,",",-2),",",1) as age,
count(*) as number
from user_submit
where 1=1
group by age
33.找出每个学校GPA最低的同学
提供两种思路:窗口函数与两表合并
select
device_id,
university,
gpa
from
(select
device_id,
university,
gpa,
rank() over(partition by university order by gpa) as rank_in_university
from user_profile) as table1
where 1 = 1
and table1.rank_in_university = 1
####################################
select
table1.device_id,
table1.university,
table1.gpa
from
(select device_id,university,gpa from user_profile) as table1
inner join
(select device_id,university,gpa,min(gpa) over (partition by university) as min_gpa from user_profile) as table2
on table1.device_id = table2.device_id
and table1.university = table2.university
and table1.gpa = table2.min_gpa
order by table1.university
34.统计复旦用户8月练题情况
提供两种思路:朴素解法与正常解法
朴素解法:先统计出有答题记录的device_id的情况再统计没有答题记录的device_id的情况,利用count不统计none、left join的左右拼接与union all的上下拼接
select
table1.device_id,
table1.university,
table1.question_cnt,
if(table2.question_cnt is null,0,table2.question_cnt) as right_question_cnt
from
(select
user_profile.device_id,
university,
count(result) as question_cnt
from user_profile
left join question_practice_detail
on user_profile.device_id = question_practice_detail.device_id
where 1 = 1
and university = "复旦大学"
and date between "2021-08-01" and "2021-08-31"
group by device_id) as table1
left join
(select
user_profile.device_id,
university,
count(result) as question_cnt
from user_profile
left join question_practice_detail
on user_profile.device_id = question_practice_detail.device_id
where 1 = 1
and university = "复旦大学"
and date between "2021-08-01" and "2021-08-31"
and result = "right"
group by device_id) as table2
on table1.device_id = table2.device_id
union all
select
distinct(user_profile.device_id),
university,
0 as question_cnt,
0 as right_question_cnt
from user_profile
left join question_practice_detail
on user_profile.device_id = question_practice_detail.device_id
where 1 = 1
and university = "复旦大学"
and user_profile.device_id not in
(select
user_profile.device_id
from user_profile
left join question_practice_detail
on user_profile.device_id = question_practice_detail.device_id
where 1 = 1
and university = "复旦大学"
and date between "2021-08-01" and "2021-08-31"
)
正常解法:
count在没有分组时不统计none而分组过后统计none的数量
select
user_profile.device_id,
university,
count(result) as question_cnt,
sum(if(result = "right",1,0)) as right_question_cnt
from user_profile
left join question_practice_detail
on user_profile.device_id = question_practice_detail.device_id
and date between "2021-08-01" and "2021-08-31"
where 1 = 1
and university = "复旦大学"
group by user_profile.device_id
35.浙大不同难度题目的正确率
难点在于聚合函数,不必考虑每种情况,只需要概化到统计正确和总数即可
select
difficult_level,
sum(if(result = "right",1,0)) / count(result) as correct_rate
from question_practice_detail
left join user_profile
on question_practice_detail.device_id = user_profile.device_id
left join question_detail
on question_practice_detail.question_id = question_detail.question_id
where 1 = 1
and university = "浙江大学"
group by difficult_level
order by correct_rate
36.查找后排序
select
device_id,
age
from user_profile
order by age
37.查找后多列排序
select
device_id,
gpa,
age
from user_profile
order by gpa,age
38.查找后降序排列
select
device_id,
gpa,
age
from user_profile
order by gpa desc,age desc
39.21年8月份练题总数
select
count(distinct(device_id)) as did_cnt,
count(question_id) as question_cnt
from question_practice_detail
where date between "2021-08-01" and "2021-08-31"