用户信息表:user_profile
其中device_id指终端编号(每个用户有唯一的一个终端),gender指性别,age指年龄,university指用户所在的学校,gpa是该用户平均学分绩点,active_days_within_30是30天内的活跃天数。
建表:
drop table if exists user_profile;
CREATE TEMP TABLE user_profile(
id int NOT NULL,
device_id int NOT NULL,
gender varchar(14) NOT NULL,
age int ,
university varchar(32) NOT NULL,
province varchar(32) NOT NULL,
gpa float);
INSERT INTO user_profile VALUES(1,2138,'male',21,'北京大学','BeiJing',3.4);
INSERT INTO user_profile VALUES(2,3214,'male',null,'复旦大学','Shanghai',4.0);
INSERT INTO user_profile VALUES(3,6543,'female',20,'北京大学','BeiJing',3.2);
INSERT INTO user_profile VALUES(4,2315,'female',23,'浙江大学','ZheJiang',3.6);
INSERT INTO user_profile VALUES(5,5432,'male',25,'山东大学','Shandong',3.8);
INSERT INTO user_profile VALUES(1,2234,'male',21,'北京大学','BeiJing',3.2);
INSERT INTO user_profile VALUES(2,2235,'male',null,'复旦大学','Shanghai',3.8);
INSERT INTO user_profile VALUES(3,2236,'female',20,'复旦大学','Shanghai',3.5);
INSERT INTO user_profile VALUES(4,2237,'female',23,'浙江大学','ZheJiang',3.3);
INSERT INTO user_profile VALUES(5,2238,'male',25,'复旦大学','Shanghai',3.1);
INSERT INTO user_profile VALUES(6,2239,'male',25,'北京大学','BeiJing',3.6);
INSERT INTO user_profile VALUES(7,2240,'male',null,'清华大学','BeiJing',3.3);
INSERT INTO user_profile VALUES(8,2241,'female',null,'北京大学','BeiJing',3.7);
1.查看用户信息表中所有的数据:
select * from user_profile;
2.取出用户的设备id对应的性别、年龄和学校的数据:
select device_id,gender,age,university from user_profile;
3.取出学校的去重数据:
select distinct(university) from user_profile;
4.查看前2个用户明细设备ID数据:
select device_id from user_profile limit 2;
5.查看前2个用户明细设备ID数据,并将列名改为 ‘user_infos_example’:
select device_id as user_infos_example from user_profile limit 2;
6.筛选出所有北京大学的学生,返回设备id和学校:
select device_id,university from user_profile where university=‘北京大学’;
7.针对24岁以上的用户开展分析,取出满足条件的设备ID、性别、年龄、学校:
select device_id,gender,age,university from user_profile where age>24;
8.针对20岁及以上且23岁及以下的用户开展分析,请你取出满足条件的设备ID、性别、年龄:
select device_id,gender,age from user_profile where age>=20 and age<=23;
9.查看除复旦大学以外的所有用户明细:
select device_id,gender,age,university from user_profile where university!=‘复旦大学’;
10.对用户的年龄分布开展分析,剔除没有获取到年龄的用户,取出所有年龄值不为空的用户的设备ID,性别,年龄,学校的信息:
select device_id,gender,age,university from user_profile where age is not null;
11.找到男性且GPA在3.5以上(不包括3.5)的用户进行调研:
select device_id,gender,age,university,gpa from user_profile where gender=‘male’ and gpa>3.5;
12.到学校为北大或GPA在3.7以上(不包括3.7)的用户进行调研
select device_id,gender,age,university,gpa from user_profile where university=‘北京大学’ or gpa>3.7;
13.找到学校为北大、复旦和山大的同学进行调研:
select device_id,gender,age,university,gpa from user_profile where university in (‘北京大学’,‘复旦大学’,‘山东大学’);
14.找到gpa在3.5以上(不包括3.5)的山东大学用户 或 gpa在3.8以上(不包括3.8)的复旦大学同学进行用户调研:
select device_id,gender,age,university,gpa from user_profile
where (gpa>3.5 and university=‘山东大学’) or (gpa>3.8 and university=‘复旦大学’);
15.查看所有大学中带有北京的用户的信息:
select device_id,age,university from user_profile where university like ‘%北京%’;
16.想要知道复旦大学学生gpa最高值是多少:
select max(gpa) from user_profile where university=‘复旦大学’;
17.看一下男性用户有多少人以及他们的平均gpa是多少:
select count(*) as male_num,avg(gpa) as avg_gpa from user_profile where gender=‘male’;
18.取出用户信息表中的用户年龄,请取出相应数据,并按照年龄升序排序:
select device_id,age from user_profile order by age asc;
19.取出用户信息表中的年龄和gpa数据,并先按照gpa升序排序,再按照年龄升序排序输出:
select device_id,gpa,age from user_profile order by gpa asc,age asc;
用户信息表 user_profile:其中device_id指终端编号(认为每个用户有唯一的一个终端),gender指性别,age指年龄,university指用户所在的学校,gpa是该用户平均学分绩点,active_days_within_30是30天内的活跃天数,question_cnt发帖数量字段,answer_cnt回答数量字段。
答题情况明细表 question_practice_detail:其中question_id是题目编号,result是答题结果。
试题难度程度表 question_detail:其中question_id是题目编号,difficult_level是题目的难度等级。
建表:
drop table if exists user_profile;
drop table if exists question_practice_detail;
drop table if exists question_detail;
CREATE TEMP TABLE user_profile (
id int NOT NULL,
device_id int NOT NULL,
gender varchar(14) NOT NULL,
age int ,
university varchar(32) NOT NULL,
gpa float,
active_days_within_30 int ,
question_cnt int ,
answer_cnt int
);
CREATE TEMP TABLE question_practice_detail (
id int NOT NULL,
device_id int NOT NULL,
question_id int NOT NULL,
result varchar(32) NOT NULL,
date date NOT NULL
);
CREATE TEMP TABLE question_detail (
id int NOT NULL,
question_idint NOT NULL,
difficult_level varchar(32) NOT NULL
);
INSERT INTO user_profile VALUES(1,2138,'male',21,'北京大学',3.4,7,2,12);
INSERT INTO user_profile VALUES(2,3214,'male',null,'复旦大学',4.0,15,5,25);
INSERT INTO user_profile VALUES(3,6543,'female',20,'北京大学',3.2,12,3,30);
INSERT INTO user_profile VALUES(4,2315,'female',23,'浙江大学',3.6,5,1,2);
INSERT INTO user_profile VALUES(5,5432,'male',25,'山东大学',3.8,20,15,70);
INSERT INTO user_profile VALUES(6,2131,'male',28,'山东大学',3.3,15,7,13);
INSERT INTO user_profile VALUES(7,4321,'male',28,'复旦大学',3.6,9,6,52);
INSERT INTO question_practice_detail VALUES(1,2138,111,'wrong','2021-05-03');
INSERT INTO question_practice_detail VALUES(2,3214,112,'wrong','2021-05-09');
INSERT INTO question_practice_detail VALUES(3,3214,113,'wrong','2021-06-15');
INSERT INTO question_practice_detail VALUES(4,6543,111,'right','2021-08-13');
INSERT INTO question_practice_detail VALUES(5,2315,115,'right','2021-08-13');
INSERT INTO question_practice_detail VALUES(6,2315,116,'right','2021-08-14');
INSERT INTO question_practice_detail VALUES(7,2315,117,'wrong','2021-08-15');
INSERT INTO question_practice_detail VALUES(8,3214,112,'wrong','2021-05-09');
INSERT INTO question_practice_detail VALUES(9,3214,113,'wrong','2021-08-15');
INSERT INTO question_practice_detail VALUES(10,6543,111,'right','2021-08-13');
INSERT INTO question_practice_detail VALUES(11,2315,115,'right','2021-08-13');
INSERT INTO question_practice_detail VALUES(12,2315,116,'right','2021-08-14');
INSERT INTO question_practice_detail VALUES(13,2315,117,'wrong','2021-08-15');
INSERT INTO question_practice_detail VALUES(14,3214,112,'wrong','2021-08-16');
INSERT INTO question_practice_detail VALUES(15,3214,113,'wrong','2021-08-18');
INSERT INTO question_practice_detail VALUES(16,6543,111,'right','2021-08-13');
INSERT INTO question_detail VALUES(1,111,'hard');
INSERT INTO question_detail VALUES(2,112,'medium');
INSERT INTO question_detail VALUES(3,113,'easy');
INSERT INTO question_detail VALUES(4,115,'easy');
INSERT INTO question_detail VALUES(5,116,'medium');
INSERT INTO question_detail VALUES(6,117,'easy');
20.了解2021年8月份所有练习过题目的总用户数和练习过题目的总次数:
select count(distinct device_id) did_cnt,count(question_id) question_cnt
from question_practice_detail
where month(date)=8;
– where date like “2021-08%”
21.对每个学校不同性别的用户活跃情况和发帖数量进行分析,计算出每个学校每种性别的用户数、30天内平均活跃天数和平均发帖数量:
select gender,university, count(*) as user_num,
avg(active_days_within_30) as avg_active_day,
avg(question_cnt) as avg_question_cnt
from user_profile
group by gender,university;
22.查看每个学校用户的平均发贴和回帖情况,寻找低活跃度学校进行重点运营,取出平均发贴数低于5的学校或平均回帖数小于20的学校:
select university,
avg(question_cnt) as avg_question_cnt,
avg(answer_cnt) as avg_answer_cnt
from user_profile
group by university
having avg_question_cnt<5 or avg_answer_cnt<20
23…查看不同大学的用户平均发帖情况,并期望结果按照平均发帖情况进行升序排列:
select university,
avg(question_cnt) as avg_question_cnt
from user_profile
group by university
order by avg_question_cnt asc;
24.查看所有来自浙江大学的用户题目回答明细情况:
select a.device_id,question_id,result
from question_practice_detail a
left join user_profile b
on a.device_id=b.device_id
where b.university='浙江大学'
25.了解每个学校答过题的用户平均答题数量情况,结果按照university升序排序。
select university,
count(b.question_id)/count(distinct(b.device_id)) as avg_answer_cnt
from user_profile a
inner join question_practice_detail b on a.device_id=b.device_id
group by university
order by university asc;
26.计算一些参加了答题的不同学校、不同难度的用户平均答题量:
(结果在小数点位数保留4位,4位之后四舍五入)
select a.university,c.difficult_level,
round(count(b.question_id)/count(distinct(b.device_id)),4)as avg_answer_cnt
from user_profile a
inner join question_practice_detail b on a.device_id=b.device_id
left join question_detail c on b.question_id=c.question_id
group by university,difficult_level;
27.查看参加了答题的山东大学的用户在不同难度下的平均答题题目数
(结果在小数点位数保留4位,4位之后四舍五入):
select a.university,c.difficult_level,
round(count(b.question_id)/count(distinct(b.device_id)),4)as avg_answer_cnt
from user_profile a inner join question_practice_detail b on a.device_id=b.device_id
left join question_detail c on b.question_id=c.question_id
group by university,difficult_level
having a.university='山东大学';
28.查看学校为山东大学或者性别为男性的用户的device_id、gender、age和gpa数据:
select device_id,gender,age,gpa from user_profile
where university='山东大学'
union all
select device_id,gender,age,gpa from user_profile
where gender='male';
29.将用户划分为25岁以下和25岁及以上两个年龄段,分别查看这两个年龄段用户数量
(age为null 也记为 25岁以下)
select case when age>=25 then '25岁及以上'
when age<25 or age is null then '25岁以下'
end age_cut,count(*) number
from user_profile
group by age_cut;
30.将用户划分为20岁以下,20-24岁,25岁及以上三个年龄段,分别查看不同年龄段用户的明细情况,(若年龄为空请返回其他):
select device_id,gender,
case when age<20 then '20岁以下'
when age between 20 and 24 then '20-24岁'
when age>=25 then '25岁及以上'
else '其他'
end age_cut
from user_profile;
31.计算出2021年8月每天用户练习题目的数量:
select day(date) as day,
count(question_id) as question_cnt from
question_practice_detail where
date_format(date,"%Y-%m")="2021-08"
-- month(date)=8 and year(date)=2021
group by date
32.查看用户在某天刷题后第二天还会再来刷题的平均概率(用户的平均次日留存率)
select COUNT(distinct b.device_id,date2) / COUNT(distinct a.device_id,date) as avg_ret
from
-- date_add(date1, interval 1 day) -- 第二天来了
question_practice_detail as a
left join
(select device_id,date_add(date,interval 1 day) as date2
from question_practice_detail) as b
on a.device_id=b.device_id and b.date2=a.date
33.取出每个学校的最低gpa:
select device_id,university,gpa from user_profile
where (university,gpa) in
(select university,min(gpa) from user_profile group by university)
order by university;
select device_id, university, gpa
from (
select *,
row_number() over (partition by university order by gpa) as rn
from user_profile
) as univ_min
where rn=1
order by university;
34.了解复旦大学的每个用户在8月份练习的总题目数和回答正确的题目数情况,对于在8月份没有练习过的用户,答题数结果返回0
select a.device_id, a.university,
count(b.question_id) question_cnt,
sum(if(b.result='right', 1, 0)) as right_question_cnt
from user_profile a left join question_practice_detail b
on a.device_id=b.device_id and month(b.date)=8
where a.university='复旦大学'
group by a.device_id;
35.了解浙江大学的用户在不同难度题目下答题的正确率情况,按照准确率升序输出。
select c.difficult_level,sum(if(b.result='right',1,0))/count(b.question_id) as correct_rate
-- avg(if(b.result='right',1,0)) as correct_rate
from user_profile a left join question_practice_detail b on a.device_id=b.device_id
left join question_detail c on b.question_id=c.question_id
where a.university='浙江大学'
group by c.difficult_level
having c.difficult_level !='None' -- 为了防止结果中有难度为None的结果
order by correct_rate asc;
运营举办了一场比赛,收到了一些参赛申请:
drop table if exists user_submit;
CREATE TEMP TABLE user_submit (
id int NOT NULL,
device_id int NOT NULL,
profile varchar(100) NOT NULL,
blog_url varchar(100) NOT NULL
);
INSERT INTO user_submit VALUES(1,2138,'180cm,75kg,27,male','http:/url/bisdgboy777');
INSERT INTO user_submit VALUES(1,3214,'165cm,45kg,26,female','http:/url/dkittycc');
INSERT INTO user_submit VALUES(1,6543,'178cm,65kg,25,male','http:/url/tigaer');
INSERT INTO user_submit VALUES(1,4321,'171cm,55kg,23,female','http:/url/uhsksd');
INSERT INTO user_submit VALUES(1,2131,'168cm,45kg,22,female','http:/url/sysdney');
36.统计每个性别的用户分别有多少参赛者
select substring_index(profile,',',-1) as gender,
-- if(profile like '%female','female','male') gender,
count(device_id) as number
from user_submit
group by gender;
37.把用户的个人博客用户字段提取出单独记录为一个新的字段:
(blog_url字段中url字符后的字符串为用户个人博客的用户名)
-- 字段切割法 substring_index(string, '切割标志', 位置数(负号:从后面开始))
-- device_id,substring_index(blog_url,'/',-1) as user_name
-- 替换法 replace(string, '被替换部分','替换后的结果')
-- device_id, replace(blog_url,'http:/url/','') as user_name -- 删除法 trim('被删除字段' from 列名)
-- device_id, trim('http:/url/' from blog_url) as user_name
-- 截取法 substr(string, start_point, length*截取长度*)
select device_id, substr(blog_url,11,length(blog_url)-10) as user_nam
from user_submit;
38.统计每个年龄的用户分别有多少参赛者:
select substring_index(substring_index(profile,',',3),',',-1) age,
count(device_id) as number
from user_submit
group by (age)