hive经典习题50题

-- 额外题hive实现词频统计 文件在hdfs的/data目录下
create table word(line string) location '/data/';
select word,count(*) as word_count
from word
lateral view explode(split(line,' ')) t1 as word
group by word
order by word_count desc;
-- 建库
create database if not exists hql50;
use hql50;
-- 建表
-- 课程表
create table if not exists course(course_id int,course_name string,teacher_id int);
-- 分数表
create table if not exists score(student_id int,course_id int,score int);
-- 学生表
create table if not exists student(student_id int,student_name string,student_birth string,student_sex string);
-- 教师表
create table if not exists teacher(teacher_id int,
	teacher_name string
);
-- 插入数据
-- 课程表
insert into table course values 
(01,'语文',02),
(02,'数学',01),
(03,'英语',03);
-- 分数表
insert into table score values
(01,01,80),
(01,02,90),
(01,03,99),
(02,01,70),
(02,02,60),
(02,03,80),
(03,01,80),
(03,02,80),
(03,03,80),
(04,01,50),
(04,02,30),
(04,03,20),
(05,01,76),
(05,02,87),
(06,01,31),
(06,03,34),
(07,02,89),
(07,03,98);
-- 学生表
insert into table student values
(01,'赵雷',	'1990-01-01','男'),
(02,'钱电',	'1990-12-21','男'),
(03,'孙风',	'1990-05-20','男'),
(04,'李云',	'1990-08-06','男'),
(05,'周梅',	'1991-12-01','女'),
(06,'吴兰',	'1992-03-01','女'),
(07,'郑竹',	'1989-07-01','女'),
(08,'王菊',	'1990-01-20','女');
-- 教师表
insert into table teacher values (01,'张三'),(02,'李四'),(03,'王五');
--1、查询"01"课程比"02"课程成绩高的学生的信息及课程分数:
select s3.*,s1.score,s2.score
from score s1 join score s2 join student s3
on s1.student_id=s2.student_id and s2.student_id = s3.student_id
where s1.course_id=1 and s2.course_id=2 and s1.score>s2.score;
--2.查询"01"课程比"02"课程成绩低的学生的信息及课程分数:
select s3.*,s1.score,s2.score
from score s1 join score s2 join student s3
on s1.student_id=s2.student_id and s2.student_id = s3.student_id
where s1.course_id=1 and s2.course_id=2 and s1.score<s2.score;
--3.查询平均成绩大于等于60分的同学的学生编号和学生姓名和平均成绩:
with t1 as (
select round(avg(s1.score),2) a,s1.student_id
from score s1 
group by s1.student_id)
select t1.* ,s1.student_name
from t1 join student s1
on s1.student_id=t1.student_id
where t1.a>60;
--4。查询平均成绩小于60分的同学的学生编号和学生姓名和平均成绩:
--– (包括有成绩的和无成绩的)
with t1 as (
select round(avg(s1.score),2) a,s1.student_id
from score s1 
group by s1.student_id)
select s1.student_id, s1.student_name,t1.a
from student s1 left join t1 
on s1.student_id=t1.student_id
where t1.a<60 or t1.a is null ;
--5.查询所有同学的学生编号、学生姓名、选课总数、所有课程的总成绩:
with  
t1 as 
(select distinct * from 
(select  s1.student_id,count(s1.course_id) over(partition by student_id) as win1,sum(s1.score) over(partition by student_id) as win2
from score s1) t1)
select  student.student_id,student_name,case when t1.win1 is null then '0' else t1.win1 end,case when t1.win2 is null then '0' else t1.win2 end
from student left join t1 on t1.student_id = student.student_id;
--6.查询"李"姓老师的数量:
with
t1 as(
select teacher_name
from teacher 
where teacher_name like '李%')
select count(1) from t1;
--7.查询学过"张三"老师授课的同学的信息:
select s2.student_id
from score s1 join course c join teacher t1 join student s2 join (select teacher_id from teacher  where teacher_name='张三') a
on s1.course_id = c.course_id and t1.teacher_id = c.teacher_id and s1.student_id = s2.student_id and a.teacher_id=t1.teacher_id;
--8.查询没学过"张三"老师授课的同学的信息:
select distinct s1.*
from student s1 left join  score s3 on s1.student_id=s3.student_id
where  not exists 
(select s2.*
from score s1 join course c join teacher t1 join student s2 join (select teacher_id from teacher  where teacher_name='张三') a
on s1.course_id = c.course_id and t1.teacher_id = c.teacher_id and s1.student_id = s2.student_id and a.teacher_id=t1.teacher_id 
where s2.student_id=s3.student_id );
--9.查询学过编号为"01"并且也学过编号为"02"的课程的同学的信息:
select s1.*
from student s1 join (
SELECT s.`student_id`
FROM score s JOIN student stu
ON s.`student_id`=stu.`student_id`
WHERE s.`course_id` IN(1,2)
GROUP BY s.`student_id`
HAVING COUNT(s.`course_id`)=2) s2
on s1.student_id=s2.student_id;
--10.查询学过编号为"01"但是没有学过编号为"02"的课程的同学的信息:
select a.student_id,a.student_name,a.student_birth,a.student_sex 
from student a
join score b on a.student_id=b.student_id and b.course_id=1 
where not exists
(select * from score c where a.student_id=c.student_id and c.course_id=2);
--11、查询没有学全所有课程的同学的信息:
select distinct a.* 
from student a join course b left join score c 
on c.student_id =a.student_id and c.course_id=b.course_id 
where c.score is null;
--12、查询至少有一门课与学号为"01"的同学所学相同的同学的信息:
select distinct
a.*
from student a
join score b on b.student_id=1
join score c on b.course_id=c.course_id and a.student_id=c.student_id
where a.student_id!=1;
--13、查询和"01"号的同学学习的课程完全相同的其他同学的信息:
select s.student_name from student s join score sc on s.student_id=sc.student_id
join(select collect_set(course_id) as sub,count(course_id) as num from score where student_id=1 ) sc2
where array_contains(sc2.sub,sc.course_id)
group by s.student_id,s.student_name,s.student_birth,s.student_sex,sc2.num
having count(sc.course_id)=sc2.num;
--14、查询没学过"张三"老师讲授的任一门课程的学生姓名:
select a.*
from student a
where not exists
(select 1 from score b,course c,teacher d 
where a.student_id=b.student_id and b.course_id=c.course_id 
and c.teacher_id=d.teacher_id and d.teacher_name='张三');
--15、查询两门及其以上不及格课程的同学的学号,姓名及其平均成绩:
SELECT a.student_id,a.student_name,ROUND(AVG(b.score),2)
FROM student a
JOIN score b ON a.student_id=b.student_id AND b.score<60
GROUP BY a.student_id,a.student_name
HAVING COUNT(1)>=2;
--16、检索"01"课程分数小于60,按分数降序排列的学生信息:
SELECT a.*,b.course_id,b.score
FROM student a
JOIN score b ON a.student_id=b.student_id AND b.score<60
WHERE b.course_id=1
ORDER BY b.score DESC;
--17、按平均成绩从高到低显示所有学生的所有课程的成绩以及平均成绩:
SELECT
a.student_id,
b.score as 01_score,
c.score as 02_score,
d.score as 03_score,
ROUND ((b.score+c.score+d.score)/3,2) as avg_score
FROM score a
join score b on a.student_id=b.student_id and b.course_id=1
join score c on a.student_id=c.student_id and c.course_id=2
join score d on a.student_id=d.student_id and d.course_id=3
GROUP BY a.student_id,b.score,c.score,d.score
ORDER BY ROUND ((b.score+c.score+d.score)/3,2) DESC ;
--18.查询各科成绩最高分、最低分和平均分:以如下形式显示:课程ID,课程name,最高分,最低分,平均分,及格率,中等率,优良率,优秀率:
SELECT
a.course_id,
b.course_name,
MAX(a.score) AS max_score,
MIN(a.score) AS min_score,
ROUND (AVG(a.score),2) AS avg_score,
round(count(if(a.score>=60,a.score,null))/count(a.score)*100,2) as jige,
round(count(if(a.score>=70 and a.score<80,a.score,null))/count(a.score)*100,2) as zd,
round(count(if(a.score>=80 and a.score<90,a.score,null))/count(a.score)*100,2) as yl,
round(count(if(a.score>=90,a.score,null))/count(a.score)*100,2) as yx
FROM score a
JOIN course b ON a.course_id=b.course_id
GROUP BY a.course_id,b.course_name;
--19、按各科成绩进行排序,并显示排名:
select student_id,score,row_number() over(partition by course_id order by score desc )as win1
from score ;
--20、查询学生的总成绩并进行排名:
select b.student_id,row_number() over(order by b.win1 desc)
from
(
select distinct a.student_id,a.win1
from
(select student_id,sum(score) over(partition by student_id) as win1
from score) a) b;
--21、查询不同老师所教不同课程平均分从高到低显示:
select avg(score) b
from score
group by course_id
order by b desc;
--22、查询所有课程的成绩第2名到第3名的学生信息及该课程成绩:
select  a.*,s1.*
from
(select s1.*,row_number() over(partition by course_id order by score desc) as win1
from score s1) a  join student s1 on a.student_id=s1.student_id
where a.win1 in (2,3); 
--23、统计各科成绩各分数段人数:课程编号,课程名称,[100-85],[85-70],[70-60],[0-60]及所占百分比
select distinct b.course_id,b.score_group,b.c1
from (
select  a.course_id,a.score_group,(count(student_id) over(partition by course_id,a.score_group))/(count(student_id) over(partition by course_id)) c1
from (
select s1.*,case when s1.score between 0 and 60 then 1 when s1.score between 61 and 70 then 2 when s1.score between 71 and 85 then 3 when s1.score between 86 and 100 then 4 end as score_group
from score s1) a) b;
--24、查询学生平均成绩及其名次:
with a1 as (select student.student_name student_name,avg(score.score) over(distribute by score.student_id) as avgscore from score
join student on student.student_id=score.student_id)
select DISTINCT a1.student_name,a1.avgscore,row_number()over(order by a1.avgscore desc)from a1 
--25、查询各科成绩前三名的记录
with a1 as (select course.course_name course_name,student.student_name student_name,score.score score,row_number()over(partition by score.course_id)as paiming from score 
join student on student.student_id=score.student_id
join course on course.course_id=score.course_id)
select a1.course_name,a1.student_name,a1.score,a1.paiming from a1 distribute by a1.course_name limit 3
--26、查询每门课程被选修的学生数:
select course_id,count(1)
from score
group by course_id;
--27、查询出只有两门课程的全部学生的学号和姓名:
with 
t1 as 
(select score.student_id from score group by score.student_id having count(*)=2)
select t2.student_id,t2.student_name from student t2 right join t1 on t1.student_id=t2.student_id;
--28、查询男生、女生人数:
select student_sex,count(*) from student group by student_sex;
--29、查询名字中含有"风"字的学生信息:
select * from student where student_name like '%风%';
--30、查询同名同姓学生名单,并统计同名人数:
select student_name,count(*) 
from student 
group by student_name 
having count(*)>1;
--31、查询1990年出生的学生名单:
select student_name 
from student 
where year(student_birth)=1990;
--32、查询每门课程的平均成绩,结果按平均成绩降序排列,平均成绩相同时,按课程编号升序排列:
select course_id,avg(score) as a 
from score 
group by course_id 
order by a desc,course_id asc; 
--33、查询平均成绩大于等于85的所有学生的学号、姓名和平均成绩:
with t1 as 
(select student_id,round(avg(score),2)as a from score group by student_id having avg(score)>=85)
select t2.student_id,t2.student_name,t1.a 
from student t2 right join t1 on t2.student_id=t1.student_id;
--34、查询课程名称为"数学",且分数低于60的学生姓名和分数:
with 
t3 as 
(select t2.student_id,t2.score from score t2 right join (select course_id from course where course_name='数学')as t1 
on t1.course_id=t2.course_id where t2.score<60)
select t4.student_name,t3.score from student t4 
right join t3 on t4.student_id=t3.student_id;
--35、查询所有学生的课程及分数情况:
select s1.student_name,s2.course_id,s2.score
from student s1 left join score s2 on s1.student_id=s2.student_id ;
--36、查询任何一门课程成绩在70分以上的学生姓名、课程名称和分数:
with
t1 as
(select s.*,c.course_name
from score s  join course c on c.course_id=s.course_id
where score>70)
select distinct s1.student_name,t1.course_name,t1.score
from t1 join student s1 on t1.student_id = s1.student_id;
--37、查询课程不及格的学生:
with
t1 as(
select distinct student_id
from score
where score <60)
select s.*
from student s join t1 on t1.student_id=s.student_id;
--38、查询课程编号为01且课程成绩在80分以上的学生的学号和姓名:
select *
from score
where course_id=1;
--39、求每门课程的学生人数:
select distinct *
from(
select course_id,count(student_id) over(partition by course_id)
from score) a;
--40、查询选修"张三"老师所授课程的学生中,成绩最高的学生信息及其成绩:
select c.student_id,c.student_name,c1.course_name,c.score
from (
select s1.*,s.score,s.course_id,row_number() over(order by s.score desc) as win1
from score s join(
select c.course_id
from course c join (
select teacher_id
from teacher
where teacher_name='张三')a on c.teacher_id=a.teacher_id) b on s.course_id=b.course_id 
join student s1
on s1.student_id=s.student_id) c join course c1 on c1.course_id=c.course_id
where c.win1=1;
--41、查询不同课程成绩相同的学生的学生编号、课程编号、学生成绩:
select s1.student_id,s1.course_id,s2.course_id,s1.score
from score s1 ,score s2
where s1.score=s2.score and s1.course_id!=s2.course_id;
--42、查询每门课程成绩最好的前三名:
select a.*
from(
select s.*,row_number() over(partition by s.course_id order by s.score desc)as win1
from score s ) a
where a.win1<=3;
--43、统计每门课程的学生选修人数(超过5人的课程才统计):
--– 要求输出课程号和选修人数,查询结果按人数降序排列,若人数相同,按课程号升序排列
select course_id,count(student_id) over(partition by course_id) as win1
from score
order by win1 desc ,course_id asc;
--44、检索至少选修两门课程的学生学号:
select distinct a.student_id
from(
select student_id,count(course_id) over(partition by student_id) as win1
from score) a
where a.win1>=2;
--45、查询选修了全部课程的学生信息:
select distinct s.*
from (
select a.student_id
from(
select student_id ,count(course_id) over(partition by student_id) as win1
from score )a
join
(select count(course_id) c 
from course) b on a.win1=b.c ) d join student s on s.student_id=d.student_id;
--46、查询各学生的年龄(周岁):
--– 按照出生日期来算,当前月日 < 出生年月的月日则,年龄减一
select *, cast(date_format(current_date(),'yyyy') as int)-cast(date_format(student_birth,'yyyy') as int)
from student;
--47、查询本周过生日的学生:
-- 找到下周一-1即为本周最后一天,开始时间为当前天
select *
from student
where unix_timestamp(cast(concat_ws('-',date_format(current_date(),'yyyy'),date_format(student_birth,'MM'),date_format(student_birth,'dd')) as date),'yyyy-MM-dd')
between unix_timestamp(current_date())
and unix_timestamp(date_sub(next_day(current_date(),'MON'),1),'yyyy-MM-dd');
--48、查询下周过生日的学生:
-- 找到下周的最后一天和最开始一天来算 next_day(current_date(),'SUNDAY'); -- 需要注意的是老外以星期天,开始需要+7
select *
from student
where unix_timestamp(cast(concat_ws('-',date_format(current_date(),'yyyy'),date_format(student_birth,'MM'),date_format(student_birth,'dd')) as date),'yyyy-MM-dd')
between unix_timestamp(next_day(current_date(),'MON'),'yyyy-MM-dd')
and unix_timestamp(date_add(next_day(current_date(),'MON'),6),'yyyy-MM-dd');
--49、查询本月过生日的学生:
select *
from student
where month(student_birth) = month(current_date());
--50、查询12月份过生日的学生:
select *
from student 
where month(student_birth)=12;












你可能感兴趣的:(Hadoop,hive,hive习题,hive50题,hive经典习题,hql50题)