1、有如下数据:(建表语句+sql查询)
id names
1 aa,bb,cc,dd,ee
2 aa,bb,ff,ww,qq
3 aa,cc,rr,yy
4 aa,bb,dd,oo,pp
求英雄的出场排名top3的出场次数及出场率
-- Hero line-ups: one row per match id, heroes stored as an array of names.
-- Expected file layout: id <TAB> comma-separated hero names.
-- Fixes: the array column needs an element type, and the delimiters must be
-- plain ASCII single quotes (typographic quotes are not valid string quotes).
create table if not exists t_names(
    id    int,
    names array<string>
)
row format delimited
fields terminated by '\t'
collection items terminated by ','
;
-- Top-3 heroes by number of appearances, with their appearance rate.
-- ccl = appearances / number of matches (rows of t_names). The earlier
-- denominator, sum(cc) over(), was the total number of hero slots across all
-- matches, which gives a share of picks rather than an appearance rate.
-- rank() keeps ties, so more than three rows can come back.
select
    aa.name,
    aa.cc,
    aa.ccl,
    aa.rk
from (
    select
        a.name,
        a.cc,
        a.cc / g.games as ccl,
        rank() over (order by a.cc desc) as rk
    from (
        -- one output row per hero with its appearance count
        select
            name,
            count(1) as cc
        from t_names lateral view explode(names) tt as name
        group by
            name
    ) a
    cross join (
        -- single-row subquery: total number of matches
        select count(1) as games
        from t_names
    ) g
) aa
where aa.rk <= 3
;
2、有如下通话记录:
Zhangsan Wangwu 01:01:01
Zhangsan Zhaoliu 00:11:21
Zhangsan Yuqi 00:19:01
Zhangsan Jingba 00:21:01
Zhangsan Wuxi 01:31:17
Wangwu Zhaoliu 00:51:01
Wangwu Zhaoliu 01:11:19
Wangwu Yuqi 00:00:21
Wangwu Yuqi 00:23:01
Yuqi Zhaoliu 01:18:01
Yuqi Wuxi 00:18:00
Jingba Wangwu 00:01:01
Jingba Wangwu 00:00:06
Jingba Wangwu 00:02:04
Jingba Wangwu 00:02:54
Wangwu Yuqi 01:00:13
Wangwu Yuqi 00:01:01
Wangwu Zhangsan 00:01:01
统计两个人的通话总时长(用户之间互相通话的时长)
-- Call log: caller, callee and call length as HH:mm:ss text, space separated.
-- Fixes: the field delimiter uses plain ASCII quotes, and `time` is
-- back-quoted because TIME is a reserved keyword in Hive 3.
create table relations(
    fromstr string,
    tostr   string,
    `time`  string
)
row format delimited
fields terminated by ' '
;
-- Total talk time in seconds for every unordered pair of users.
-- The inner query normalises each pair so the alphabetically larger name is
-- always fromstr; A->B and B->A calls then fall into the same group.
-- Fixes: ASCII quotes around ':' (typographic quotes do not parse), explicit
-- int casts instead of implicit string arithmetic, back-quoted `time`.
select
    fromstr,
    tostr,
    sum(duration) as durations
from (
    select
        case when fromstr >= tostr then fromstr else tostr end as fromstr,
        case when fromstr >= tostr then tostr else fromstr end as tostr,
        cast(split(`time`, ':')[0] as int) * 60 * 60
      + cast(split(`time`, ':')[1] as int) * 60
      + cast(split(`time`, ':')[2] as int) as duration
    from relations
) a
group by fromstr, tostr
;
3、有如下销售数据:(建表语句+sql查询)(20分)
店铺 月份 金额
a,01,150
a,01,200
b,01,1000
b,01,800
c,01,250
c,01,220
b,01,6000
a,02,2000
a,02,3000
b,02,1000
b,02,1500
c,02,350
c,02,280
a,03,350
a,03,250
要求:编写Hive的HQL语句,求出每个店铺的当月销售额和累计到当月的总销售额
-- Shop sales: shop name, month number and sale amount, comma separated.
-- Fix: the delimiter literal uses plain ASCII quotes; the typographic double
-- quotes in the earlier version are not valid string delimiters.
create table t_store(
    name   string,
    months int,
    money  int
)
row format delimited fields terminated by ',';
-- Per shop: revenue of each month plus the running total up to and including
-- that month. The subquery collapses individual sales into one row per
-- (shop, month); the window then accumulates within each shop.
select
    monthly.name,
    monthly.months,
    monthly.amoney,
    sum(monthly.amoney) over (
        partition by monthly.name
        order by monthly.months asc
        rows between unbounded preceding and current row
    ) as totalmomey
from (
    select
        name,
        months,
        sum(money) as amoney
    from t_store
    group by
        name,
        months
) monthly
;
4、[Hive SQL]统计amt连续3个月,环比增长>50%的user
user_id month amt
1,20170101,100
3,20170101,20
4,20170101,30
1,20170102,200
2,20170102,240
3,20170102,30
4,20170102,2
1,20170101,180
2,20170101,250
3,20170101,30
4,20170101,260
…
…
-- Users whose monthly amt grew by more than 50% month-over-month for three
-- consecutive calendar months.
-- Fixes over the earlier version:
--   * it referenced pre2_month, which was never defined (alias was pre_2_mon);
--   * to_date(x, fmt) and the three-argument datediff are not Hive built-in
--     signatures; month continuity is now checked with an integer month index
--     (year * 12 + month);
--   * typographic quotes around literals are gone;
--   * distinct prevents a user from being listed once per qualifying window.
-- NOTE(review): assumes amt.month holds yyyyMMdd (or yyyyMM...) values - confirm.
select distinct t3.user_id
from (
    select
        t2.user_id,
        -- 1 when this month directly follows the previous one and grew > 50%;
        -- summed over this row and the next two rows of the same user
        sum(case
                when t2.pre_mon_idx is not null
                 and t2.mon_idx - t2.pre_mon_idx = 1
                 and t2.pre_mon_amt > 0
                 and (t2.mon_amt - t2.pre_mon_amt) / t2.pre_mon_amt > 0.5
                then 1
                else 0
            end) over (
                partition by t2.user_id
                order by t2.mon_idx asc
                rows between current row and 2 following
            ) as flag
    from (
        select
            t1.user_id,
            t1.mon_idx,
            t1.mon_amt,
            lag(t1.mon_amt, 1) over (partition by t1.user_id order by t1.mon_idx asc) as pre_mon_amt,
            lag(t1.mon_idx, 1) over (partition by t1.user_id order by t1.mon_idx asc) as pre_mon_idx
        from (
            select
                m.user_id,
                cast(substr(m.mon, 1, 4) as int) * 12 + cast(substr(m.mon, 5, 2) as int) as mon_idx,
                m.mon_amt
            from (
                -- one row per user and calendar month (yyyyMM)
                select
                    user_id,
                    substr(cast(month as string), 1, 6) as mon,
                    sum(amt) as mon_amt
                from amt
                group by
                    user_id,
                    substr(cast(month as string), 1, 6)
            ) m
        ) t1
    ) t2
) t3
where t3.flag >= 3;
5、hive经典50题
数据
Student(Sid,Sname,Sage,Ssex)学生表
Sid:学号
Sname:学生姓名
Sage:学生出生日期
Ssex:学生性别
Course(Cid,Cname,Tid)课程表
Cid:课程编号
Cname:课程名称
Tid:教师编号
SC(Sid,Cid,score)成绩表
Sid:学号
Cid:课程编号
score:成绩
Teacher(Tid,Tname)教师表
Tid:教师编号
Tname:教师名字
01 赵雷 1990-01-01 男
02 钱电 1990-12-21 男
03 孙风 1990-05-20 男
04 李云 1990-08-06 男
05 周梅 1991-12-01 女
06 吴兰 1992-03-01 女
07 郑竹 1989-07-01 女
08 王菊 1990-01-20 女
01 语文 02
02 数学 01
03 英语 03
01 张三
02 李四
03 王五
01 01 80
01 02 90
01 03 99
02 01 70
02 02 60
02 03 80
03 01 80
03 02 80
03 03 80
04 01 50
04 02 30
04 03 20
05 01 76
05 02 87
06 01 31
06 03 34
07 02 89
07 03 98
题目+答案
–1、查询01课程比02课程成绩高的所有学生的学号
-- Students whose score in course 01 is higher than their score in course 02.
-- The self-join pairs each student's course-01 row with their course-02 row.
-- Fix: the statement was missing its terminating semicolon and ran into the
-- following query when executed as a script.
select
    sc1.sid,
    sc1.score as score1,
    sc2.score as score2
from sc sc1
join sc sc2
    on sc1.sid = sc2.sid
where sc1.cid = 1
  and sc2.cid = 2
  and sc1.score > sc2.score
;
–2、查询平均成绩大于60分的同学的学号和平均成绩
-- Student id and average score for students averaging above 60.
select
    sid,
    avg(score) as avgscore
from sc
group by
    sid
having avg(score) > 60;
–3、查询所有同学的学号、姓名、选课数、总成绩
-- Every student with the number of courses taken and the total score.
-- The left join keeps students without any score rows; their total is 0.
select
    stu.sid,
    stu.sname,
    count(sc.cid)              as countcourse,
    coalesce(sum(sc.score), 0) as sumscore
from student stu
left join sc
    on sc.sid = stu.sid
group by
    stu.sid,
    stu.sname;
–4、查询姓‘李’的老师的个数:
-- Number of teachers whose name starts with the given surname.
-- Fix: the pattern is delimited by ASCII quotes instead of typographic ones.
select count(*)
from teacher
where tname like '李%';
–5、查询没有学过“张三”老师课程的同学的学号、姓名:
-- Students who have not taken any course taught by the named teacher.
-- Fixes: ASCII quotes around the name; the student x course x teacher
-- cartesian product is replaced by an anti-join, which also returns every
-- student when the teacher has no matching rows (the earlier form returned
-- none in that case).
select
    stu.sid,
    stu.sname
from student stu
left join (
    -- ids of students with at least one score row in the teacher's courses
    select distinct sc.sid
    from sc
    join course cs
        on cs.cid = sc.cid
    join teacher t
        on t.tid = cs.tid
    where t.tname = '张三'
) taught
    on taught.sid = stu.sid
where taught.sid is null;
–6、查询学过“张三”老师所教的所有课的同学的学号、姓名:
-- Students who have taken every course taught by the named teacher.
-- Fix: the earlier form cross-joined course with teacher without a join
-- condition, so courses taught by other teachers could never match a score
-- row and were counted as "missing", rejecting every student. The course set
-- is now restricted to the teacher's own courses before the coverage check.
-- The name literal also uses ASCII quotes now.
select
    stu.sid,
    stu.sname
from student stu
cross join (
    select cs.cid
    from course cs
    join teacher t
        on t.tid = cs.tid
    where t.tname = '张三'
) zc
left join sc
    on sc.sid = stu.sid
   and sc.cid = zc.cid
group by
    stu.sid,
    stu.sname
-- no course of the teacher may be left without a score row
having sum(case when sc.sid is null then 1 else 0 end) = 0;
–7、查询学过01并且也学过编号02课程的同学的学号、姓名:
-- Students who have a score row for course 01 and one for course 02.
select
    stu.sid,
    stu.sname
from sc sc1
join sc sc2
    on sc2.sid = sc1.sid
join student stu
    on stu.sid = sc1.sid
where sc2.cid = 02
  and sc1.cid = 01;
–8、查询课程编号02的成绩比课程编号01课程成绩低的所有同学的学号、姓名:
-- Students whose course-02 score is below their course-01 score.
-- Because sc2 is left-joined, students with a course-01 score but no
-- course-02 row are returned as well.
select
    stu.sid,
    stu.sname
from student stu
join sc sc1
    on sc1.sid = stu.sid
left join sc sc2
    on sc2.sid = sc1.sid
   and sc2.cid = 02
where sc1.cid = 01
  and (sc2.score is null or sc1.score > sc2.score);
–9、查询所有课程成绩小于60的同学的学号、姓名:
-- Students with no score of 60 or above. Only passing rows are joined, so a
-- student qualifies when nothing matched; students without any score rows
-- qualify too.
select
    stu.sid,
    stu.sname
from student stu
left join sc
    on sc.sid = stu.sid
   and sc.score >= 60
group by
    stu.sid,
    stu.sname
having count(sc.sid) = 0;
–10、查询没有学全所有课的同学的学号、姓名:
-- Students who have not taken every course in the course table.
-- Fix: the student/course pairing was written as "left join" with no ON
-- clause; it is an intentional cartesian product, so it is now spelled as an
-- explicit cross join.
select
    stu.sid,
    stu.sname
from student stu
cross join course cs
left join sc
    on sc.sid = stu.sid
   and sc.cid = cs.cid
group by
    stu.sid,
    stu.sname
-- at least one (student, course) pair without a score row
having sum(case when sc.cid is null then 1 else 0 end) > 0;
–11、查询至少有一门课与学号为01同学所学相同的同学的学号和姓名:
-- Students sharing at least one course with student 01.
-- Student 01 is part of the result, since no filter excludes that id.
select distinct
    st.sid,
    st.sname
from sc sc2
join sc sc1
    on sc1.cid = sc2.cid
join student st
    on st.sid = sc1.sid
where sc2.sid = 1;
--12、查询至少学过学号为01的同学所学任意一门课的其他同学的学号和姓名:
-- Students other than 01 who took at least one of the courses 01 took.
select distinct
    st.sid,
    st.sname
from student st
join sc sc1
    on sc1.sid = st.sid
join sc sc2
    on sc2.cid = sc1.cid
where sc1.sid != 1
  and sc2.sid = 1;
–13、查询张三老师教的课的平均成绩:
-- Average score across all courses taught by the named teacher.
-- Fix: the teacher name is delimited by ASCII quotes.
select
    avg(sc.score) as avgscore
from sc
join course co
    on sc.cid = co.cid
join teacher t
    on t.tid = co.tid
where t.tname = '张三';
–14、查询和02号的同学学习的课程完全相同的其他同学学号和姓名:
-- Other students whose set of courses is exactly the set student 02 takes.
-- Fixes: the earlier HAVING referenced the undefined alias "st" and
-- non-grouped columns, so it could not run; the full-outer-join construction
-- is replaced by a count comparison.
-- A student matches when (a) the number of their courses equals 02's course
-- count and (b) all of their courses are among 02's.
-- NOTE(review): assumes at most one sc row per (sid, cid) - confirm.
select
    stu.sid,
    stu.sname
from student stu
join (
    select
        sc.sid,
        count(1) as course_cnt,
        sum(case when base.cid is not null then 1 else 0 end) as shared_cnt
    from sc
    left join (
        select cid
        from sc
        where sid = 2
    ) base
        on base.cid = sc.cid
    where sc.sid != 2
    group by
        sc.sid
) cmp
    on cmp.sid = stu.sid
cross join (
    -- single row: how many courses student 02 takes
    select count(1) as base_cnt
    from sc
    where sid = 2
) tot
where cmp.course_cnt = tot.base_cnt
  and cmp.shared_cnt = tot.base_cnt
;
–15、查询学习“张三”老师课的成绩表记录:
-- All score rows for courses taught by the named teacher.
-- Fix: the teacher name is delimited by ASCII quotes.
select sc.*
from sc
join course c
    on sc.cid = c.cid
join teacher t
    on c.tid = t.tid
where t.tname = '张三';
–16、查询没有上过编号03课程的同学学号的02号课的成绩:
-- Course-02 score rows of students who have no row for course 03.
-- Fixes: the quoted literal (typographic quotes, value '3') is replaced by
-- the numeric literal 3, matching how every other query in this file compares
-- course ids; as a string, '3' would never equal an id stored as '03'.
select sc.*
from sc
left join (
    select sid, cid
    from sc
    where cid = 3
) sc2
    on sc.sid = sc2.sid
where sc2.cid is null
  and sc.cid = 2
;
–17、按平均成绩从高到低显示所有学生的“语文”、“数学”、“英语”三门的课程成绩,
--按如下形式显示:学生ID,语文,数学,英语,有效课程数,有效平均分
-- One row per student: score per named course (0 when not taken), number of
-- courses and average score, ordered from highest to lowest average.
-- Fixes: the sort was ascending although the task asks for high-to-low, and
-- the course-name literals now use ASCII quotes.
select
    sc.sid,
    max(case course.cname when '语文' then sc.score else 0 end) as yuwen,
    max(case course.cname when '数学' then sc.score else 0 end) as shuxue,
    max(case course.cname when '英语' then sc.score else 0 end) as yingyu,
    count(sc.cid)  as kechengshu,
    avg(sc.score)  as pingjunfen
from sc
join course
    on sc.cid = course.cid
group by
    sc.sid
order by pingjunfen desc;
–18、查询各科成绩最高和最低的分:以如下的形式显示:课程ID,最高分,最低分
-- Highest and lowest score per course; a NULL score counts as 0 for the
-- minimum.
select
    cid,
    max(score)              as maxscore,
    min(coalesce(score, 0)) as minscore
from sc
group by
    cid;
–19、按各科平均成绩从低到高和及格率的百分数从高到低顺序:
-- Average score and pass rate (score >= 60) per course.
-- Fixes: the ratio is multiplied by 100 before the '%' sign is appended (it
-- was a 0..1 fraction labelled as percent, unlike the q20 query), the '%'
-- literal uses ASCII quotes, and cid is appended so rows can be told apart.
-- NOTE(review): jigelv is a string, so the secondary sort is lexical rather
-- than numeric; it only matters when two courses share the same average.
select
    avg(score) as avgscore,
    concat(
        cast(sum(case when score >= 60 then 1 else 0 end) * 100 / count(sc.sid) as string),
        '%'
    ) as jigelv,
    cid
from sc
group by
    cid
order by avgscore asc, jigelv desc;
–20、查询如下课程平均成绩和及格率的百分数(用”1行”显示): 语文(01),数学(02),英语(03)
-- Single-row summary: "average:pass-rate%" for each of the three named
-- courses.
-- Fixes: ASCII quotes around all literals; the CASE no longer mixes a string
-- branch with "else 0" (unmatched rows yield NULL, which max() ignores, so a
-- missing course shows NULL instead of 0); the course-name filter moved from
-- HAVING to WHERE because it filters rows, not groups.
select
    max(case when t1.cid = 1 then concat(cast(t1.avgscore as string), ':', t1.jigelv) end) as yuwen,
    max(case when t1.cid = 2 then concat(cast(t1.avgscore as string), ':', t1.jigelv) end) as shuxue,
    max(case when t1.cid = 3 then concat(cast(t1.avgscore as string), ':', t1.jigelv) end) as yingyu
from (
    select
        sc.cid,
        avg(sc.score) as avgscore,
        concat(
            cast(sum(case when sc.score >= 60 then 1 else 0 end) * 100 / count(sc.sid) as string),
            '%'
        ) as jigelv
    from sc
    join course cs
        on sc.cid = cs.cid
    where cs.cname in ('语文', '数学', '英语')
    group by
        sc.cid,
        cs.cname
) t1;
–21、查询不同老师所教不同课程平均分从高到低显示:
-- Average score per (teacher, course) pair, highest first. Only the teacher
-- id is projected; the course id takes part in the grouping alone.
select
    cs.tid,
    avg(sc.score) as avgscore
from teacher t
join course cs
    on cs.tid = t.tid
join sc
    on sc.cid = cs.cid
group by
    cs.tid,
    cs.cid
order by avgscore desc;
–22、查询如下课程成绩第3名到第6名的学生成绩单:语文(01),数学(02),英语(03)
-- Score rows ranked 3rd to 6th within each course (rank() leaves gaps after
-- ties).
select ranked.*
from (
    select
        sc.*,
        rank() over (partition by sc.cid order by sc.score desc) as rk
    from sc
) ranked
where ranked.rk >= 3
  and ranked.rk <= 6;
–23、统计下列各科成绩,各分数段人数:课程ID,课程名称,[100-85],[85-70],[70-60],[ 小于60] :
-- Number of score rows per course and score band.
-- Bands are assigned top-down: <60, <70, <85, everything else.
-- Fix: the band labels are delimited by ASCII quotes.
select
    a.cid,
    a.cname,
    a.px,
    count(a.px)
from (
    select
        cs.cid,
        cs.cname,
        (case when score < 60 then '[小于60]'
              when score < 70 then '[70-60]'
              when score < 85 then '[85-70]'
              else '[100-85]' end) as px
    from sc
    join course cs
        on sc.cid = cs.cid
) a
group by
    a.cid,
    a.cname,
    a.px
;
–24、查询学生平均成绩及其名次:
-- Average score per student together with its rank.
-- Fix: the ranking is now descending, so the best average gets rank 1 (the
-- ascending sort gave rank 1 to the lowest average). The constant
-- "distribute by 1" is dropped: without a partition clause the whole result
-- is one window.
select
    a.*,
    rank() over (order by a.avgscore desc) as rk
from (
    select
        sc.sid,
        avg(sc.score) as avgscore
    from sc
    group by
        sc.sid
) a;
–25、查询各科成绩前三名的记录(不考虑成绩并列情况):
-- Top three score rows per course, ties broken arbitrarily by row_number().
-- Fix: "a." and "sc." had lost their "*" and did not parse.
select
    a.*,
    cs.cname
from (
    select
        sc.*,
        row_number() over (partition by sc.cid order by sc.score desc) as rk
    from sc
) a
join course cs
    on cs.cid = a.cid
where a.rk < 4;
–26、查询每门课程被选修的学生数:
-- Number of enrolled students per course; courses without any score rows
-- are kept and show 0.
select
    cs.cid,
    cs.cname,
    count(sc.sid) as cd
from course cs
left join sc
    on sc.cid = cs.cid
group by
    cs.cid,
    cs.cname;
–27、查询出只选修一门课程的全部学生的学号和姓名:
-- Students with exactly one score row, i.e. exactly one course taken.
select
    stu.sid,
    stu.sname
from sc
join student stu
    on stu.sid = sc.sid
group by
    stu.sid,
    stu.sname
having count(1) = 1;
–28、查询男生、女生人数:
-- Head count per gender in a single row.
-- Fix: the gender literals are delimited by ASCII quotes.
select
    sum(if(ssex = '男', 1, 0)) as male,
    sum(if(ssex = '女', 1, 0)) as female
from student;
–29、查询姓“张”的学生名单:
-- Students whose name starts with the given surname.
-- Fix: the pattern is delimited by ASCII quotes.
select *
from student
where sname like '张%';
–30、查询同名同姓的学生名单,并统计同名人数:
-- Students who share their name with at least one other student, plus the
-- number of students carrying that name.
-- Fix: the earlier form returned every student (with stucount = 1 for unique
-- names); only names occurring more than once are kept now.
select dup.*
from (
    select
        stu.*,
        count(sid) over (partition by stu.sname) as stucount
    from student stu
) dup
where dup.stucount > 1;
--31、查询1990年出生的学生名单:
-- Students whose sage value starts with 1990 (first four characters).
-- Fix: the year literal is delimited by ASCII quotes.
-- NOTE(review): sample rows suggest sage holds a yyyy-MM-dd birth date - confirm.
select stu.*
from student stu
where substring(stu.sage, 1, 4) = '1990';
–32、查询平均成绩大于80的所有学生的学号、姓名和平均成绩:
-- Id, name and average score of students averaging above 80.
select
    stu.sid,
    stu.sname,
    avg(score) as avgscore
from sc
join student stu
    on stu.sid = sc.sid
group by
    stu.sid,
    stu.sname
having avg(score) > 80;
–33、查询每门课程的平均成绩,结果按平均成绩升序排序,平均成绩相同时,按课程号降序排列:
-- Average score per course, lowest average first; equal averages are
-- ordered by course id descending.
select
    sc.cid,
    cs.cname,
    avg(score) as avgscore
from course cs
join sc
    on sc.cid = cs.cid
group by
    sc.cid,
    cs.cname
order by
    avgscore asc,
    sc.cid desc;
–34、查询课程名称为“数学”,且分数低于60的学生名字和分数:
-- Name and score of students scoring below 60 in the named course.
-- Fix: the course name is delimited by ASCII quotes.
select
    stu.sname,
    sc.score
from sc
join student stu
    on stu.sid = sc.sid
join course cs
    on sc.cid = cs.cid
where cs.cname = '数学'
  and sc.score < 60;
–35、查询所有学生的选课情况:
-- Course enrolment of every student.
-- Fix: the query is now driven from student with left joins, so students
-- without any score rows still appear (cname is NULL for them); the earlier
-- inner joins silently dropped them although the task asks for all students.
select
    stu.sid,
    stu.sname,
    cs.cname
from student stu
left join sc
    on sc.sid = stu.sid
left join course cs
    on cs.cid = sc.cid;
–36、查询任何一门课程成绩在70分以上的姓名、课程名称和分数:
-- Student name, course name and score for every score row above 70.
select
    stu.sname,
    cs.cname,
    sc.score
from sc
join student stu
    on stu.sid = sc.sid
join course cs
    on cs.cid = sc.cid
where sc.score > 70;
–37、查询不及格的课程,并按课程号从大到小的排列:
-- Courses with at least one score below 60, highest course id first.
-- The grouping only removes duplicates; nothing is aggregated.
select
    sc.cid,
    cname
from course cs
join sc
    on sc.cid = cs.cid
where sc.score < 60
group by
    sc.cid,
    cname
order by sc.cid desc;
–38、查询课程编号为03且课程成绩在80分以上的学生的学号和姓名:
-- Students scoring 80 or more in course 03.
select
    stu.sid,
    stu.sname
from sc
join student stu
    on stu.sid = sc.sid
where score >= 80
  and sc.cid = 3;
–39、求选了课程的学生人数:
-- Number of distinct students that appear in the score table.
select count(distinct sid)
from sc;
–40、查询选修“张三”老师所授课程的学生中,成绩最高的学生姓名及其成绩
-- Name and score of the best-scoring student in the named teacher's courses
-- (window-function variant). Every row carries the partition's top values,
-- so limit 1 keeps a single copy.
-- Fix: the teacher name is delimited by ASCII quotes.
select
    first_value(sname) over (
        partition by tname
        order by score desc
        rows between unbounded preceding and unbounded following
    ) as name,
    first_value(score) over (
        partition by tname
        order by score desc
        rows between unbounded preceding and unbounded following
    ) as score
from sc
join student stu
    on stu.sid = sc.sid
join course cs
    on cs.cid = sc.cid
join teacher t
    on t.tid = cs.tid
where tname = '张三'
limit 1
;
-- Name and score of the best-scoring student in the named teacher's courses
-- (sort-and-limit variant).
-- Fix: the teacher name is delimited by ASCII quotes.
select
    sname,
    score
from sc
join student stu
    on stu.sid = sc.sid
join course cs
    on cs.cid = sc.cid
join teacher t
    on t.tid = cs.tid
where tname = '张三'
order by score desc
limit 1;
–41、查询各个课程及相应的选修人数:
-- Each course that has score rows, with its number of enrolled students.
select
    sc.cid,
    cs.cname,
    count(sc.sid) as cnt
from course cs
join sc
    on sc.cid = cs.cid
group by
    sc.cid,
    cs.cname;
–42、查询不同课程成绩相同的学生和学号、课程号、学生成绩:
-- Helper listing: every score row with the student name, sorted by score so
-- equal scores sit next to each other.
-- Fix: the statement had no terminating semicolon and ran into the next
-- select.
select
    stu.sname,
    stu.sid,
    sc.cid,
    sc.score
from student stu
join sc
    on sc.sid = stu.sid
join course cs
    on cs.cid = sc.cid
order by sc.score
;
-- Score rows whose score also occurs in a different course.
-- Fix: the earlier form counted how often a score value occurs anywhere, so
-- two students with the same score in the same course also matched; the task
-- asks for equal scores across different courses, which the self-join below
-- enforces. distinct collapses multiple matches for the same row.
select distinct
    a.sid,
    a.cid,
    a.score
from sc a
join sc b
    on b.score = a.score
join course cs
    on cs.cid = a.cid
where a.cid <> b.cid;
–43、查询每门课程成绩最好的前两名:
-- The two best ranks per course (rank() keeps ties, so more than two rows
-- per course are possible).
-- Fix: the ranking is now descending; the ascending sort returned the two
-- lowest scores.
select
    sid,
    cid,
    score
from (
    select
        sc.*,
        rank() over (partition by cid order by score desc) as rk
    from sc
) aa
where aa.rk < 3;
–44、统计每门课程的学生选修人数(超过5人的课程才统计)。
–要求输出课程号和选修人数,查询结果按人数降序排序,若人数相同,按课程号升序排序:
-- Courses with more than five enrolled students: most students first, ties
-- by course id ascending.
select
    cid,
    count(sid) as cntsid
from sc
group by
    cid
having count(sid) > 5
order by
    cntsid desc,
    cid asc;
–45、检索至少选修两门课程的学生学号:
-- Ids of students with two or more score rows that carry a course id.
select
    sid
from sc
group by
    sid
having count(cid) > 1
;
–46、查询全部学生选修的课程的课程号和课程名:
-- Courses taken by every student: build all (student, course) pairs, attach
-- the score row if any, and keep courses where no pair lacks a score.
select
    cs.cid,
    cs.cname
from student stu
cross join course cs
left join sc
    on sc.sid = stu.sid
   and sc.cid = cs.cid
group by
    cs.cid,
    cs.cname
having count(1) - count(sc.score) = 0;
-- Alternative: count the scored pairs per course and keep the courses whose
-- count equals the total number of students.
select
    aa.cid,
    aa.cname,
    aa.sum1
from (
    select
        sc.cid,
        cs.cname,
        count(score) as sum1
    from student stu
    cross join course cs
    left join sc
        on sc.sid = stu.sid
       and sc.cid = cs.cid
    group by
        sc.cid,
        cs.cname
) aa
cross join (
    select count(*) as c
    from student
) bb
where aa.sum1 = bb.c;
–47、查询没学过”张三”老师讲授的任一门课程的学生姓名:
-- Names of students who have not taken any course taught by the named
-- teacher.
-- Fixes: ASCII quotes around the name; the check is done per student id
-- instead of grouping by name (namesakes were merged into one group), and
-- the student x course cartesian product is replaced by an anti-join.
select
    stu.sname
from student stu
left join (
    -- ids of students with at least one score row in the teacher's courses
    select distinct sc.sid
    from sc
    join course cs
        on cs.cid = sc.cid
    join teacher t
        on t.tid = cs.tid
    where t.tname = '张三'
) taught
    on taught.sid = stu.sid
where taught.sid is null
;
–48、查询两门以上不及格课程的同学的学号以及其平均成绩:
-- Students with two or more failed courses (score < 60), with the number of
-- failed courses and their average score over all courses.
-- Fix: the task asks for the average score, which was not returned; the
-- fail filter moved from WHERE into conditional aggregation so the average
-- covers every course of the student, not only the failed ones.
select
    sid,
    sum(case when score < 60 then 1 else 0 end) as cd,
    avg(score) as avgscore
from sc
group by
    sid
having sum(case when score < 60 then 1 else 0 end) >= 2;
–49、检索02课程分数小于60,按分数降序排列的同学学号
-- Students scoring below 60 in course 02, best of those scores first.
select
    sc.sid,
    score
from sc
where sc.cid = 2
  and sc.score < 60
order by score desc;
–50、查询任意一门课程成绩在70分以上的姓名、课程名称和分数:
-- Student name, course name and score for every score row above 70.
-- The filter is applied first in a CTE, then names are attached.
with high_score as (
    select
        sid,
        cid,
        score
    from sc
    where score > 70
)
select
    stu.sname,
    cs.cname,
    hs.score
from high_score hs
join student stu
    on stu.sid = hs.sid
join course cs
    on cs.cid = hs.cid;