学生表:
Student(s_id,s_name,s_birth,s_sex) #学生编号,学生姓名, 出生年月,学生性别
成绩表:
Score(s_id,c_id,s_score) #学生编号,课程编号,分数
课程表:
Course(c_id,c_name,t_id) #课程编号, 课程名称, 教师编号
教师表:
Teacher(t_id,t_name) #教师编号,教师姓名
-- Student: id, name, birth date (stored as a string), gender.
create table student (
    s_id    string,
    s_name  string,
    s_birth string,
    s_sex   string
)
row format delimited
fields terminated by '\t';
-- Score: student id, course id, score.
create table score (
    s_id    string,
    c_id    string,
    s_score int
)
row format delimited
fields terminated by '\t';
-- Course: course id, course name, teacher id.
create table course (
    c_id   string,
    c_name string,
    t_id   string
)
row format delimited
fields terminated by '\t';
-- Teacher: teacher id, teacher name.
create table teacher (
    t_id   string,
    t_name string
)
row format delimited
fields terminated by '\t';
注意:如果路径不存在,需要先创建;每行数据的各字段之间用tab键分隔
vi /opt/module/datas/student.csv
01 赵雷 1990-01-01 男
02 钱电 1990-12-21 男
03 孙风 1990-05-20 男
04 李云 1990-08-06 男
05 周梅 1991-12-01 女
06 吴兰 1992-03-01 女
07 郑竹 1989-07-01 女
08 王菊 1990-01-20 女
vi /opt/module/datas/score.csv
01 01 80
01 02 90
01 03 99
02 01 70
02 02 60
02 03 80
03 01 80
03 02 80
03 03 80
04 01 50
04 02 30
04 03 20
05 01 76
05 02 87
06 01 31
06 03 34
07 02 89
07 03 98
vi /opt/module/datas/course.csv
01 语文 02
02 数学 01
03 英语 03
vi /opt/module/datas/teacher.csv
01 张三
02 李四
03 王五
注意:下面的文件路径要根据自己的实际路径修改,不要写错
-- Load each local data file into its table (INTO TABLE, without OVERWRITE).
load data local inpath '/opt/module/datas/student.csv'
    into table student;
load data local inpath '/opt/module/datas/score.csv'
    into table score;
load data local inpath '/opt/module/datas/course.csv'
    into table course;
load data local inpath '/opt/module/datas/teacher.csv'
    into table teacher;
ps:hive查询的语法
SELECT [ALL | DISTINCT] select_expr, select_expr, ...
FROM table_reference
[WHERE where_condition]
[GROUP BY col_list [HAVING condition]]
[CLUSTER BY col_list
| [DISTRIBUTE BY col_list] [SORT BY| ORDER BY col_list]
]
[LIMIT number]
区别:
法一是from 三个表连接,s_score分为两列,
法二:a和b作为子查询,s_score为一列
方法一:
-- Students whose course '01' score is higher than their course '02' score.
-- The comparison in WHERE is only true when both score rows exist, so the
-- second join is written as an inner join; the original LEFT JOIN was
-- silently turned into one by the WHERE clause.
select
    student.*,
    a.s_score as 01_score,
    b.s_score as 02_score
from student
inner join score a
    on student.s_id = a.s_id
    and a.c_id = '01'
inner join score b
    on student.s_id = b.s_id
    and b.c_id = '02'
where a.s_score > b.s_score;
方法二:
-- Students whose course '01' score is higher than their course '02' score,
-- using one subquery per course.
-- The original ON clauses only filtered c_id and had no key linking score to
-- student, so the joins were written as a cross product narrowed in WHERE.
-- Each join now carries its s_id key.
select
    student.*,
    a.s_score as 01_score,
    b.s_score as 02_score
from student
inner join (
    select s_id, s_score
    from score
    where c_id = '01'
) a
    on a.s_id = student.s_id
inner join (
    select s_id, s_score
    from score
    where c_id = '02'
) b
    on b.s_id = student.s_id
where a.s_score > b.s_score;
方法一:
-- Students whose course '01' score is lower than their course '02' score.
-- The comparison in WHERE is only true when both score rows exist, so the
-- second join is written as an inner join; the original LEFT JOIN was
-- silently turned into one by the WHERE clause.
select
    student.*,
    a.s_score as 01_score,
    b.s_score as 02_score
from student
inner join score a
    on student.s_id = a.s_id
    and a.c_id = '01'
inner join score b
    on student.s_id = b.s_id
    and b.c_id = '02'
where a.s_score < b.s_score;
方法二:
-- Students whose course '01' score is lower than their course '02' score,
-- using one subquery per course.
-- The original ON clauses only filtered c_id and had no key linking score to
-- student, so the joins were written as a cross product narrowed in WHERE.
-- Each join now carries its s_id key.
select
    student.*,
    a.s_score as 01_score,
    b.s_score as 02_score
from student
inner join (
    select s_id, s_score
    from score
    where c_id = '01'
) a
    on a.s_id = student.s_id
inner join (
    select s_id, s_score
    from score
    where c_id = '02'
) b
    on b.s_id = student.s_id
where a.s_score < b.s_score;
方法一:
-- Students whose average score is at least 60, with the average rounded to
-- one decimal place for display.
-- The threshold is applied to the unrounded average in HAVING: the original
-- filtered on the rounded value, so an average such as 59.96 (shown as 60.0)
-- was accepted. The s_id key now sits in ON instead of WHERE.
select
    student.s_id,
    student.s_name,
    tmp.avgScorse
from student
inner join (
    select
        score.s_id,
        round(avg(score.s_score), 1) as avgScorse
    from score
    group by score.s_id
    having avg(score.s_score) >= 60
) tmp
    on student.s_id = tmp.s_id;
方法二:
-- Students whose unrounded average score is at least 60; the displayed
-- average is rounded to one decimal place.
select
    student.s_id,
    student.s_name,
    round(avg(score.s_score), 1) as avgScorse
from student
inner join score
    on student.s_id = score.s_id
group by
    student.s_id,
    student.s_name
having avg(score.s_score) >= 60;
(包括有成绩的和无成绩的)
方法一:
-- Students whose average score is below 60, plus students that have no score
-- rows at all (reported with an average of 0).
-- The threshold is applied to the unrounded average in HAVING: the original
-- filtered on the rounded value, so an average such as 59.96 (rounded to
-- 60.0) was wrongly left out. The s_id key now sits in ON instead of WHERE.
select
    student.s_id,
    student.s_name,
    tmp.avgScore
from student
inner join (
    select
        score.s_id,
        round(avg(score.s_score), 1) as avgScore
    from score
    group by score.s_id
    having avg(score.s_score) < 60
) tmp
    on student.s_id = tmp.s_id
union all
-- Students that never appear in the score table.
select
    s2.s_id,
    s2.s_name,
    0 as avgScore
from student s2
where s2.s_id not in (
    select distinct sc2.s_id
    from score sc2
);
方法二:
-- Students whose unrounded average is below 60 (shown rounded to one decimal),
-- followed by students with no score rows, reported with an average of 0.
select
    score.s_id,
    student.s_name,
    round(avg(score.s_score), 1) as avgScore
from student
inner join score
    on student.s_id = score.s_id
group by
    score.s_id,
    student.s_name
having avg(score.s_score) < 60
union all
select
    s2.s_id,
    s2.s_name,
    0 as avgScore
from student s2
where s2.s_id not in (
    select distinct sc2.s_id
    from score sc2
);
-- Number of score rows and total score per student.
-- The left join keeps students without score rows: their count is 0 and
-- their total is NULL.
select
    student.s_id,
    student.s_name,
    count(score.c_id) as total_count,
    sum(score.s_score) as total_score
from student
left join score
    on student.s_id = score.s_id
group by
    student.s_id,
    student.s_name;
-- Row count per teacher name, restricted to names starting with '李'.
select
    t_name,
    count(1)
from teacher
where t_name like '李%'
group by t_name;
-- Student rows reached through score and course for the teacher named '张三'.
-- One output row is produced per matching score row.
select student.*
from student
inner join score
    on student.s_id = score.s_id
inner join course
    on course.c_id = score.c_id
inner join teacher
    on course.t_id = teacher.t_id
where teacher.t_name = '张三';
-- Students with no score row in any course taught by '张三' (anti-join:
-- keep only the students for which the left join found nothing).
select student.*
from student
left join (
    select score.s_id
    from score
    inner join course
        on course.c_id = score.c_id
    inner join teacher
        on course.t_id = teacher.t_id
    where teacher.t_name = '张三'
) tmp
    on student.s_id = tmp.s_id
where tmp.s_id is null;
-- Students that have a score row for both course '01' and course '02'.
-- c_id is a string column holding zero-padded ids, so it is compared with
-- string literals; the original compared it with the integers 1 and 2 and
-- depended on an implicit numeric cast.
select *
from student
inner join (
    select s_id
    from score
    where c_id = '01'
) tmp1
    on student.s_id = tmp1.s_id
inner join (
    select s_id
    from score
    where c_id = '02'
) tmp2
    on student.s_id = tmp2.s_id;
-- Students that have a score row for course '01' but none for course '02'.
-- c_id is a string column holding zero-padded ids, so it is compared with
-- string literals; the original compared it with the integers 1 and 2 and
-- depended on an implicit numeric cast.
select student.*
from student
inner join (
    select s_id
    from score
    where c_id = '01'
) tmp1
    on student.s_id = tmp1.s_id
left join (
    select s_id
    from score
    where c_id = '02'
) tmp2
    on student.s_id = tmp2.s_id
where tmp2.s_id is null;
-- 先查询出课程的总数量
-- Number of rows in the course table.
select count(1)
from course;
-- 再查询所需结果
-- Students that do not have exactly 3 score rows (anti-join against the
-- students that do).
-- NOTE(review): the literal 3 is presumably the course count obtained
-- separately; confirm it still matches the course table.
select student.*
from student
left join (
    select s_id
    from score
    group by s_id
    having count(c_id) = 3
) tmp
    on student.s_id = tmp.s_id
where tmp.s_id is null;
方法二(一步到位):
-- Students whose number of score rows differs from the number of course rows.
-- tmp1 is a single-row subquery holding the course count; it is attached to
-- every student so the left join can compare it with the per-student count.
select student.*
from student
cross join (
    select count(c_id) as num1
    from course
) tmp1
left join (
    select
        s_id,
        count(c_id) as num2
    from score
    group by s_id
) tmp2
    on student.s_id = tmp2.s_id
    and tmp1.num1 = tmp2.num2
where tmp2.s_id is null;
-- Students other than '01' that have a score row in at least one course that
-- student '01' also has a score row in.
-- s_id is a string column, so it is compared with the literal '01'; the
-- original compared it with the integer 01 and depended on an implicit
-- numeric cast. Each join now carries its own key.
select student.*
from student
inner join (
    select s_id, c_id
    from score
) tmp2
    on student.s_id = tmp2.s_id
inner join (
    select c_id
    from score
    where s_id = '01'
) tmp1
    on tmp1.c_id = tmp2.c_id
where student.s_id <> '01'
-- Grouping by every student column collapses the one-row-per-shared-course
-- duplicates.
group by
    student.s_id,
    s_name,
    s_birth,
    s_sex;
-- 备注:hive不支持group_concat方法,可用 concat_ws('|', collect_set(str)) 实现
-- Students other than '01' whose set of course ids is identical to the set
-- of course ids of student '01'.
-- Each set is turned into a '|'-joined string and the strings are compared.
-- collect_set gives no ordering guarantee, so the array is sorted before it
-- is joined; without the sort two equal sets could yield different strings
-- and fail to match.
-- s_id is a string column, so it is compared with the literal '01' instead
-- of the integer 1, and the row filter is applied in WHERE before grouping.
select
    student.*,
    tmp1.course_id
from student
inner join (
    select
        s_id,
        concat_ws('|', sort_array(collect_set(c_id))) as course_id
    from score
    where s_id <> '01'
    group by s_id
) tmp1
    on student.s_id = tmp1.s_id
inner join (
    select concat_ws('|', sort_array(collect_set(c_id))) as course_id2
    from score
    where s_id = '01'
) tmp2
    on tmp1.course_id = tmp2.course_id2;
-- Students with no score row in any course taught by '张三'.
-- tmp2 lists the courses of that teacher, tmp lists the students scored in
-- them, and the outer anti-join keeps everyone else.
select student.*
from student
left join (
    select score.s_id
    from score
    inner join (
        select course.c_id
        from course
        inner join teacher
            on course.t_id = teacher.t_id
        where teacher.t_name = '张三'
    ) tmp2
        on score.c_id = tmp2.c_id
) tmp
    on student.s_id = tmp.s_id
where tmp.s_id is null;
-- Students with more than one score below 60, together with their average
-- over all of their scores, rounded to a whole number.
select
    student.s_id,
    student.s_name,
    tmp.avg_score
from student
inner join (
    select s_id
    from score
    where s_score < 60
    group by score.s_id
    having count(s_id) > 1
) tmp2
    on student.s_id = tmp2.s_id
left join (
    select
        s_id,
        round(avg(score.s_score)) as avg_score
    from score
    group by s_id
) tmp
    on tmp.s_id = student.s_id;
-- Students scoring below 60 in course '01', highest score first.
select
    student.*,
    s_score
from student
inner join score
    on student.s_id = score.s_id
where s_score < 60
  and c_id = '01'
order by s_score desc;
-- One row per student with the scores of courses '01', '02' and '03' side by
-- side plus the average over all of the student's score rows, ordered by that
-- average, highest first. A course the student has no row for shows as NULL.
select
    a.s_id,
    tmp1.s_score as chinese,
    tmp2.s_score as math,
    tmp3.s_score as english,
    round(avg(a.s_score), 2) as avgScore
from score a
left join (
    select s_id, s_score
    from score
    where c_id = '01'
) tmp1
    on tmp1.s_id = a.s_id
left join (
    select s_id, s_score
    from score
    where c_id = '02'
) tmp2
    on tmp2.s_id = a.s_id
left join (
    select s_id, s_score
    from score
    where c_id = '03'
) tmp3
    on tmp3.s_id = a.s_id
group by
    a.s_id,
    tmp1.s_score,
    tmp2.s_score,
    tmp3.s_score
order by avgScore desc;
-- 及格为:>=60,中等为:70-80,优良为:80-90,优秀为:>=90
-- Per course: highest, lowest and average score, plus the share of score
-- rows in each band, all rounded to two decimals.
-- Bands: pass >= 60, moderate 70-79, good 80-89, excellent >= 90.
-- The original counted 60-69 as moderate, 70-79 as good and 80-89 as
-- excellent, which contradicted the band definition given with the query and
-- left scores of 90 and above out of every band except the pass rate.
select
    course.c_id,
    course.c_name,
    tmp.maxScore,
    tmp.minScore,
    tmp.avgScore,
    tmp.passRate,
    tmp.moderate,
    tmp.goodRate,
    tmp.excellentRates
from course
inner join (
    select
        c_id,
        max(s_score) as maxScore,
        min(s_score) as minScore,
        round(avg(s_score), 2) as avgScore,
        round(sum(case when s_score >= 60 then 1 else 0 end) / count(c_id), 2) as passRate,
        round(sum(case when s_score >= 70 and s_score < 80 then 1 else 0 end) / count(c_id), 2) as moderate,
        round(sum(case when s_score >= 80 and s_score < 90 then 1 else 0 end) / count(c_id), 2) as goodRate,
        round(sum(case when s_score >= 90 then 1 else 0 end) / count(c_id), 2) as excellentRates
    from score
    group by c_id
) tmp
    on tmp.c_id = course.c_id;
-- row_number() over() 分组排序功能(MySQL 8.0 之前没有该函数)
-- Number the score rows of each course from the highest score down.
-- row_number gives tied scores distinct consecutive numbers.
select
    c_id,
    s_id,
    s_score,
    row_number() over (partition by c_id order by s_score desc) as rank
from score;
-- Total score per student with a row number assigned from the highest total
-- down; output ordered the same way.
select
    score.s_id,
    s_name,
    sum(s_score) as sumscore,
    row_number() over (order by sum(s_score) desc) as Ranking
from score
inner join student
    on score.s_id = student.s_id
group by
    score.s_id,
    s_name
order by sumscore desc;
方法一:
-- Average score per course with its teacher, highest average first.
select
    course.c_id,
    course.t_id,
    t_name,
    round(avg(s_score), 2) as avgscore
from course
inner join teacher
    on teacher.t_id = course.t_id
inner join score
    on course.c_id = score.c_id
group by
    course.c_id,
    course.t_id,
    t_name
order by avgscore desc;
方法二:
-- Average score per course with its teacher, highest average first.
-- This variant lists the tables in FROM and links them in WHERE.
select
    course.c_id,
    course.t_id,
    t_name,
    round(avg(s_score), 2) as avgscore
from
    course,
    teacher,
    score
where course.c_id = score.c_id
  and teacher.t_id = course.t_id
group by
    course.c_id,
    course.t_id,
    t_name
order by avgscore desc;
-- Students holding row numbers 2 and 3 within each course when score rows
-- are numbered from the highest score down.
select
    student.*,
    t1.c_id,
    t1.s_score,
    t1.ranking
from (
    select
        s_id,
        c_id,
        s_score,
        row_number() over (partition by c_id order by s_score desc) as ranking
    from score
) t1
inner join student
    on student.s_id = t1.s_id
where ranking between 2 and 3;
-- Per course: number of score rows and percentage (two decimals) in each of
-- the bands below 60, 60-69, 70-84 and 85 or above.
-- Each band is computed in its own per-course subquery and attached by c_id.
select
    c.c_id,
    c.c_name,
    tmp1.s0_60,
    tmp1.percentum,
    tmp2.s60_70,
    tmp2.percentum,
    tmp3.s70_85,
    tmp3.percentum,
    tmp4.s85_100,
    tmp4.percentum
from course c
-- Band: below 60.
inner join (
    select
        c_id,
        sum(case when s_score < 60 then 1 else 0 end) as s0_60,
        round(100 * sum(case when s_score < 60 then 1 else 0 end) / count(c_id), 2) as percentum
    from score
    group by c_id
) tmp1
    on tmp1.c_id = c.c_id
-- Band: 60 to 69.
left join (
    select
        c_id,
        sum(case when s_score < 70 and s_score >= 60 then 1 else 0 end) as s60_70,
        round(100 * sum(case when s_score < 70 and s_score >= 60 then 1 else 0 end) / count(c_id), 2) as percentum
    from score
    group by c_id
) tmp2
    on tmp2.c_id = c.c_id
-- Band: 70 to 84.
left join (
    select
        c_id,
        sum(case when s_score < 85 and s_score >= 70 then 1 else 0 end) as s70_85,
        round(100 * sum(case when s_score < 85 and s_score >= 70 then 1 else 0 end) / count(c_id), 2) as percentum
    from score
    group by c_id
) tmp3
    on tmp3.c_id = c.c_id
-- Band: 85 and above.
left join (
    select
        c_id,
        sum(case when s_score >= 85 then 1 else 0 end) as s85_100,
        round(100 * sum(case when s_score >= 85 then 1 else 0 end) / count(c_id), 2) as percentum
    from score
    group by c_id
) tmp4
    on tmp4.c_id = c.c_id;
-- Average score per student (two decimals) with a row number assigned from
-- the highest average down; output ordered the same way.
select
    tmp.*,
    row_number() over (order by tmp.avgScore desc) as Ranking
from (
    select
        student.s_id,
        student.s_name,
        round(avg(score.s_score), 2) as avgScore
    from student
    inner join score
        on student.s_id = score.s_id
    group by
        student.s_id,
        student.s_name
) tmp
order by avgScore desc;
-- 课程id为01的前三名
-- The three highest score rows of course '01' with student and course names.
select
    score.c_id,
    course.c_name,
    student.s_name,
    s_score
from score
inner join student
    on student.s_id = score.s_id
inner join course
    on course.c_id = score.c_id
where score.c_id = '01'
order by s_score desc
limit 3;
-- The three highest score rows of course '02' with student and course names.
select
    score.c_id,
    course.c_name,
    student.s_name,
    s_score
from score
inner join student
    on student.s_id = score.s_id
inner join course
    on course.c_id = score.c_id
where score.c_id = '02'
order by s_score desc
limit 3;
-- The three highest score rows of course '03' with student and course names.
select
    score.c_id,
    course.c_name,
    student.s_name,
    s_score
from score
inner join student
    on student.s_id = score.s_id
inner join course
    on course.c_id = score.c_id
where score.c_id = '03'
order by s_score desc
limit 3;
-- Number of score rows below 60 per course; courses without any such row
-- are not listed.
select
    c.c_id,
    c.c_name,
    tmp.number
from course c
inner join (
    select
        c_id,
        count(1) as number
    from score
    where score.s_score < 60
    group by score.c_id
) tmp
    on tmp.c_id = c.c_id;
-- Students that have exactly two score rows.
select
    st.s_id,
    st.s_name
from student st
inner join (
    select s_id
    from score
    group by s_id
    having count(c_id) = 2
) tmp
    on st.s_id = tmp.s_id;
-- Number of male ('男') and female ('女') students in a single row.
-- count() skips the NULL produced by a CASE without ELSE, so each column
-- counts only the matching rows and is 0 when there are none.
select
    count(case when s_sex = '男' then 1 end) as man,
    count(case when s_sex = '女' then 1 end) as women
from student;
-- Students whose name contains the character '风'.
select *
from student
where s_name like '%风%';
-- Students that share both name and gender with at least one other student;
-- sameName is the number of such other students.
select
    s1.s_id,
    s1.s_name,
    s1.s_sex,
    count(*) as sameName
from student s1
inner join student s2
    on s1.s_name = s2.s_name
    and s1.s_sex = s2.s_sex
where s1.s_id <> s2.s_id
group by
    s1.s_id,
    s1.s_name,
    s1.s_sex;
-- Students whose birth date string starts with '1990'.
select *
from student
where s_birth like '1990%';
-- Average score per course (two decimals), highest average first; equal
-- averages are ordered by ascending course id.
select
    score.c_id,
    c_name,
    round(avg(s_score), 2) as avgScore
from score
inner join course
    on score.c_id = course.c_id
group by
    score.c_id,
    c_name
order by
    avgScore desc,
    score.c_id asc;
-- Students whose unrounded average score is at least 85; the displayed
-- average is rounded to two decimals.
select
    score.s_id,
    s_name,
    round(avg(s_score), 2) as avgScore
from score
inner join student
    on student.s_id = score.s_id
group by
    score.s_id,
    s_name
having avg(s_score) >= 85;
-- Names and scores of students scoring below 60 in the course named '数学'.
select
    s_name,
    s_score as mathScore
from student
inner join (
    select
        s_id,
        s_score
    from score
    inner join course
        on score.c_id = course.c_id
    where c_name = '数学'
) tmp
    on student.s_id = tmp.s_id
where tmp.s_score < 60;
-- One row per student with the scores of '语文', '数学' and '英语' in separate
-- columns (0 when the student has no row for that course) and the total.
-- The column alias spellings are reproduced exactly as the query emits them.
select
    a.s_name,
    sum(case when c.c_name = '语文' then b.s_score else 0 end) as chainese,
    sum(case when c.c_name = '数学' then b.s_score else 0 end) as math,
    sum(case when c.c_name = '英语' then b.s_score else 0 end) as english,
    sum(b.s_score) as sumScore
from student a
inner join score b
    on a.s_id = b.s_id
inner join course c
    on b.c_id = c.c_id
group by
    s_name,
    a.s_id;
注:不用group by
-- Every score row, with student and course names, of the students that have
-- no score below 70. The inner anti-join removes all rows of any student
-- with at least one score under 70.
select
    student.s_id,
    s_name,
    c_name,
    s_score
from student
inner join (
    select sc.*
    from score sc
    left join (
        select s_id
        from score
        where s_score < 70
        group by s_id
    ) tmp
        on sc.s_id = tmp.s_id
    where tmp.s_id is null
) tmp2
    on student.s_id = tmp2.s_id
inner join course
    on tmp2.c_id = course.c_id
order by s_id;
-- Score rows of the students that have no score below 60 (anti-join against
-- the students that have one).
select sc.*
from score sc
left join (
    select s_id
    from score
    where s_score < 60
    group by s_id
) tmp
    on sc.s_id = tmp.s_id
where tmp.s_id is null;
-- 或(效率低)
-- Score rows of the students that have no score below 60, expressed with
-- NOT IN over the ids of students that have one.
select sc.*
from score sc
where sc.s_id not in (
    select s_id
    from score
    where s_score < 60
    group by s_id
);
-- Student name, course name and score for every score below 60.
select
    s_name,
    c_name as courseName,
    tmp.s_score
from student
inner join (
    select
        s_id,
        s_score,
        c_name
    from score
    inner join course
        on score.c_id = course.c_id
    where s_score < 60
) tmp
    on student.s_id = tmp.s_id;
-- Students scoring 80 or more in course '01'.
select
    student.s_id,
    s_name,
    s_score as score_01
from student
inner join score
    on student.s_id = score.s_id
where s_score >= 80
  and c_id = '01';
-- Number of score rows per course; courses without score rows are not listed.
select
    course.c_id,
    course.c_name,
    count(1) as selectNum
from course
inner join score
    on course.c_id = score.c_id
group by
    course.c_id,
    course.c_name;
-- The student holding the single highest score among the courses taught by
-- '张三', with the course name and that score.
-- LIMIT 1 keeps one row only, so tied top scores are not all reported.
select
    student.*,
    tmp3.c_name,
    tmp3.maxScore
from (
    select
        score.s_id,
        course.c_name,
        max(score.s_score) as maxScore
    from score
    inner join course
        on score.c_id = course.c_id
    inner join teacher
        on course.t_id = teacher.t_id
    where teacher.t_name = '张三'
    group by
        score.s_id,
        course.c_name
    order by maxScore desc
    limit 1
) tmp3
inner join student
    on student.s_id = tmp3.s_id;
-- Score rows whose score value also occurs on a score row of a different
-- course. The pairing does not require both rows to be the same student.
-- The inequality stays in WHERE so the join condition is equality only.
select distinct
    a.s_id,
    a.c_id,
    a.s_score
from score a
inner join score b
    on a.s_score = b.s_score
where a.c_id <> b.c_id;
-- The three highest score rows of every course, numbered 1 to 3 per course.
-- Partitioning the window by c_id replaces three UNION ALL branches that
-- hard-coded course ids '01', '02' and '03': the table is read once, and a
-- course added later is covered without editing the query.
-- row_number gives tied scores distinct numbers, as the original did.
select tmp.*
from (
    select
        *,
        row_number() over (partition by c_id order by s_score desc) as ranking
    from score
) tmp
where tmp.ranking <= 3;
-- 要求输出课程号和选修人数,查询结果按人数降序排列,若人数相同,按课程号升序排列
-- Courses with at least 5 score rows and that row count, ordered by count
-- (highest first) and then by ascending course id.
-- The join now matches on c_id. The original had no join condition, so every
-- course was paired with the count of every qualifying course and DISTINCT
-- only hid the repeats; a course could be listed with another course's count.
select distinct
    course.c_id,
    tmp.num
from course
inner join (
    select
        c_id,
        count(1) as num
    from score
    group by c_id
) tmp
    on course.c_id = tmp.c_id
where tmp.num >= 5
order by
    tmp.num desc,
    course.c_id asc;
-- Student ids that have two or more score rows, with the row count.
select
    s_id,
    count(c_id) as totalCourse
from score
group by s_id
having count(c_id) >= 2;
-- Students that have exactly 3 score rows.
select student.*
from student
inner join (
    select
        s_id,
        count(c_id) as totalCourse
    from score
    group by s_id
) tmp
    on student.s_id = tmp.s_id
where totalCourse = 3;
-- 按照出生日期来算,当前月日 < 出生年月的月日,则年龄减一
方法一:
-- Age in whole years: the difference of the calendar years, minus one when
-- this year's birthday has not been reached yet (current month earlier than
-- the birth month, or same month with an earlier day).
select
    s_name,
    s_birth,
    year(current_date) - year(s_birth)
        - case
              when month(current_date) < month(s_birth)
                   or (month(current_date) = month(s_birth)
                       and day(current_date) < day(s_birth))
                  then 1
              else 0
          end as age
from student;
方法二:
-- Age in whole years, computed from the dates written as yyyyMMdd integers:
-- the difference divided by 10000 drops by one exactly while this year's
-- month/day is still before the birth month/day.
-- This replaces a day-count formula that subtracted an estimated number of
-- leap days and divided by 365. That estimate was off by one on the day
-- before a birthday (born 1991-12-01, evaluated on 2024-11-30: 33, not 32).
select
    s_name,
    s_birth,
    floor(
        (cast(date_format(current_date, 'yyyyMMdd') as int)
         - cast(date_format(s_birth, 'yyyyMMdd') as int)) / 10000
    ) as age
from student;
方法一:
-- Students whose birth date has the same week-of-year number as the date
-- seven days from today.
-- Taking the week number of (today + 7 days) instead of adding 1 to today's
-- week number keeps the match working in the last week of a year, where the
-- incremented number ran past the end of the year instead of wrapping to 1.
-- NOTE(review): the week number is taken from the birth year's calendar, not
-- from this year's birthday, so a birthday can be off by a week; confirm
-- whether that precision is acceptable.
select *
from student
where weekofyear(date_add(current_date, 7)) = weekofyear(s_birth);
方法二:
-- Students whose birth date string has month '10' and day 14.
select
    s_name,
    s_sex,
    s_birth
from student
where substring(s_birth, 6, 2) = '10'
  and substring(s_birth, 9, 2) = 14;
方法一:
-- Students whose birth date has the same week-of-year number as the date
-- seven days from today.
-- Taking the week number of (today + 7 days) instead of adding 1 to today's
-- week number keeps the match working in the last week of a year, where the
-- incremented number ran past the end of the year instead of wrapping to 1.
-- NOTE(review): the week number is taken from the birth year's calendar, not
-- from this year's birthday, so a birthday can be off by a week; confirm
-- whether that precision is acceptable.
select *
from student
where weekofyear(date_add(current_date, 7)) = weekofyear(s_birth);
方法二:
-- Students whose birth date string has month '10' and a day from 15 to 21.
select
    s_name,
    s_sex,
    s_birth
from student
where substring(s_birth, 6, 2) = '10'
  and substring(s_birth, 9, 2) between 15 and 21;
方法一:
-- Students whose birth month equals the current month.
select *
from student
where month(s_birth) = month(current_date);
方法二:
-- Students whose birth date string has month '10'.
select
    s_name,
    s_sex,
    s_birth
from student
where substring(s_birth, 6, 2) = '10';
-- Students whose birth date string has month '12'.
select
    s_name,
    s_sex,
    s_birth
from student
where substring(s_birth, 6, 2) = '12';