有如下数据:
1,1,yuwen,43
2,1,shuxue,55
3,2,yuwen,77
4,2,shuxue,88
5,3,yuwen,98
6,3,shuxue,65
7,3,yingyu,80
求:所有语文课程成绩 大于 数学课程成绩的学生的学号。
建表结果:
+------------+-------------+----------------+---------------+
| course.id | course.sid | course.course | course.score |
+------------+-------------+----------------+---------------+
| 1 | 1 | yuwen | 43 |
| 2 | 1 | shuxue | 55 |
| 3 | 2 | yuwen | 77 |
| 4 | 2 | shuxue | 88 |
| 5 | 3 | yuwen | 98 |
| 6 | 3 | shuxue | 65 |
| 7 | 3 | yingyu | 80 |
+------------+-------------+----------------+---------------+
解决方案一:课程数目有限,直接利用条件判断语句进行行到列的转换(行转列)。
-- Pivot each student's course rows into one row with one column per course
-- (0 when the student has no row for that course), then keep only the
-- students whose yuwen score is greater than their shuxue score.
select *
from (
    select
        sid,
        max(case when course = 'yuwen' then score else 0 end) as yuwen,
        max(case when course = 'shuxue' then score else 0 end) as shuxue,
        max(case when course = 'yingyu' then score else 0 end) as yingyu
    from course
    group by sid
) as new_c
where new_c.yuwen > new_c.shuxue;
结果如下:
+------------+--------------+---------------+---------------+
| new_c.sid | new_c.yuwen | new_c.shuxue | new_c.yingyu |
+------------+--------------+---------------+---------------+
| 3 | 98 | 65 | 80 |
+------------+--------------+---------------+---------------+
解决方案二:利用收集函数,收集所有科目,再进行列行转换。
1)使用collect_set()进行收集
-- Gather the distinct course names of the whole table into a single array.
select collect_set(course) as cos
from course;
["yuwen","shuxue","yingyu"]
2)求出上述结果与course表的笛卡尔积。
-- Switch to nonstrict mode so that Hive accepts the Cartesian product below.
set hive.mapred.mode=nonstrict;
-- Attach the array of all distinct course names to every row of course.
-- The join is intentionally unconditioned: the right side has exactly one row.
select
    sid,
    course,
    score,
    cos.cos
from course
cross join (select collect_set(course) as cos from course) as cos;
1 yuwen 43 ["yuwen","shuxue","yingyu"]
1 shuxue 55 ["yuwen","shuxue","yingyu"]
2 yuwen 77 ["yuwen","shuxue","yingyu"]
2 shuxue 88 ["yuwen","shuxue","yingyu"]
3 yuwen 98 ["yuwen","shuxue","yingyu"]
3 shuxue 65 ["yuwen","shuxue","yingyu"]
3 yingyu 80 ["yuwen","shuxue","yingyu"]
3)利用max 或 sum 、if 或 case when 语句进行行转列(行--列 转换)。
-- Pivot each student's rows into one column per course by comparing the
-- course name against a position in the array of all course names.
-- collect_set() does not guarantee element order, so the array is sorted
-- first; sort_array() yields ["shuxue","yingyu","yuwen"], which pins every
-- index to a known course.
select
    sid,
    max(if(course = cour[2], score, 0)) as yuwen,
    max(if(course = cour[0], score, 0)) as shuxue,
    max(if(course = cour[1], score, 0)) as yingyu
from (
    select sid, course, score, c.cos as cour
    from course
    cross join (select sort_array(collect_set(course)) as cos from course) as c
) ss
group by sid
order by sid;
运行结果:
+------+--------+---------+---------+
| sid | yuwen | shuxue | yingyu |
+------+--------+---------+---------+
| 1 | 43 | 55 | 0 |
| 2 | 77 | 88 | 0 |
| 3 | 98 | 65 | 80 |
+------+--------+---------+---------+
4)求语文大于数学的记录,将上面的语句增加 having 过滤就好了。
-- Same pivot as the previous step, with a HAVING filter that keeps only the
-- students whose yuwen score is greater than their shuxue score.
-- collect_set() does not guarantee element order, so the array is sorted
-- first; sort_array() yields ["shuxue","yingyu","yuwen"], which pins every
-- index to a known course.
select
    sid,
    max(if(course = cour[2], score, 0)) as yuwen,
    max(if(course = cour[0], score, 0)) as shuxue,
    max(if(course = cour[1], score, 0)) as yingyu
from (
    select sid, course, score, c.cos as cour
    from course
    cross join (select sort_array(collect_set(course)) as cos from course) as c
) ss
group by sid
having yuwen > shuxue;
+------+--------+---------+---------+
| sid | yuwen | shuxue | yingyu |
+------+--------+---------+---------+
| 3 | 98 | 65 | 80 |
+------+--------+---------+---------+
现有一份以下格式的数据:
表示有id为1,2,3的学生选修了课程a,b,c,d,e,f中其中几门:
id course
1,a
1,b
1,c
1,e
2,a
2,c
2,d
2,f
3,a
3,b
3,c
3,e
编写Hive的HQL语句来实现以下结果:
表中的1表示选修,表中的0表示未选修
id a b c d e f
1 1 1 1 0 1 0
2 1 0 1 1 0 1
3 1 1 1 0 1 0
方案一:
-- One row per student with a 1/0 flag per course (1 = enrolled, 0 = not).
-- Each branch must return the integer 1, not the course name, and every
-- course a..f needs its own column to match the required output.
select
    id,
    max(case course when 'a' then 1 else 0 end) as a,
    max(case course when 'b' then 1 else 0 end) as b,
    max(case course when 'c' then 1 else 0 end) as c,
    max(case course when 'd' then 1 else 0 end) as d,
    max(case course when 'e' then 1 else 0 end) as e,
    max(case course when 'f' then 1 else 0 end) as f
from stu_cour
group by id;
方案二:
1):
收集所有课程
-- Collect the distinct course names across all students into one array.
-- Written as a standalone statement; a parenthesised subquery with an alias
-- is only valid inside a FROM clause.
select collect_set(course) as arr_cour
from stu_cour;
["a","b","c","e","d","f"]
收集每个学生所学课程
-- Collect, for every student, the distinct courses that student takes.
select
    id,
    collect_set(course) as arr_c
from stu_cour
group by id
order by id;
1 ["a","b","c","e"]
2 ["a","c","d","f"]
3 ["a","b","c","e"]
2):求笛卡尔积 [ 为方便查询,存中间表 ]
-- Intermediate table: each student's course array next to the array of all
-- courses. The cross join is intentional; the all-courses side is one row.
create table cour_bak as
select
    id,
    arr_c,
    arr_cour
from (
    select collect_set(course) as arr_cour
    from stu_cour
) as all_cour
cross join (
    select id, collect_set(course) as arr_c
    from stu_cour
    group by id
    order by id
) as per_stu;
1 ["a","b","c","e"] ["a","b","c","e","d","f"]
2 ["a","c","d","f"] ["a","b","c","e","d","f"]
3 ["a","b","c","e"] ["a","b","c","e","d","f"]
3):进行 行-->列 转换(行转列)。
-- Emit a 1/0 flag per course by testing whether the student's course array
-- contains the course found at a given position of the all-courses array.
-- collect_set() does not guarantee element order, so the all-courses array
-- is sorted first; sort_array() yields ["a","b","c","d","e","f"].
-- Output columns keep the order a, b, c, e, d, f.
select
    id,
    if(array_contains(arr_c, cours[0]), 1, 0) as a,
    if(array_contains(arr_c, cours[1]), 1, 0) as b,
    if(array_contains(arr_c, cours[2]), 1, 0) as c,
    if(array_contains(arr_c, cours[4]), 1, 0) as e,
    if(array_contains(arr_c, cours[3]), 1, 0) as d,
    if(array_contains(arr_c, cours[5]), 1, 0) as f
from (
    select id, arr_c, sort_array(arr_cour) as cours
    from cour_bak
) sorted_bak;
或者以下:
-- Equivalent to the index-based query as long as arr_cour holds the courses
-- in the order a, b, c, e, d, f; this form does not depend on array order.
select
    id,
    if(array_contains(arr_c, 'a'), 1, 0) as a,
    if(array_contains(arr_c, 'b'), 1, 0) as b,
    if(array_contains(arr_c, 'c'), 1, 0) as c,
    if(array_contains(arr_c, 'e'), 1, 0) as e,
    if(array_contains(arr_c, 'd'), 1, 0) as d,
    if(array_contains(arr_c, 'f'), 1, 0) as f
from cour_bak;
+-----+----+----+----+----+----+----+
| id | a | b | c | e | d | f |
+-----+----+----+----+----+----+----+
| 1 | 1 | 1 | 1 | 1 | 0 | 0 |
| 2 | 1 | 0 | 1 | 0 | 1 | 1 |
| 3 | 1 | 1 | 1 | 1 | 0 | 0 |
+-----+----+----+----+----+----+----+