Hive SQL题库(初级)

第一章 环境准备

1.1 建表语句

hive>
-- 创建学生表
DROP TABLE IF EXISTS student;
create table if not exists student_info(
    stu_id string COMMENT '学生id',
    stu_name string COMMENT '学生姓名',
    birthday string COMMENT '出生日期',
    sex string COMMENT '性别'
) 
row format delimited fields terminated by ',' 
stored as textfile;

-- 创建课程表
DROP TABLE IF EXISTS course;
create table if not exists course_info(
    course_id string COMMENT '课程id',
    course_name string COMMENT '课程名',
    tea_id string COMMENT '任课老师id'
) 
row format delimited fields terminated by ',' 
stored as textfile;

-- 创建老师表
DROP TABLE IF EXISTS teacher;
create table if not exists teacher_info(
    tea_id string COMMENT '老师id',
    tea_name string COMMENT '学生姓名'
) 
row format delimited fields terminated by ',' 
stored as textfile;

-- 创建分数表
DROP TABLE IF EXISTS score;
create table if not exists score_info(
    stu_id string COMMENT '学生id',
    course_id string COMMENT '课程id',
    score int COMMENT '成绩'
) 
row format delimited fields terminated by ',' 
stored as textfile;

1.2 数据准备

(1)创建/opt/module/data目录

[atguigu@hadoop102 module]$ mkdir data

(2)将如下4个文件放到/opt/module/data目录下

[atguigu@hadoop102 data]$ vim student_info.txt

001,彭于晏,1995-05-16,男
002,胡歌,1994-03-20,男
003,周杰伦,1995-04-30,男
004,刘德华,1998-08-28,男
005,唐国强,1993-09-10,男
006,陈道明,1992-11-12,男
007,陈坤,1999-04-09,男
008,吴京,1994-02-06,男
009,郭德纲,1992-12-05,男
010,于谦,1998-08-23,男
011,潘长江,1995-05-27,男
012,杨紫,1996-12-21,女
013,蒋欣,1997-11-08,女
014,赵丽颖,1990-01-09,女
015,刘亦菲,1993-01-14,女
016,周冬雨,1990-06-18,女
017,范冰冰,1992-07-04,女
018,李冰冰,1993-09-24,女
019,邓紫棋,1994-08-31,女
020,宋丹丹,1991-03-01,女


[atguigu@hadoop102 data]$ vim course_info.txt

01,语文,1003
02,数学,1001
03,英语,1004
04,体育,1002
05,音乐,1002



[atguigu@hadoop102 data]$ vim teacher_info.txt

1001,张高数
1002,李体音
1003,王子文
1004,刘丽英

[atguigu@hadoop102 data]$ vim score_info.txt

001,01,94
002,01,74
004,01,85
005,01,64
006,01,71
007,01,48
008,01,56
009,01,75
010,01,84
011,01,61
012,01,44
013,01,47
014,01,81
015,01,90
016,01,71
017,01,58
018,01,38
019,01,46
020,01,89
001,02,63
002,02,84
004,02,93
005,02,44
006,02,90
007,02,55
008,02,34
009,02,78
010,02,68
011,02,49
012,02,74
013,02,35
014,02,39
015,02,48
016,02,89
017,02,34
018,02,58
019,02,39
020,02,59
001,03,79
002,03,87
004,03,89
005,03,99
006,03,59
007,03,70
008,03,39
009,03,60
010,03,47
011,03,70
012,03,62
013,03,93
014,03,32
015,03,84
016,03,71
017,03,55
018,03,49
019,03,93
020,03,81
001,04,54
002,04,100
004,04,59
005,04,85
007,04,63
009,04,79
010,04,34
013,04,69
014,04,40
016,04,94
017,04,34
020,04,50
005,05,85
007,05,63
009,05,79
015,05,59
018,05,87

1.3 插入数据

(1)插入数据

hive>
load data local inpath '/opt/module/data/student_info.txt' into table student_info;

load data local inpath '/opt/module/data/course_info.txt' into table course_info;

load data local inpath '/opt/module/data/teacher_info.txt' into table teacher_info;

load data local inpath '/opt/module/data/score_info.txt' into table score_info;

(2)验证插入数据情况

hive>
select * from student_info limit 5;
select * from course_info limit 5;
select * from teacher_info limit 5;
select * from score_info limit 5;

第二章 简单查询

2.1 查找特定条件

2.1.1 查询姓名中带“冰”的学生名单

select 
       *
from
     student_info
where stu_name like '%冰%';

2.1.3 检索课程编号为“04”且分数小于60的学生的课程信息,结果按分数降序排列

select
    stu_id,
    course_id,
    score
from score_info
where course_id ='04' and score<60
order by score desc;

2.1.4 查询数学成绩不及格的学生和其对应的成绩,按照学号升序排序

答案一:


SELECT
    student.stu_id,
    student.stu_name,
    score.score 
FROM
    student_info student
    INNER JOIN score_info score ON student.stu_id = score.stu_id
    INNER JOIN course_info course ON score.course_id = course.course_id 
WHERE
    course.course_name = '数学' 
    AND score.score < 60 
ORDER BY
    student.stu_id ASC;

答案二:

select
    s.stu_id,
    s.stu_name,
    t1.score
from student_info s
join (
    select
        *
    from score_info
    where course_id=(select course_id from course_info where course_name='数学') and score < 60
    ) t1 on s.stu_id = t1.stu_id
order by s.stu_id;

第三章 汇总分析

3.1 汇总分析

3.1.1 查询编号为“02”的课程的总成绩

select sum(score) from score_info where  course_id='02';

3.2 分组

3.2.1 查询各科成绩最高和最低的分,以如下的形式显示:课程号,最高分,最低分

select course_id ,max(score) ,min(score)  from score_info group by course_id;

3.2.2 查询每门课程有多少学生参加了考试(有考试成绩)

select course_id,count(stu_id) from score_info group by course_id;

3.3 分组结果的条件

3.3.3 查询同姓(假设每个学生姓名的第一个字为姓)的学生名单并统计同姓人数大于2的姓

答案一:

select substring(stu_name,0,1)   ,count(*) as cnt from student_info group by substring(stu_name,0,1) having cnt>=2;

答案二:

select
    t1.first_name,
    count(*) count_first_name
from (
         select
             stu_id,
             stu_name,
             substr(stu_name,0,1) first_name
         from student_info
     ) t1
group by t1.first_name
having count_first_name >= 2;

3.4 查询结果排序&分组指定条件

3.4.2 按照如下格式显示学生的语文、数学、英语三科成绩,没有成绩的输出为0,按照学生的有效平均成绩降序显示

学生id 语文 数学 英语 有效课程数 有效平均成绩

答案一:

select
    si.stu_id,
    sum(if(ci.course_name='语文',score,0))  `语文`,
    sum(if(ci.course_name='数学',score,0))  `数学`,
    sum(if(ci.course_name='英语',score,0))  `英语`,
    count(*)  `有效课程数`,
    avg(si.score)  `平均成绩`
from
    score_info si
        join
    course_info ci
    on
            si.course_id=ci.course_id
group by
    si.stu_id
order by
    `平均成绩` desc;

答案二:

SELECT
    tab1.stu_id,
    tab1.score,
    tab2.score,
    tab3.score,
    tab4.cnt,
    tab5.avg_score 
FROM
    (
    SELECT
        stu.stu_id,
    IF
        ( si.score IS NULL, 0, si.score ) AS score 
    FROM
        student_info stu
        INNER JOIN score_info si ON stu.stu_id = si.stu_id 
    WHERE
        si.course_id = '01' 
    ) tab1
    FULL JOIN (
    SELECT
        stu.stu_id,
    IF
        ( si.score IS NULL, 0, si.score ) AS score 
    FROM
        student_info stu
        INNER JOIN score_info si ON stu.stu_id = si.stu_id 
    WHERE
        si.course_id = '02' 
    ) tab2 ON tab1.stu_id = tab2.stu_id
    FULL JOIN (
    SELECT
        stu.stu_id,
    IF
        ( si.score IS NULL, 0, si.score ) AS score 
    FROM
        student_info stu
        INNER JOIN score_info si ON stu.stu_id = si.stu_id 
    WHERE
        si.course_id = '03' 
    ) tab3 ON tab3.stu_id = tab2.stu_id
    FULL JOIN (
    SELECT
        stu.stu_id,
        count(*) AS cnt 
    FROM
        student_info stu
        INNER JOIN score_info si ON stu.stu_id = si.stu_id 
    WHERE
        si.score IS NOT NULL 
    GROUP BY
        stu.stu_id 
    ) tab4 ON tab4.stu_id = tab3.stu_id
    FULL JOIN (
    SELECT
        stu.stu_id,
        avg( si.score ) AS avg_score 
    FROM
        student_info stu
        INNER JOIN score_info si ON stu.stu_id = si.stu_id 
    WHERE
        si.score IS NOT NULL 
    GROUP BY
        stu.stu_id 
    ) tab5 ON tab5.stu_id = tab4.stu_id 
ORDER BY
    avg_score DESC;

3.4.3 查询一共参加三门课程且其中一门为语文课程的学生的id和姓名

答案一:

select stu_id,stu_name
from student_info
where stu_id in (select stu_id
                 from (
                          select stu_id, count(*) course_num, collect_set(course_id) course_list
                          from score_info
                          group by stu_id
                          having course_num = 3
                             and array_contains(course_list, '01')) t1);

答案二:

select
    t2.stu_id,
    s.stu_name
from (
         select t1.stu_id
         from (
                  select stu_id,
                         course_id
                  from score_info
                  where stu_id in (
                      select stu_id
                      from score_info
                      where course_id = "01"
                  )
              ) t1
         group by t1.stu_id
         having count(t1.course_id) = 3
     ) t2
join student_info s on t2.stu_id = s.stu_id;

输出:

stu_id       stu_name
006          陈道明
008          吴京
011          潘长江
012          杨紫
019          邓紫棋

第四章 复杂查询

4.1 子查询

4.1.1 查询所有课程成绩均小于60分的学生的学号、姓名

答案一:

select stu_id
from score_info
where score < 60
  and stu_id not in (select stu_id from score_info where score >= 60)

答案二:

select s.stu_id,s.stu_name from (
                  select stu_id, sum(if(score >= 60, 1, 0)) flag
                  from score_info
                  group by stu_id
                  having flag = 0
                  )t1 join student_info s on s.stu_id=t1.stu_id;

结果:

s.stu_id  s.stu_name
008          吴京
017          范冰冰

第五章 多表查询

5.1 表联结

5.1.1 查询有两门以上的课程不及格的同学的学号及其平均成绩

① 先找出有两门以上不及格的学生名单,按照学生分组,过滤组内成绩低于60的并进行count,count>=2。

② 接着做出一张表查询学生的平均成绩并和上一个子查询中的学生学号进行连接

select t1.stu_id, t1.avg_score
from (
         select stu_id, sum(if(score < 60, 1, 0)) cn, avg(score) avg_score
         from score_info
         group by stu_id
         having cn >= 2) t1
         join student_info s on t1.stu_id = s.stu_id;

5.2 多表连接

5.2.6 查询学过“李体音”老师所教的所有课的同学的学号、姓名

答案一:

select stu_id, stu_name
from student_info
where stu_id in (select stu_id
                 from score_info
                 where course_id in (select course_id
                                     from course_info
                                     where tea_id in (select tea_id from teacher_info where tea_name = '李体音'))
                 group by stu_id
                 having count(*) = 2);

答案二:

select
    t1.stu_id,
    si.stu_name
from
(
    select
        stu_id
    from score_info si
    where course_id in
    (
        select
           course_id
        from course_info c
        join teacher_info t
        on c.tea_id = t.tea_id
        where tea_name='李体音'      --李体音教的所有课程
    )
    group by stu_id
    having count(*)=2       --学习所有课程的学生
)t1
left join student_info si
on t1.stu_id=si.stu_id;

结果:

s.stu_id    s.stu_name
005       唐国强
007       陈坤
009       郭德纲
Time taken: 27.16 seconds, Fetched: 3 row(s)

5.2.7 查询学过“李体音”老师所讲授的任意一门课程的学生的学号、姓名

答案一:

select stu_id,stu_name
from student_info
where stu_id in (
    select stu_id
    from score_info
    where course_id in (
        select course_id
        from course_info
        where tea_id in (select tea_id from teacher_info where tea_name = '李体音')
    ));

答案二:

select t1.stu_id,
       si.stu_name
from (
         select stu_id
         from score_info si
         where course_id in
               (
                   select course_id
                   from course_info c
                            join teacher_info t
                                 on c.tea_id = t.tea_id
                   where tea_name = '李体音'
               )
         group by stu_id
     ) t1
         left join student_info si
                   on t1.stu_id = si.stu_id;

结果:

s.stu_id    s.stu_name
001       彭于晏
002       胡歌
004       刘德华
005       唐国强
007       陈坤
009       郭德纲
010       于谦
013       蒋欣
014       赵丽颖
015       刘亦菲
016       周冬雨
017       范冰冰
018       李冰冰
020       宋丹丹

5.2.8 查询没学过"李体音"老师讲授的任一门课程的学生姓名

答案一:

select stu_id,stu_name
from student_info
where stu_id not in (
    select stu_id
    from score_info
    where course_id in (
        select course_id
        from course_info
        where tea_id in (select tea_id from teacher_info where tea_name = '李体音')
    ));

答案二:

select
    stu_id,
    stu_name
from student_info
where stu_id not in
(
    select
        stu_id
    from score_info si
    where course_id in
    (
        select
           course_id
        from course_info c
        join teacher_info t
        on c.tea_id = t.tea_id
        where tea_name='李体音'
    )
    group by stu_id
);

结果:

stu_id  stu_name
003     周杰伦
006     陈道明
008     吴京
011     潘长江
012     杨紫
019     邓紫棋

5.2.9 查询至少有一门课与学号为“001”的学生所学课程相同的学生的学号和姓名

答案一:

select stu_id, stu_name
from student_info
where stu_id in (select stu_id
                 from score_info
                 where course_id in (select course_id from score_info where stu_id = '001'))
  and stu_id != '001';

答案二:

select si.stu_id,
       si.stu_name
from score_info sc
         join student_info si
              on sc.stu_id = si.stu_id
where sc.course_id in
      (
          select course_id
          from score_info
          where stu_id = '001' --001的课程
      )
  and sc.stu_id <> '001' --排除001学生
group by si.stu_id, si.stu_name;

结果:

s1.stu_id     s2.stu_name
002          胡歌
004          刘德华
005          唐国强
006          陈道明
007          陈坤
008          吴京
009          郭德纲
010          于谦
011          潘长江
012          杨紫
013          蒋欣
014          赵丽颖
015          刘亦菲
016          周冬雨
017          范冰冰
018          李冰冰
019          邓紫棋
020          宋丹丹

你可能感兴趣的:(hive,hive,sql,hadoop)