hive> select Sno,Sname from student;
hive> select distinct Sname from student inner join sc on student.Sno=sc.Sno;
hive> select count(distinct Sno) cnt from student;
hive> select avg(distinct Grade) from sc where Cno=1;
hive> select Cno,avg(Grade) from sc group by Cno;
hive> select Grade from sc where Cno=1 sort by Grade desc limit 1;
(Compare: "select * from sc where Cno=1 sort by Grade" only sorts within each reducer, whereas "select Grade from sc where Cno=1 order by Grade" performs one global sort.)
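The same result can also be had with the max aggregate, which avoids the sort entirely:
hive> select max(Grade) from sc where Cno=1;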
* Find each course number and the number of students enrolled in it
hive> select Cno,count(1) from sc group by Cno;
hive> select Sno from (select Sno,count(Cno) CountCno from sc group by Sno)a where a.CountCno>3;
or: hive> select Sno from sc group by Sno having count(Cno)>3;
Under strict mode (hive.mapred.mode=strict), an ORDER BY statement must be followed by a LIMIT clause; under nonstrict mode it is optional. The reasoning: ORDER BY forces a single reducer to sort the final result, and if the output has too many rows, that one reducer takes a very long time.
hive> set hive.mapred.mode=strict; <the default is nonstrict>
hive> select Sno from student order by Sno;
This fails with the following error:
FAILED: Error in semantic analysis: 1:33 In strict mode, if ORDER BY is specified, LIMIT must also be specified. Error encountered near token 'Sno'
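Adding a LIMIT satisfies the strict-mode check (the row count here is arbitrary):
hive> select Sno from student order by Sno limit 10;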
set mapred.reduce.tasks=N can be used together with sort by. With sort by alone, rows are assigned to reducers at random; distribute by instead routes rows to reducers by the specified column(s). The example below distributes rows by gender, sorts by age within each reducer, and writes each reducer's output to a separate file.
Query student records, partitioned by gender and ordered by age within each partition:
hive> set mapred.reduce.tasks=2;
hive> insert overwrite local directory '/home/hadoop/out'
select * from student distribute by Sex sort by Sage;
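When distribute by and sort by use the same column, cluster by is an equivalent shorthand (ascending order only):
hive> select * from sc cluster by Sno; <same as: distribute by Sno sort by Sno>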
----Join queries: join supports only equi-joins (at least in most people's view: older versions of Hive could only evaluate equality conditions; newer versions accept non-equi conditions, but they are still best avoided, since a condition like a.key>b.key can blow up the Map stage).
For example:
SELECT a.* FROM a JOIN b ON (a.id = b.id)
SELECT a.* FROM a JOIN b
ON (a.id = b.id AND a.department = b.department)
are correct, whereas:
SELECT a.* FROM a JOIN b ON (a.id>b.id)
is wrong (rejected by older versions, and best avoided even where accepted).
hive> select student.*,sc.* from student join sc on (student.Sno =sc.Sno);
hive> select student.Sname,course.Cname,sc.Grade from student join sc on student.Sno=sc.Sno join course on sc.Cno=course.Cno;
hive> select student.Sname,sc.Grade from student join sc on student.Sno=sc.Sno where sc.Cno=2 and sc.Grade>90;
hive> select student.Sname,sc.Cno from student left outer join sc on student.Sno=sc.Sno;
If a Sno in student has no matching row in sc, the output is student.Sname with NULL for sc.Cno. A right outer join would instead keep every row from the right-hand table, with NULL on the left where there is no match.
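For comparison, the right outer join over the same tables keeps every row of sc:
hive> select student.Sname,sc.Cno from student right outer join sc on student.Sno=sc.Sno;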
The JOIN is evaluated before the WHERE clause. To restrict a join's output, put the filter condition in the WHERE clause, or move it into the JOIN's ON clause.
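The difference matters for outer joins. A sketch with hypothetical tables a and b (columns key, val, and a date column ds): a filter on b in the WHERE clause runs after the join, so unmatched rows of a (where b.ds is NULL) get discarded and the outer join degenerates into an inner join; the same filter in the ON clause pre-filters b and keeps the unmatched rows of a.
SELECT a.val, b.val FROM a LEFT OUTER JOIN b ON (a.key = b.key)
WHERE b.ds = '2009-07-07';
SELECT a.val, b.val FROM a LEFT OUTER JOIN b
ON (a.key = b.key AND b.ds = '2009-07-07');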
* ----LEFT SEMI JOIN: Hive does not currently implement IN/EXISTS subqueries; such subqueries can be rewritten with LEFT SEMI JOIN.
Rewrite the following subquery as a LEFT SEMI JOIN:
SELECT a.key, a.value
FROM a
WHERE a.key IN
(SELECT b.key
FROM b);
can be rewritten as:
SELECT a.key, a.value
FROM a LEFT SEMI JOIN b on (a.key = b.key)
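Applied to the sample tables, "students who have taken course 2" can be written the same way; note that in a LEFT SEMI JOIN the right-hand table may only be referenced inside the ON clause:
hive> select Sname from student left semi join sc on (student.Sno=sc.Sno and sc.Cno=2);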
Find the names of students in the same department as 刘晨 (the statements that follow compare other join types on the same condition):
hive> select s1.Sname from student s1 left semi join student s2 on s1.Sdept=s2.Sdept and s2.Sname='刘晨';
select * from student s1 left join student s2 on s1.Sdept=s2.Sdept and s2.Sname='刘晨';
select * from student s1 right join student s2 on s1.Sdept=s2.Sdept and s2.Sname='刘晨';
select * from student s1 inner join student s2 on s1.Sdept=s2.Sdept and s2.Sname='刘晨';
select * from student s1 left semi join student s2 on s1.Sdept=s2.Sdept and s2.Sname='刘晨';
Note: Hive implements only LEFT SEMI JOIN; there is no RIGHT SEMI JOIN, so this last variant fails to parse:
select s1.Sname from student s1 right semi join student s2 on s1.Sdept=s2.Sdept and s2.Sname='刘晨';
create table student(Sno int,Sname string,Sex string,Sage int,Sdept string)
row format delimited
fields terminated by ','
stored as textfile;
create table sc(Sno int,Cno int,Grade int)
row format delimited
fields terminated by ','
stored as textfile;
create table course(Cno int,Cname string)
row format delimited
fields terminated by ','
stored as textfile;
load data local inpath '/home/hadoop/Desktop/hive/students.txt' overwrite into table student;
load data local inpath '/home/hadoop/Desktop/hive/sc.txt' overwrite into table sc;
load data local inpath '/home/hadoop/Desktop/hive/course.txt' overwrite into table course;
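A quick sanity check that the loads succeeded (the counts should match the data files listed below):
hive> select count(1) from student;
hive> select count(1) from sc;
hive> select count(1) from course;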
students.txt (student table data):
95001,李勇,男,20,CS
95002,刘晨,女,19,IS
95003,王敏,女,22,MA
95004,张立,男,19,IS
95005,刘刚,男,18,MA
95006,孙庆,男,23,CS
95007,易思玲,女,19,MA
95008,李娜,女,18,CS
95009,梦圆圆,女,18,MA
95010,孔小涛,男,19,CS
95011,包小柏,男,18,MA
95012,孙花,女,20,CS
95013,冯伟,男,21,CS
95014,王小丽,女,19,CS
95015,王君,男,18,MA
95016,钱国,男,21,MA
95017,王风娟,女,18,IS
95018,王一,女,19,IS
95019,邢小丽,女,19,IS
95020,赵钱,男,21,IS
95021,周二,男,17,MA
95022,郑明,男,20,MA
sc.txt (sc table data):
95001,1,81
95001,2,85
95001,3,88
95001,4,70
95002,2,90
95002,3,80
95002,4,71
95002,5,60
95003,1,82
95003,3,90
95003,5,100
95004,1,80
95004,2,92
95004,4,91
95004,5,70
95005,1,70
95005,2,92
95005,3,99
95005,6,87
95006,1,72
95006,2,62
95006,3,100
95006,4,59
95006,5,60
95006,6,98
95007,3,68
95007,4,91
95007,5,94
95007,6,78
95008,1,98
95008,3,89
95008,6,91
95009,2,81
95009,4,89
95009,6,100
95010,2,98
95010,5,90
95010,6,80
95011,1,81
95011,2,91
95011,3,81
95011,4,86
95012,1,81
95012,3,78
95012,4,85
95012,6,98
95013,1,98
95013,2,58
95013,4,88
95013,5,93
95014,1,91
95014,2,100
95014,4,98
95015,1,91
95015,3,59
95015,4,100
95015,6,95
95016,1,92
95016,2,99
95016,4,82
95017,4,82
95017,5,100
95017,6,58
95018,1,95
95018,2,100
95018,3,67
95018,4,78
95019,1,77
95019,2,90
95019,3,91
95019,4,67
95019,5,87
95020,1,66
95020,2,99
95020,5,93
95021,2,93
95021,5,91
95021,6,99
95022,3,69
95022,4,93
95022,5,82
95022,6,100
course.txt (course table data):
1,数据库
2,数学
3,信息系统
4,操作系统
5,数据结构
6,数据处理