Hive 练习笔记,内容非原创,原文链接:https://www.cnblogs.com/frankdeng/p/9309668.html
[root@master hadoop]# hive
Logging initialized using configuration in jar:file:/usr/local/src/apache-hive-1.2.2-bin/lib/hive-common-1.2.2.jar!/hive-log4j.properties
hive> create database if not exists movie;
OK
Time taken: 1.952 seconds
hive> use movie;
OK
Time taken: 0.082 seconds
-- t_user: user records parsed from '::'-delimited text lines, one regex
-- capture group per column, in column order.
-- "if not exists" makes the statement safe to re-run, consistent with the
-- "create database if not exists" at the top of this session.
-- 'output.format.string' is intentionally not set: the built-in
-- org.apache.hadoop.hive.serde2.RegexSerDe only deserializes and merely
-- logs that property as deprecated.
-- zipcode is kept as string so leading zeros (e.g. 02460) survive.
hive> create table if not exists t_user(
> userid bigint,
> sex string,
> age int,
> occupation string,
> zipcode string)
> row format serde 'org.apache.hadoop.hive.serde2.RegexSerDe'
> with serdeproperties('input.regex'='(.*)::(.*)::(.*)::(.*)::(.*)')
> stored as textfile;
OK
Time taken: 0.552 seconds
hive> use movie;
OK
Time taken: 0.062 seconds
-- t_movie: movie records parsed from '::'-delimited text lines, one regex
-- capture group per column, in column order.
-- "if not exists" makes the statement safe to re-run, consistent with the
-- "create database if not exists" at the top of this session.
-- 'output.format.string' is intentionally not set: the built-in
-- org.apache.hadoop.hive.serde2.RegexSerDe only deserializes and merely
-- logs that property as deprecated.
-- The field separator is a double colon, so the greedy (.*) groups still
-- accept titles that contain a single ':'.
hive> create table if not exists t_movie(
> movieid bigint,
> moviename string,
> movietype string)
> row format serde 'org.apache.hadoop.hive.serde2.RegexSerDe'
> with serdeproperties('input.regex'='(.*)::(.*)::(.*)')
> stored as textfile;
OK
Time taken: 0.149 seconds
hive> use movie;
OK
Time taken: 0.034 seconds
-- t_rating: one rating per row (user, movie, score, time), parsed from
-- '::'-delimited text lines, one regex capture group per column.
-- "if not exists" makes the statement safe to re-run, consistent with the
-- "create database if not exists" at the top of this session.
-- 'output.format.string' is intentionally not set: the built-in
-- org.apache.hadoop.hive.serde2.RegexSerDe only deserializes and merely
-- logs that property as deprecated.
-- NOTE(review): times looks like a Unix epoch-seconds value stored as a
-- string (e.g. 978300760) -- confirm before doing date arithmetic on it.
hive> create table if not exists t_rating(
> userid bigint,
> movieid bigint,
> rate double,
> times string)
> row format serde 'org.apache.hadoop.hive.serde2.RegexSerDe'
> with serdeproperties('input.regex'='(.*)::(.*)::(.*)::(.*)')
> stored as textfile;
OK
Time taken: 0.143 seconds
hive>
hive> load data local inpath "/home/hadoop/movie/users.dat" into table t_user;
Loading data to table movie.t_user
Table movie.t_user stats: [numFiles=1, totalSize=134368]
OK
Time taken: 2.159 seconds
hive> load data local inpath "/home/hadoop/movie/movies.dat" into table t_movie;
Loading data to table movie.t_movie
Table movie.t_movie stats: [numFiles=1, totalSize=171432]
OK
Time taken: 0.764 seconds
hive> load data local inpath "/home/hadoop/movie/ratings.dat" into table t_rating;
Loading data to table movie.t_rating
Table movie.t_rating stats: [numFiles=1, totalSize=24594131]
OK
Time taken: 2.453 seconds
hive>
数据太多,选择3个表前10条内容进行展示
hive> select t.* from t_user t limit 10;
OK
1 F 1 10 48067
2 M 56 16 70072
3 M 25 15 55117
4 M 45 7 02460
5 M 25 20 55455
6 F 50 9 55117
7 M 35 1 06810
8 M 25 12 11413
9 M 25 17 61614
10 F 35 1 95370
Time taken: 0.109 seconds, Fetched: 10 row(s)
hive> select t.* from t_movie t limit 10;
OK
1 Toy Story (1995) Animation|Children's|Comedy
2 Jumanji (1995) Adventure|Children's|Fantasy
3 Grumpier Old Men (1995) Comedy|Romance
4 Waiting to Exhale (1995) Comedy|Drama
5 Father of the Bride Part II (1995) Comedy
6 Heat (1995) Action|Crime|Thriller
7 Sabrina (1995) Comedy|Romance
8 Tom and Huck (1995) Adventure|Children's
9 Sudden Death (1995) Action
10 GoldenEye (1995) Action|Adventure|Thriller
Time taken: 0.127 seconds, Fetched: 10 row(s)
hive> select t.* from t_rating t limit 10;
OK
1 1193 5.0 978300760
1 661 3.0 978302109
1 914 3.0 978301968
1 3408 4.0 978300275
1 2355 5.0 978824291
1 1197 3.0 978302268
1 1287 5.0 978302039
1 2804 5.0 978300719
1 594 4.0 978302268
1 919 4.0 978301368
Time taken: 0.156 seconds, Fetched: 10 row(s)
hive>
-- answer2: the 10 movie titles with the most ratings.
--   moviename : movie title
--   total     : number of t_rating rows joined to that title
-- Ratings are matched to movies on movieid and counted per title.
-- moviename is a secondary sort key so that a tie on total at the cut-off
-- is broken deterministically instead of being left unspecified.
hive> create table answer2 as
> select a.moviename as moviename, count(a.moviename) as total
> from t_movie a
> inner join t_rating b on a.movieid = b.movieid
> group by a.moviename
> order by total desc, moviename
> limit 10;
Total MapReduce CPU Time Spent: 21 seconds 300 msec
OK
Time taken: 137.691 seconds
1、创建了一张名为 answer2 的表
2、查询的内容为电影名(moviename)和该电影被评分的次数(total)
3、数据来自 t_movie 和 t_rating 两张表,通过 movieid 进行连接
4、根据 moviename(电影名) 进行分组
5、按 total(评分次数)降序排列
6、只取前10条,因此 answer2 表中最终只有10条数据
hive> select * from answer2;
OK
American Beauty (1999) 3428
Star Wars: Episode IV - A New Hope (1977) 2991
Star Wars: Episode V - The Empire Strikes Back (1980) 2990
Star Wars: Episode VI - Return of the Jedi (1983) 2883
Jurassic Park (1993) 2672
Saving Private Ryan (1998) 2653
Terminator 2: Judgment Day (1991) 2649
Matrix, The (1999) 2590
Back to the Future (1985) 2583
Silence of the Lambs, The (1991) 2578
Time taken: 0.114 seconds, Fetched: 10 row(s)
hive> create table answer3_F as
> select "F" as sex, c.moviename as name, avg(a.rate) as avgrate, count(c.moviename) as total
> from t_rating a
> join t_user b on a.userid=b.userid
> join t_movie c on a.movieid=c