lateral view用于和split、explode等UDTF一起使用的,能将一行数据拆分成多行数据;在此基础上可以对拆分的数据进行聚合,lateral view首先为原始表的每行调用UDTF,UDTF会把一行拆分成一行或者多行,lateral view再把结果组合,产生一个支持别名表的虚拟表。
1、单个LATERAL VIEW
表及数据准备:
drop table if exists lateral_ex1;
create table lateral_ex1(id string,arrlist array,text string)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
COLLECTION ITEMS TERMINATED BY ','
STORED AS TEXTFILE;
cat lateral_ex1.txt
190 1030,1031,1032,1033,1190 select id
191 1030,1031,1032,1033,1190 select id
LOAD DATA LOCAL INPATH '/home/hadoop/nisj/hiveDataType/fileDir/lateral_ex1.txt' OVERWRITE INTO TABLE lateral_ex1;
查询示范:
hive> select * from lateral_ex1;
OK
190 [1030,1031,1032,1033,1190] select id
191 [1030,1031,1032,1033,1190] select id
Time taken: 0.087 seconds, Fetched: 2 row(s)
select id,arrlist,col2,text from lateral_ex1 LATERAL VIEW explode(arrlist) tab1 as col2;
OK
190 [1030,1031,1032,1033,1190] 1030 select id
190 [1030,1031,1032,1033,1190] 1031 select id
190 [1030,1031,1032,1033,1190] 1032 select id
190 [1030,1031,1032,1033,1190] 1033 select id
190 [1030,1031,1032,1033,1190] 1190 select id
191 [1030,1031,1032,1033,1190] 1030 select id
191 [1030,1031,1032,1033,1190] 1031 select id
191 [1030,1031,1032,1033,1190] 1032 select id
191 [1030,1031,1032,1033,1190] 1033 select id
191 [1030,1031,1032,1033,1190] 1190 select id
Time taken: 73.225 seconds, Fetched: 10 row(s)
2、多个LATERAL VIEW
表及数据准备:
drop table if exists lateral_ex2;
create table lateral_ex2(id string,arrlist1 array,arrlist2 array)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
COLLECTION ITEMS TERMINATED BY ','
STORED AS TEXTFILE;
cat lateral_ex2.txt
190 1030,1031,1032,1033,1190 NBA,CX4,AuDi
191 1030,1031,1032,1033,1190 Hive,Oracle,Mysql
LOAD DATA LOCAL INPATH '/home/hadoop/nisj/hiveDataType/fileDir/lateral_ex2.txt' OVERWRITE INTO TABLE lateral_ex2;
查询示范:
hive> select * from lateral_ex2;
OK
190 [1030,1031,1032,1033,1190] ["NBA","CX4","AuDi"]
191 [1030,1031,1032,1033,1190] ["Hive","Oracle","Mysql"]
Time taken: 0.053 seconds, Fetched: 2 row(s)
SELECT id,myCol1,myCol2 FROM lateral_ex2
LATERAL VIEW explode(arrlist1) myTable1 AS myCol1
LATERAL VIEW explode(arrlist2) myTable2 AS myCol2;
OK
190 1030 NBA
190 1030 CX4
190 1030 AuDi
190 1031 NBA
190 1031 CX4
190 1031 AuDi
190 1032 NBA
190 1032 CX4
190 1032 AuDi
190 1033 NBA
190 1033 CX4
190 1033 AuDi
190 1190 NBA
190 1190 CX4
190 1190 AuDi
191 1030 Hive
191 1030 Oracle
191 1030 Mysql
191 1031 Hive
191 1031 Oracle
191 1031 Mysql
191 1032 Hive
191 1032 Oracle
191 1032 Mysql
191 1033 Hive
191 1033 Oracle
191 1033 Mysql
191 1190 Hive
191 1190 Oracle
191 1190 Mysql
Time taken: 24.182 seconds, Fetched: 30 row(s)
3、字符行列转换
表及数据准备:
drop table if exists lateral_ex3;
create table lateral_ex3(dayno string,col1 string,col2 string)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE;
cat lateral_ex3.txt
20141018 aa|bb 7|9|0|3
20141019 cc|dd 6|1|8|5
LOAD DATA LOCAL INPATH '/home/hadoop/nisj/hiveDataType/fileDir/lateral_ex3.txt' OVERWRITE INTO TABLE lateral_ex3;
查询示范:
hive> select * from lateral_ex3;
OK
20141018 aa|bb 7|9|0|3
20141019 cc|dd 6|1|8|5
Time taken: 0.058 seconds, Fetched: 2 row(s)
select dayno,mycol1,mycol2 from lateral_ex3
lateral view explode(split(col1,"\\|")) tb1 as mycol1
lateral view explode(split(col2,"\\|")) tb2 as mycol2;
OK
20141018 aa 7
20141018 aa 9
20141018 aa 0
20141018 aa 3
20141018 bb 7
20141018 bb 9
20141018 bb 0
20141018 bb 3
20141019 cc 6
20141019 cc 1
20141019 cc 8
20141019 cc 5
20141019 dd 6
20141019 dd 1
20141019 dd 8
20141019 dd 5
Time taken: 28.739 seconds, Fetched: 16 row(s)