Hive lateral view使用示范

lateral view 与 explode 等 UDTF 配合使用(通常先用 split 等函数把字符串切分成数组,再交给 explode),能将一行数据拆分成多行数据,在此基础上可以对拆分后的数据进行聚合。lateral view 首先为原始表的每一行调用 UDTF,UDTF 会把一行拆分成一行或者多行,lateral view 再把这些结果与原始行组合,产生一个支持别名的虚拟表。
1、单个LATERAL VIEW
表及数据准备:
-- Recreate the demo table used by the single LATERAL VIEW example.
-- Hive requires an element type on ARRAY columns; a bare "array" does not parse.
-- arrlist holds integers (the sample output prints its elements unquoted).
-- Fields are tab-separated; array elements inside a field are comma-separated.
DROP TABLE IF EXISTS lateral_ex1;
CREATE TABLE lateral_ex1 (
    id      STRING,
    arrlist ARRAY<INT>,
    text    STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
COLLECTION ITEMS TERMINATED BY ','
STORED AS TEXTFILE;
cat lateral_ex1.txt
190	1030,1031,1032,1033,1190	select id
191	1030,1031,1032,1033,1190	select id
-- Load the local sample file into lateral_ex1; OVERWRITE replaces the table's existing contents.
LOAD DATA LOCAL INPATH '/home/hadoop/nisj/hiveDataType/fileDir/lateral_ex1.txt'
OVERWRITE INTO TABLE lateral_ex1;
查询示范:
hive> select * from lateral_ex1;
OK
190     [1030,1031,1032,1033,1190]      select id
191     [1030,1031,1032,1033,1190]      select id
Time taken: 0.087 seconds, Fetched: 2 row(s)
-- Emit one output row per element of arrlist; the exploded element is exposed as col2
-- next to the original columns of the source row.
SELECT
    id,
    arrlist,
    col2,
    text
FROM lateral_ex1
    LATERAL VIEW explode(arrlist) tab1 AS col2;
OK
190     [1030,1031,1032,1033,1190]      1030    select id
190     [1030,1031,1032,1033,1190]      1031    select id
190     [1030,1031,1032,1033,1190]      1032    select id
190     [1030,1031,1032,1033,1190]      1033    select id
190     [1030,1031,1032,1033,1190]      1190    select id
191     [1030,1031,1032,1033,1190]      1030    select id
191     [1030,1031,1032,1033,1190]      1031    select id
191     [1030,1031,1032,1033,1190]      1032    select id
191     [1030,1031,1032,1033,1190]      1033    select id
191     [1030,1031,1032,1033,1190]      1190    select id
Time taken: 73.225 seconds, Fetched: 10 row(s)
2、多个LATERAL VIEW
表及数据准备:
-- Recreate the demo table used by the multiple LATERAL VIEW example.
-- Hive requires an element type on ARRAY columns; a bare "array" does not parse.
-- arrlist1 holds integers and arrlist2 holds strings (the sample output prints
-- arrlist1 elements unquoted and arrlist2 elements in double quotes).
-- Fields are tab-separated; array elements inside a field are comma-separated.
DROP TABLE IF EXISTS lateral_ex2;
CREATE TABLE lateral_ex2 (
    id       STRING,
    arrlist1 ARRAY<INT>,
    arrlist2 ARRAY<STRING>
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
COLLECTION ITEMS TERMINATED BY ','
STORED AS TEXTFILE;
cat lateral_ex2.txt
190	1030,1031,1032,1033,1190	NBA,CX4,AuDi
191	1030,1031,1032,1033,1190	Hive,Oracle,Mysql
-- Load the local sample file into lateral_ex2; OVERWRITE replaces the table's existing contents.
LOAD DATA LOCAL INPATH '/home/hadoop/nisj/hiveDataType/fileDir/lateral_ex2.txt'
OVERWRITE INTO TABLE lateral_ex2;
查询示范:
hive> select * from lateral_ex2;
OK
190     [1030,1031,1032,1033,1190]      ["NBA","CX4","AuDi"]
191     [1030,1031,1032,1033,1190]      ["Hive","Oracle","Mysql"]
Time taken: 0.053 seconds, Fetched: 2 row(s)

-- Chain two lateral views: for each source row, every element of arrlist1 is
-- paired with every element of arrlist2 (5 x 3 = 15 rows per id in the sample data).
SELECT
    id,
    myCol1,
    myCol2
FROM lateral_ex2
    LATERAL VIEW explode(arrlist1) myTable1 AS myCol1
    LATERAL VIEW explode(arrlist2) myTable2 AS myCol2;
OK
190     1030    NBA
190     1030    CX4
190     1030    AuDi
190     1031    NBA
190     1031    CX4
190     1031    AuDi
190     1032    NBA
190     1032    CX4
190     1032    AuDi
190     1033    NBA
190     1033    CX4
190     1033    AuDi
190     1190    NBA
190     1190    CX4
190     1190    AuDi
191     1030    Hive
191     1030    Oracle
191     1030    Mysql
191     1031    Hive
191     1031    Oracle
191     1031    Mysql
191     1032    Hive
191     1032    Oracle
191     1032    Mysql
191     1033    Hive
191     1033    Oracle
191     1033    Mysql
191     1190    Hive
191     1190    Oracle
191     1190    Mysql
Time taken: 24.182 seconds, Fetched: 30 row(s)
3、字符行列转换
表及数据准备:

-- Recreate the demo table used by the string row/column conversion example.
-- col1 and col2 are plain strings; the sample data packs several values into
-- each of them, separated by '|'. Fields themselves are tab-separated.
DROP TABLE IF EXISTS lateral_ex3;
CREATE TABLE lateral_ex3 (
    dayno STRING,
    col1  STRING,
    col2  STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE;
cat lateral_ex3.txt
20141018	aa|bb	7|9|0|3
20141019	cc|dd	6|1|8|5
-- Load the local sample file into lateral_ex3; OVERWRITE replaces the table's existing contents.
LOAD DATA LOCAL INPATH '/home/hadoop/nisj/hiveDataType/fileDir/lateral_ex3.txt'
OVERWRITE INTO TABLE lateral_ex3;
查询示范:
hive> select * from lateral_ex3;
OK
20141018        aa|bb   7|9|0|3
20141019        cc|dd   6|1|8|5
Time taken: 0.058 seconds, Fetched: 2 row(s)

-- Split col1 and col2 on the literal '|' character (escaped because split() takes
-- a regular expression), explode both resulting arrays, and pair every piece of
-- col1 with every piece of col2 for the same dayno.
SELECT
    dayno,
    mycol1,
    mycol2
FROM lateral_ex3
    LATERAL VIEW explode(split(col1, "\\|")) tb1 AS mycol1
    LATERAL VIEW explode(split(col2, "\\|")) tb2 AS mycol2;
OK
20141018        aa      7
20141018        aa      9
20141018        aa      0
20141018        aa      3
20141018        bb      7
20141018        bb      9
20141018        bb      0
20141018        bb      3
20141019        cc      6
20141019        cc      1
20141019        cc      8
20141019        cc      5
20141019        dd      6
20141019        dd      1
20141019        dd      8
20141019        dd      5
Time taken: 28.739 seconds, Fetched: 16 row(s)

你可能感兴趣的:(#,Hive,Sql)