create table tableName(
......
colName array<基本类型>
......
)
说明:下标从0开始,越界不报错,以null代替
测试数据
zhangsan 78,89,92,96
lisi 67,75,83,94
王五 23,12
create table if not exists arr1(
name string,
scores array<string>
)
row format delimited
fields terminated by '\t'
stored as textfile ;
load data local inpath '/data/arr1.txt' into table arr1;
select * from arr1;
结果
+--------+---------------+
|name |scores |
+--------+---------------+
|zhangsan|["78,89,92,96"]|
|lisi |["67,75,83,94"]|
|王五 |["23,12"] |
+--------+---------------+
select scores[0] from arr1;
结果
+-----------+
|c0 |
+-----------+
|78,89,92,96|
|67,75,83,94|
|23,12 |
+-----------+
说明上面这种建表方式是不对的,应该用下面这种,还需要指定数组的分隔符
create table if not exists arr2
(
name string,
scores array<String>
)
row format delimited
fields terminated by '\t'
collection items terminated by ','
;
load data local inpath '/data/arr1.txt' into table arr2;
select *
from arr2;
结果如下
+--------+---------------------+
|name |scores |
+--------+---------------------+
|zhangsan|["78","89","92","96"]|
|lisi |["67","75","83","94"]|
|王五 |["23","12"] |
+--------+---------------------+
select scores[0] from arr2;
结果
+--+
|c0|
+--+
|78|
|67|
|23|
+--+
select * from arr1;
select name,scores[1] from arr2 where size(scores) > 3;
+--------+--+
|name |c1|
+--------+--+
|zhangsan|89|
|lisi |75|
+--------+--+
--统计arr2中的每个人的总成绩
select scores[0]+scores[1]+nvl(scores[2],0)+nvl(scores[3],0) from arr2;
想要一种效果:也就是将数组类型的数据元素展开,换句话说,就是列转行
zhangsan 78,89,92,96
lisi 67,75,83,94
王五 23,12
将上述效果转成下面的效果,更方便统计每个人的总成绩。
zhangsan 78
zhangsan 89
zhangsan 92
zhangsan 96
lisi 67
lisi 75
lisi 83
lisi 94
王五 23
王五 12
简介
- explode:
展开函数(UDTF函数中的一种),作用是:接受一个数据行,然后返回产生多个数据行
- lateral view:虚拟表。
会将UDTF函数生成的结果放到一个虚拟表中,然后这个虚拟表会和输入行进行join来达到数据聚合的目的
案例:
select explode(score) score from arr2;
+---+
|col|
+---+
|78 |
|89 |
|92 |
|96 |
|67 |
|75 |
|83 |
|94 |
|23 |
|12 |
+---+
select name,cj from arr2 lateral view explode(scores) subview as cj;
+--------+--+
|name |cj|
+--------+--+
|zhangsan|78|
|zhangsan|89|
|zhangsan|92|
|zhangsan|96|
|lisi |67|
|lisi |75|
|lisi |83|
|lisi |94|
|王五 |23|
|王五 |12|
+--------+--+
- 统计每个学生的总成绩:
select name,sum(cj) from arr2 lateral view explode(scores) subview as cj
group by name;
+--------+---+
|name |_c1|
+--------+---+
|lisi |319|
|zhangsan|355|
|王五 |35 |
+--------+---+
还可以将展开的表创建一个新表
create table arr_tmp as
select name,cj from arr2 lateral view explode(scores) subview as cj;
collection items terminated by ','
explode
结合lateral view
展开