主要学习两方面:如何存/取数据
array要求所有元素的数据类型必须一致,下面是例子:
[root@10-9-15-140 data]# cat hive_array.txt
zhangsan beijing,shanghai,tianjin,hangzhou
lisi changchu,chengdu,wuhan,beijing
# 存数据
hive (d1_hive)> create table hive_array(
> name string,
> work_place array<string>
> )
> row format delimited fields terminated by '\t'
> COLLECTION ITEMS TERMINATED BY ',';
hive (d1_hive)> load data local inpath '/home/hadoop/data/hive_array.txt' overwrite into table hive_array;
Loading data to table d1_hive.hive_array
Table d1_hive.hive_array stats: [numFiles=1, numRows=0, totalSize=79, rawDataSize=0]
OK
Time taken: 1.366 seconds
hive (d1_hive)>
hive (d1_hive)> select * from hive_array;
OK
hive_array.name hive_array.work_place
zhangsan ["beijing","shanghai","tianjin","hangzhou"]
lisi ["changchu","chengdu","wuhan","beijing"]
Time taken: 0.368 seconds, Fetched: 2 row(s)
hive (d1_hive)>
# 取数据,下标从0开始;下标越界时返回NULL
hive (d1_hive)> select work_place[0],work_place[10] from hive_array;
OK
_c0 _c1
beijing NULL
changchu NULL
Time taken: 0.143 seconds, Fetched: 2 row(s)
hive (d1_hive)>
# 判断array是否含有指定数据
hive (d1_hive)> select * from hive_array where array_contains(work_place,'tianjin');
OK
hive_array.name hive_array.work_place
zhangsan ["beijing","shanghai","tianjin","hangzhou"]
Time taken: 0.132 seconds, Fetched: 1 row(s)
hive (d1_hive)>
# array长度
hive (d1_hive)> select name,size(work_place) as num from hive_array;
OK
name num
zhangsan 4
lisi 4
Time taken: 0.116 seconds, Fetched: 2 row(s)
hive (d1_hive)>
map要求key和value的数据类型必须分别与建表时指定的类型一致,下面是例子:
[root@10-9-15-140 data]# cat hive_map.txt
1,zhangsan,father:xiaoming#mother:xiaohuang#brother:xiaoxu,28
2,lisi,father:mayun#mother:huangyi#brother:guanyu,22
3,wangwu,father:wangjianlin#mother:ruhua#sister:jingtian,29
4,mayun,father:mayongzhen#mother:angelababy,26
[root@10-9-15-140 data]#
# 存数据
hive (d1_hive)> create table hive_map(