Hive (Part 5): Complex Data Types and Partitioned Tables

Complex Data Types

For each type there are two things to learn: how to store the data and how to query it back.

array

An array requires all of its elements to be of the same data type. Example:

[root@10-9-15-140 data]# cat hive_array.txt
zhangsan	beijing,shanghai,tianjin,hangzhou
lisi	changchu,chengdu,wuhan,beijing
# Storing data
hive (d1_hive)> create table hive_array(
              > name string,
              > work_place array<string>
              > )
              > row format delimited fields terminated by '\t'
              > COLLECTION ITEMS TERMINATED BY ',';
          
hive (d1_hive)> load data local inpath '/home/hadoop/data/hive_array.txt' overwrite into table hive_array;
Loading data to table d1_hive.hive_array
Table d1_hive.hive_array stats: [numFiles=1, numRows=0, totalSize=79, rawDataSize=0]
OK
Time taken: 1.366 seconds
hive (d1_hive)>
hive (d1_hive)> select * from hive_array;
OK
hive_array.name	hive_array.work_place
zhangsan	["beijing","shanghai","tianjin","hangzhou"]
lisi	["changchu","chengdu","wuhan","beijing"]
Time taken: 0.368 seconds, Fetched: 2 row(s)
hive (d1_hive)>
# Querying data; array indexes start at 0, and an out-of-range index returns NULL
hive (d1_hive)> select work_place[0],work_place[10] from hive_array;
OK
_c0	_c1
beijing	NULL
changchu	NULL
Time taken: 0.143 seconds, Fetched: 2 row(s)
hive (d1_hive)> 
# Check whether the array contains a given value
hive (d1_hive)> select * from hive_array where array_contains(work_place,'tianjin');
OK
hive_array.name	hive_array.work_place
zhangsan	["beijing","shanghai","tianjin","hangzhou"]
Time taken: 0.132 seconds, Fetched: 1 row(s)
hive (d1_hive)> 
# Array length
hive (d1_hive)> select name,size(work_place) as num from hive_array;
OK
name	num
zhangsan	4
lisi	4
Time taken: 0.116 seconds, Fetched: 2 row(s)
hive (d1_hive)>
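
Besides indexing, array_contains() and size(), array data is often queried by flattening each element onto its own row with explode() and LATERAL VIEW. A minimal sketch against the hive_array table above (the aliases t and city are arbitrary names chosen here; output omitted):

-- one output row per (name, city) pair
select name, city
from hive_array
lateral view explode(work_place) t as city;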

map

A map requires its keys and values to be of the declared data types. Example:

[root@10-9-15-140 data]# cat hive_map.txt
1,zhangsan,father:xiaoming#mother:xiaohuang#brother:xiaoxu,28
2,lisi,father:mayun#mother:huangyi#brother:guanyu,22
3,wangwu,father:wangjianlin#mother:ruhua#sister:jingtian,29
4,mayun,father:mayongzhen#mother:angelababy,26
[root@10-9-15-140 data]#
# Storing data
hive (d1_hive)> create table hive_map(
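The transcript is cut off mid-statement at this point. Based on the sample file above (fields separated by ',', map entries separated by '#', and each key separated from its value by ':'), the statement can be completed along the lines of the sketch below. The column names id, name, members, and age, as well as the load path, are assumptions inferred from the data; query output is omitted.

-- complete the truncated CREATE TABLE (column names are assumed)
create table hive_map(
  id int,
  name string,
  members map<string,string>,
  age int
)
row format delimited fields terminated by ','
collection items terminated by '#'
map keys terminated by ':';

-- load the sample file (path assumed to follow the array example)
load data local inpath '/home/hadoop/data/hive_map.txt' overwrite into table hive_map;

-- look up a single key; a missing key returns NULL
select name, members['father'] as father from hive_map;

-- map_keys()/map_values() return arrays; size() gives the number of entries
select name, map_keys(members), map_values(members), size(members) as num from hive_map;

As with the array table, COLLECTION ITEMS TERMINATED BY sets the separator between map entries, while MAP KEYS TERMINATED BY sets the separator between each key and its value.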
    
