hive-复合数据类型

数组

需求:

假如有一个表,其中的字段为array类型
表数据:
1,zhangsan,数学:语文:英语:生物
2,lisi,数学:语文
3,wangwu,化学:计算机:java编程

建表:

-- Elective-course table: kc holds the list of courses taken by one student.
-- Hive requires the element type of an array column, hence array<string>.
-- Delimiters: ',' separates the columns of a row, ':' separates the
-- elements inside kc.
create table t_xuanxiu(
    uid  string,
    name string,
    kc   array<string>
)
row format delimited
fields terminated by ','
collection items terminated by ':';

查看表信息:

0: jdbc:hive2://slave01:10000> desc t_xuanxiu;
+-----------+----------------+----------+
| col_name  |   data_type    | comment  |
+-----------+----------------+----------+
| uid       | string         |          |
| name      | string         |          |
| kc        | array<string>  |          |
+-----------+----------------+----------+
3 rows selected (0.191 seconds)

导入数据并查看:

0: jdbc:hive2://slave01:10000> load data local inpath '/home/hadoop/test/xuanxiu.dat' into table t_xuanxiu;
No rows affected (1.257 seconds)
0: jdbc:hive2://slave01:10000> select * from t_xuanxiu;
+----------------+-----------------+------------------------+
| t_xuanxiu.uid  | t_xuanxiu.name  |      t_xuanxiu.kc      |
+----------------+-----------------+------------------------+
| 1              | zhangsan        | ["数学","语文","英语","生物"]  |
| 2              | lisi            | ["数学","语文"]            |
| 3              | wangwu          | ["化学","计算机","java编程"]  |
+----------------+-----------------+------------------------+
3 rows selected (0.302 seconds)
-- 根据索引获取数组中的元素(下标从0开始,kc[1]是第二个元素)
0: jdbc:hive2://slave01:10000> select kc[1] from t_xuanxiu;
+------+
| _c0  |
+------+
| 语文   |
| 语文   |
| 计算机  |
+------+
3 rows selected (0.306 seconds)
-- 根据索引超出数组的范围,返回NULL
0: jdbc:hive2://slave01:10000> select kc[2] from t_xuanxiu;
+---------+
|   _c0   |
+---------+
| 英语      |
| NULL    |
| java编程  |
+---------+
3 rows selected (0.302 seconds)

-- 获取数组的大小
0: jdbc:hive2://slave01:10000> select size(kc) from t_xuanxiu;
+------+
| _c0  |
+------+
| 4    |
| 2    |
| 3    |
+------+
3 rows selected (0.435 seconds)
-- 判断数组中是否包含某个元素
0: jdbc:hive2://slave01:10000> select array_contains(kc,'数学') from t_xuanxiu;
+--------+
|  _c0   |
+--------+
| true   |
| true   |
| false  |
+--------+
3 rows selected (0.309 seconds)

Map类型

需求:

-- 有如下数据,希望存入到表结构中
1,zhangsan,father:xiaoming#mother:xiaohuang#brother:xiaoxu,28
2,lisi,father:mayun#mother:huangyi#brother:guanyu,22
3,wangwu,father:wangjianlin#mother:ruhua#sister:jingtian,29
4,mayun,father:mayongzhen#mother:angelababy,26

建表映射上述数据

-- Family table: family_members maps a relation (father, mother, ...) to the
-- name of that relative. Hive requires both key and value types on a map
-- column, hence map<string,string>.
-- Delimiters, outermost first:
--   ','  separates the columns of a row
--   '#'  separates the entries of the map
--   ':'  separates the key from the value inside one entry
create table t_family(
    id             int,
    name           string,
    family_members map<string,string>,
    age            int
)
row format delimited
fields terminated by ','
collection items terminated by '#'
map keys terminated by ':';

-- 查看表结构
0: jdbc:hive2://slave01:10000> desc t_family;
+-----------------+---------------------+----------+
|    col_name     |      data_type      | comment  |
+-----------------+---------------------+----------+
| id              | int                 |          |
| name            | string              |          |
| family_members  | map<string,string>  |          |
| age             | int                 |          |
+-----------------+---------------------+----------+
4 rows selected (0.263 seconds)

--导入数据并查询
0: jdbc:hive2://slave01:10000> load data local inpath '/home/hadoop/test/map.dat' into table t_family;
No rows affected (0.703 seconds)
0: jdbc:hive2://slave01:10000> select * from t_family;
+--------------+----------------+----------------------------------------------------+---------------+
| t_family.id  | t_family.name  |              t_family.family_members               | t_family.age  |
+--------------+----------------+----------------------------------------------------+---------------+
| 1            | zhangsan       | {"father":"xiaoming","mother":"xiaohuang","brother":"xiaoxu"} | 28            |
| 2            | lisi           | {"father":"mayun","mother":"huangyi","brother":"guanyu"} | 22            |
| 3            | wangwu         | {"father":"wangjianlin","mother":"ruhua","sister":"jingtian"} | 29            |
| 4            | mayun          | {"father":"mayongzhen","mother":"angelababy"}      | 26            |
+--------------+----------------+----------------------------------------------------+---------------+
4 rows selected (0.293 seconds)

sql示例:

-- Look up each person's father and sister by map key
SELECT
    id,
    name,
    family_members["father"] AS father,
    family_members["sister"] AS sister,
    age
FROM t_family;

-- List the kinds of relatives each person has (the keys of the map)
SELECT
    id,
    name,
    map_keys(family_members) AS relations,
    age
FROM t_family;

-- List the names of each person's relatives (the values of the map)
SELECT
    id,
    name,
    map_values(family_members) AS relations,
    age
FROM t_family;

-- Count each person's relatives (number of entries in the map)
SELECT
    id,
    name,
    size(family_members) AS relations,
    age
FROM t_family;

-- Find everyone who has a brother, together with the brother's name
-- Option 1: a single statement, testing the map's key list directly
SELECT
    id,
    name,
    age,
    family_members['brother']
FROM t_family
WHERE array_contains(map_keys(family_members), 'brother');
-- Option 2 (everyone who has a brother): expose the map's key list from a
-- subquery, then filter on it in the outer query
SELECT
    id,
    name,
    age,
    family_members['brother']
FROM (
    SELECT
        id,
        name,
        age,
        map_keys(family_members) AS relations,
        family_members
    FROM t_family
) tmp
WHERE array_contains(relations, 'brother');

结构体struct

需求

假如有以下数据:
1,zhangsan,18:male:深圳
2,lisi,28:female:北京
3,wangwu,38:male:广州
4,赵六,26:female:上海
5,钱琪,35:male:杭州
6,王八,48:female:南京

建表映射上述数据

-- User table: info packs age, sex and address into one struct column.
-- Hive requires the field list of a struct column, hence
-- struct<age:int,sex:string,addr:string>.
-- Delimiters: ',' separates the columns of a row, ':' separates the
-- struct's fields (in declaration order).
create table t_user(
    id   int,
    name string,
    info struct<age:int,sex:string,addr:string>
)
row format delimited
fields terminated by ','
collection items terminated by ':';

查看结构:

0: jdbc:hive2://slave01:10000> desc t_user;
+-----------+-----------------------------------------+----------+
| col_name  |                data_type                | comment  |
+-----------+-----------------------------------------+----------+
| id        | int                                     |          |
| name      | string                                  |          |
| info      | struct<age:int,sex:string,addr:string>  |          |
+-----------+-----------------------------------------+----------+
3 rows selected (0.219 seconds)

导入数据并查看

0: jdbc:hive2://slave01:10000> load data local inpath '/home/hadoop/test/struct.dat' into table t_user;
0: jdbc:hive2://slave01:10000> select * from t_user;
+------------+--------------+----------------------------------------+
| t_user.id  | t_user.name  |              t_user.info               |
+------------+--------------+----------------------------------------+
| 1          | zhangsan     | {"age":18,"sex":"male","addr":"深圳"}    |
| 2          | lisi         | {"age":28,"sex":"female","addr":"北京"}  |
| 3          | wangwu       | {"age":38,"sex":"male","addr":"广州"}    |
| 4          | 赵六           | {"age":26,"sex":"female","addr":"上海"}  |
| 5          | 钱琪           | {"age":35,"sex":"male","addr":"杭州"}    |
| 6          | 王八           | {"age":48,"sex":"female","addr":"南京"}  |
+------------+--------------+----------------------------------------+
6 rows selected (0.427 seconds)

-- 查询每个人的id name和地址
0: jdbc:hive2://slave01:10000> select id,name,info.addr
. . . . . . . . . . . . . . .> from t_user;

你可能感兴趣的:(hive-复合数据类型)