数组
需求:
加入有一个表,其中的字段为array类型
表数据:
1,zhangsan,数学:语文:英语:生物
2,lisi,数学:语文
3,wangwu,化学:计算机:java编程
建表:
create table t_xuanxiu(uid string,name string,kc array)
row format delimited
fields terminated by ','
collection items terminated by ':';
查看表信息:
0: jdbc:hive2://slave01:10000> desc t_xuanxiu;
+-----------+----------------+----------+
| col_name | data_type | comment |
+-----------+----------------+----------+
| uid | string | |
| name | string | |
| kc | array | |
+-----------+----------------+----------+
3 rows selected (0.191 seconds)
导入数据并查看:
0: jdbc:hive2://slave01:10000> load data local inpath '/home/hadoop/test/xuanxiu.dat' into table t_xuanxiu;
No rows affected (1.257 seconds)
0: jdbc:hive2://slave01:10000> select * from t_xuanxiu;
+----------------+-----------------+------------------------+
| t_xuanxiu.uid | t_xuanxiu.name | t_xuanxiu.kc |
+----------------+-----------------+------------------------+
| 1 | zhangsan | ["数学","语文","英语","生物"] |
| 2 | lisi | ["数学","语文"] |
| 3 | wangwu | ["化学","计算机","java编程"] |
+----------------+-----------------+------------------------+
3 rows selected (0.302 seconds)
-- 根据索引获取数组中的元素
0: jdbc:hive2://slave01:10000> select kc[1] from t_xuanxiu;
+------+
| _c0 |
+------+
| 语文 |
| 语文 |
| 计算机 |
+------+
3 rows selected (0.306 seconds)
-- 根据索引超出数组的范围,返回NULL
0: jdbc:hive2://slave01:10000> select kc[2] from t_xuanxiu;
+---------+
| _c0 |
+---------+
| 英语 |
| NULL |
| java编程 |
+---------+
3 rows selected (0.302 seconds)
-- 获取数组的大小
0: jdbc:hive2://slave01:10000> select size(kc) from t_xuanxiu;
+------+
| _c0 |
+------+
| 4 |
| 2 |
| 3 |
+------+
3 rows selected (0.435 seconds)
-- 判断数组中是否包含某个元素
0: jdbc:hive2://slave01:10000> select array_contains(kc,'数学') from t_xuanxiu;
+--------+
| _c0 |
+--------+
| true |
| true |
| false |
+--------+
3 rows selected (0.309 seconds)
Map类型
需求:
-- 有如下数据,希望存入到表结构中
1,zhangsan,father:xiaoming#mother:xiaohuang#brother:xiaoxu,28
2,lisi,father:mayun#mother:huangyi#brother:guanyu,22
3,wangwu,father:wangjianlin#mother:ruhua#sister:jingtian,29
4,mayun,father:mayongzhen#mother:angelababy,26
建表映射上述数据
create table t_family(id int,name string,family_members map,age int)
row format delimited fields terminated by ',' -- 字段属性分隔符
collection items terminated by '#' -- 集合元素的分隔符
map keys terminated by ':'; -- map中key和value的分隔符
-- 查看表结构
0: jdbc:hive2://slave01:10000> desc t_family;
+-----------------+---------------------+----------+
| col_name | data_type | comment |
+-----------------+---------------------+----------+
| id | int | |
| name | string | |
| family_members | map | |
| age | int | |
+-----------------+---------------------+----------+
4 rows selected (0.263 seconds)
--导入数据并查询
0: jdbc:hive2://slave01:10000> load data local inpath '/home/hadoop/test/map.dat' into table t_family;
No rows affected (0.703 seconds)
0: jdbc:hive2://slave01:10000> select * from t_family;
+--------------+----------------+----------------------------------------------------+---------------+
| t_family.id | t_family.name | t_family.family_members | t_family.age |
+--------------+----------------+----------------------------------------------------+---------------+
| 1 | zhangsan | {"father":"xiaoming","mother":"xiaohuang","brother":"xiaoxu"} | 28 |
| 2 | lisi | {"father":"mayun","mother":"huangyi","brother":"guanyu"} | 22 |
| 3 | wangwu | {"father":"wangjianlin","mother":"ruhua","sister":"jingtian"} | 29 |
| 4 | mayun | {"father":"mayongzhen","mother":"angelababy"} | 26 |
+--------------+----------------+----------------------------------------------------+---------------+
4 rows selected (0.293 seconds)
sql示例:
-- 查出每个人的 爸爸、姐妹
select id,name,family_members["father"] as father,family_members["sister"] as sister,age
from t_family;
-- 查出每个人有哪些亲属关系
select id,name,map_keys(family_members) as relations,age
from t_family;
-- 查出每个人的亲人名字
select id,name,map_values(family_members) as relations,age
from t_family;
-- 查出每个人的亲人数量
select id,name,size(family_members) as relations,age
from t_family;
-- 查出所有拥有兄弟的人及他的兄弟是谁
-- 方案1:一句话写完
select id,name,age,family_members['brother']
from t_family where array_contains(map_keys(family_members),'brother');
-- 方案2:子查询
select id,name,age,family_members['brother']
from
(select id,name,age,map_keys(family_members) as relations,family_members
from t_family) tmp
where array_contains(relations,'brother');
结构体struct
需求
假如有以下数据:
1,zhangsan,18:male:深圳
2,lisi,28:female:北京
3,wangwu,38:male:广州
4,赵六,26:female:上海
5,钱琪,35:male:杭州
6,王八,48:female:南京
建表映射上述数据
create table t_user(id int,name string,info struct)
row format delimited fields terminated by ','
collection items terminated by ':';
查看结构:
0: jdbc:hive2://slave01:10000> desc t_user;
+-----------+-----------------------------------------+----------+
| col_name | data_type | comment |
+-----------+-----------------------------------------+----------+
| id | int | |
| name | string | |
| info | struct | |
+-----------+-----------------------------------------+----------+
3 rows selected (0.219 seconds)
导入数据并查看
0: jdbc:hive2://slave01:10000> load data local inpath '/home/hadoop/test/struct.dat' into table t_user;
0: jdbc:hive2://slave01:10000> select * from t_user;
+------------+--------------+----------------------------------------+
| t_user.id | t_user.name | t_user.info |
+------------+--------------+----------------------------------------+
| 1 | zhangsan | {"age":18,"sex":"male","addr":"深圳"} |
| 2 | lisi | {"age":28,"sex":"female","addr":"北京"} |
| 3 | wangwu | {"age":38,"sex":"male","addr":"广州"} |
| 4 | 赵六 | {"age":26,"sex":"female","addr":"上海"} |
| 5 | 钱琪 | {"age":35,"sex":"male","addr":"杭州"} |
| 6 | 王八 | {"age":48,"sex":"female","addr":"南京"} |
+------------+--------------+----------------------------------------+
6 rows selected (0.427 seconds)
-- 查询每个人的id name和地址
0: jdbc:hive2://slave01:10000> select id,name,info.addr
. . . . . . . . . . . . . . .> from t_user;