hive中复杂数据类型的使用

1. Array:数组类型,由一系列相同数据类型的元素组成

创建一张student表

-- Student table demonstrating the ARRAY complex type.
-- Expected text layout of each input line:
--   sid <TAB> sname <TAB> score,score,...
CREATE TABLE student (
    sid   int,
    sname string,
    -- Hive requires an explicit element type; a bare `array` does not parse.
    grade array<float>
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
-- Separator between the elements of the `grade` array.
COLLECTION ITEMS TERMINATED BY ','
STORED AS TEXTFILE;

> desc student;
+-----------+---------------+----------+--+
| col_name  | data_type     | comment  |
+-----------+---------------+----------+--+
| sid       | int           |          |
| sname     | string        |          |
| grade     | array<float>  |          |
+-----------+---------------+----------+--+

hdfs dfs -put student.txt  /user/hive/warehouse/xxx.db/student

> select * from student;
+--------------+----------------+-------------------+--+
| student.sid  | student.sname  | student.grade     |
+--------------+----------------+-------------------+--+
| 1            | Mark           | [78.0,81.5,90.0]  |
| 2            | john           | [67.0,78.5]       |
+--------------+----------------+-------------------+--+

student.txt的数据(字段之间用制表符 \t 分隔,数组元素之间用逗号分隔):

1   Mark    78,81.5,90
2   john    67,78.5

2. Map:key-value集合,通过key访问元素

-- Student table demonstrating the MAP complex type (subject -> score).
-- Expected text layout of each input line:
--   sid <TAB> sname <TAB> key:value|key:value|...
CREATE TABLE student1 (
    sid   int,
    sname string,
    -- Hive requires explicit key and value types; a bare `map` does not parse.
    grade map<string,float>
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
-- Separator between the key/value entries of the map.
COLLECTION ITEMS TERMINATED BY '|'
-- Separator between a key and its value inside one entry.
MAP KEYS TERMINATED BY ':'
STORED AS TEXTFILE;

> desc student1;
+-----------+--------------------+----------+--+
| col_name  | data_type          | comment  |
+-----------+--------------------+----------+--+
| sid       | int                |          |
| sname     | string             |          |
| grade     | map<string,float>  |          |
+-----------+--------------------+----------+--+

> select * from student1;
+---------------+-----------------+----------------------------------------------+--+
| student1.sid  | student1.sname  | student1.grade                               |
+---------------+-----------------+----------------------------------------------+--+
| 1             | Mark            | {"\"语文\"":78.0,"\"英语\"":81.5,"\"数学\"":90.0}  |
| 2             | john            | {"\"语文\"":65.0,"\"英语\"":85.5}                |
+---------------+-----------------+----------------------------------------------+--+

student1.txt的数据(字段之间用制表符 \t 分隔;注意数据中 key 两侧的双引号会被当作 key 的一部分保存,所以上面的查询结果里出现了转义的 \"):

1   Mark    "语文":78|"英语":81.5|"数学":90
2   john    "语文":65|"英语":85.5

Array 与 Map 嵌套:array<map<string,float>>

-- Student table demonstrating a nested complex type: an ARRAY whose
-- elements are MAPs (course name -> score).
CREATE TABLE student2 (
    sid   int,
    sname string,
    -- The nested generic must be spelled out in full; `array>` does not parse.
    grade array<map<string,float>>
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
COLLECTION ITEMS TERMINATED BY '|'
MAP KEYS TERMINATED BY ':'
-- NOTE(review): for nested types the text SerDe assigns delimiters per
-- nesting level, so '|' and ':' may not mean "entry" and "key/value" for the
-- inner map the way they do for a top-level map column. Verify the parsed
-- result against a real data file before relying on this layout.
STORED AS TEXTFILE;

数据格式如:{1,'Mark',[<"高等数学",80>,<"c语言",83>]}
beeline> desc student2;
+-----------+---------------------------+----------+--+
| col_name  | data_type                 | comment  |
+-----------+---------------------------+----------+--+
| sid       | int                       |          |
| sname     | string                    |          |
| grade     | array<map<string,float>>  |          |
+-----------+---------------------------+----------+--+

3. struct:结构类型,类似于C和C++中的结构体。可以包含不同数据类型的元素,这些元素可以通过点语法的方式来得到所需要的元素。

-- Student table demonstrating the STRUCT complex type.
-- Struct fields are positional in the text file: the value is split on the
-- COLLECTION ITEMS delimiter and the pieces are assigned to name, age, sex
-- in declaration order, e.g.  1 <TAB> Mark|18|male
CREATE TABLE student3 (
    sid  int,
    -- Hive requires the field list; a bare `struct` does not parse.
    info struct<name:string,age:int,sex:string>
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
-- Separator between the struct's field values.
COLLECTION ITEMS TERMINATED BY '|'
-- Not applied to a flat struct: a `name:value` token in the data is kept
-- verbatim as the field value (and fails to parse for the int field).
MAP KEYS TERMINATED BY ':'
STORED AS TEXTFILE;

> desc student3;
+-----------+-----------------------------------------+----------+--+
| col_name  | data_type                               | comment  |
+-----------+-----------------------------------------+----------+--+
| sid       | int                                     |          |
| info      | struct<name:string,age:int,sex:string>  |          |
+-----------+-----------------------------------------+----------+--+

> select * from student3;
+---------------+--------------------------------------------------------+--+
| student3.sid  | student3.info                                          |
+---------------+--------------------------------------------------------+--+
| 1             | {"name":"name:\"Mark\"","age":null,"sex":"sex:\"男\""}  |
| 2             | {"name":"name:\"Lily\"","age":null,"sex":"sex:\"女\""}  |
+---------------+--------------------------------------------------------+--+

说明:struct 的各个字段是按位置用 '|' 切分的,数据文件中不需要(也不应该)写字段名。从上面的结果可以推断,数据文件里写成了 name:"Mark" 这样的形式,于是 "name:" 前缀和引号都被当成了值的一部分,而 age 字段因为无法解析为 int 显示为 null。正确的数据写法形如:1	Mark|18|男

你可能感兴趣的:(hive)