Hive基础之Hive的复杂类型

ARRAY

一组有序字段,字段的类型必须相同。Array(1,2)

create table hive_array(ip string, uid array<string>)

row format delimited 

fields terminated by ','

collection items terminated by '|'

stored as textfile;

加载数据

load data local inpath "/home/spark/software/data/hive_array.txt" overwrite into table hive_array;

 

hive_array.txt

192.168.1.1,www.baidu.com|www.google.com|www.qq.com

192.168.1.2,www.baidu.com|www.sina.com|www.sohu.com

192.168.1.3,www.qq.com|www.163.com|www.youku.com
select * from hive_array;                                                                         

192.168.1.1     ["www.baidu.com","www.google.com","www.qq.com"]

192.168.1.2     ["www.baidu.com","www.sina.com","www.sohu.com"]

192.168.1.3     ["www.qq.com","www.163.com","www.youku.com"]

使用下标访问,下标从0开始:

select ip, uid[0] as id from hive_array;

192.168.1.1     www.baidu.com

192.168.1.2     www.baidu.com

192.168.1.3     www.qq.com

查看数据长度:

select size(uid) from hive_array;

3

3

3

数组查找:

select * from hive_array where array_contains(uid, "www.baidu.com");

192.168.1.1     ["www.baidu.com","www.google.com","www.qq.com"]

192.168.1.2     ["www.baidu.com","www.sina.com","www.sohu.com"]

 

MAP

一组无序的键值对,键的类型必须是原子的,值可以是任何类型,同一个映射的键的类型必须相同,值的类型也必须相同。Map('a',1,'b',2)

create table hive_map(ts string, ip string, type string, logtype string, request Map<string,string>, response Map<string, string>)

row format delimited fields terminated by '#'

collection items terminated by '&'

map keys terminated by '='

stored as textfile;

 hive_map.txt

2014-03-03 12:22:34#127.0.0.1#get#amap#src=123&code=456&cookie=789#status=success&time=2s

2014-03-03 11:22:34#127.0.0.1#get#autonavi#src=123&code=456#status=success&time=2s&cookie=789

加载数据:

load data local inpath "/home/spark/software/data/hive_map.txt" overwrite into table hive_map;

查看表结构: desc hive_map

ts                      string                  None                

ip                      string                  None                

type                    string                  None                

logtype                 string                  None                

request                 map<string,string>      None                

response                map<string,string>      None 

查看所有字段:

select * from hive_map;



2014-03-03 12:22:34     127.0.0.1       get     amap    {"src":"123","code":"456","cookie":"789"}       {"status":"success","time":"2s"}

2014-03-03 11:22:34     127.0.0.1       get     autonavi        {"src":"123","code":"456"}      {"status":"success","time":"2s","cookie":"789"}

查看map中指定的字段:

select request['src'], request['code'], request['cookie'] from hive_map;



123     456     789

123     456     NULL

 

STRUCT

一组命名的字段,字段类型可以不同。 Struct('a',1,2,0)

create table hive_struct(ip string, user struct<name:string, age:int>)

row format delimited fields terminated by '#'

collection  items terminated by ':'

stored as textfile;

hive_struct.txt

192.168.1.1#zhangsan:40

192.168.1.2#lisi:50

192.168.1.3#wangwu:60

192.168.1.4#zhaoliu:70

加载数据:

load data local inpath "/home/spark/software/data/hive_struct.txt" overwrite into table hive_struct;

查询所有字段:

select * from hive_struct;



192.168.1.1     {"name":"zhangsan","age":40}

192.168.1.2     {"name":"lisi","age":50}

192.168.1.3     {"name":"wangwu","age":60}

192.168.1.4     {"name":"zhaoliu","age":70}

查询指定字段:

select user.name, user.age from hive_struct;



zhangsan        40

lisi    50

wangwu  60

zhaoliu 70

 

你可能感兴趣的:(hive)