hive中可以直接导入json格式的数据,mongodb数据或者json串
1.从Hive-JSON-Serde项目(GitHub: rcongiu/Hive-JSON-Serde)下载json-serde-1.3.7-jar-with-dependencies.jar
2.将这个jar包添加到hive安装路径的lib目录下
3.打开hive执行
-- Load the JSON SerDe jar into Hive. "/jar包所在路径/" is a placeholder:
-- replace it with the directory that actually holds the jar.
add jar /jar包所在路径/json-serde-1.3.7-jar-with-dependencies.jar;
4.测试数据如下
{"name" : "huangfeichang","age" : 20}
注:一个json需为一行数据
5.在hive中创建表
-- External table over text files that hold one flat JSON object per line,
-- e.g. {"name" : "huangfeichang","age" : 20}. Each top-level JSON key is
-- mapped to the column of the same name by the openx JSON SerDe.
CREATE EXTERNAL TABLE test_wyr.jsontest2(
    name string,
    age int
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
STORED AS TEXTFILE
location '/tmp/wangyuerun/jsontest1';
hive查询数据
hive> select * from test_wyr.jsontest2;
OK
huangfeichang 20
mongodb导出的json数据如下(此处为便于阅读做了格式化;实际文件中每条记录需压缩为一行,记录之间不加逗号)
{
"_id": {
"$oid": "5a5714b92872ab3117967865"
},
"createTime": {
"$numberLong": "1515656377248"
},
"data": {
"key": "qihuo_mypage_mallInfo",
"mobile": "15225838187"
},
"evenName": "用户在我的页面点击积分商城的数据",
"messageId": "200010",
"pageName": "用户在我的页面点击积分商城的数据",
"productName": "qihuo",
"userId": "70",
"uuid": "6caef2cb-76e3-4834-a999-d0813b1f8ba7"
},
{
"_id": {
"$oid": "5a5714d32872ab3117967867"
},
"createTime": {
"$numberLong": "1515656403522"
},
"data": {
"key": "qihuo_mypage_mallInfo",
"mobile": "15225777187"
},
"evenName": "用户在我的页面点击积分商城的数据",
"messageId": "200010",
"pageName": "用户在我的页面点击积分商城的数据",
"productName": "qihuo",
"userId": "99",
"uuid": "e6137591-778d-43de-aff1-6af73110bf2c"
}
hive建表语句如下
-- External table over MongoDB-exported JSON documents (one document per line),
-- parsed by the openx JSON SerDe.
-- The nested "createTime" and "data" objects are declared as map<string,string>
-- so their fields can be read with map['key'] syntax (e.g. data['key']).
-- Hive MAP columns must spell out their key and value types; a bare "map"
-- is rejected by the parser.
-- The "_id" field of the documents is not declared here, so it is not exposed.
CREATE EXTERNAL TABLE test_wyr.jsontest4(
    createTime map<string,string>,
    data map<string,string>,
    evenName string,
    messageId string,
    pageName string,
    productName string,
    userId string,
    uuid string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
STORED AS TEXTFILE
location '/tmp/wangyuerun/jsontest4';
select * from test_wyr.jsontest4;
OK
{"$numberlong":"1515656377248"} {"mobile":"15225838187","key":"qihuo_mypage_mallInfo"} 用户在我的页面点击积分商城的数据 200010 用户在我的页面点击积分商城的数据 qihuo 70 6caef2cb-76e3-4834-a999-d0813b1f8ba7
{"$numberlong":"1515656403522"} {"mobile":"15225777187","key":"qihuo_mypage_mallInfo"} 用户在我的页面点击积分商城的数据 200010 用户在我的页面点击积分商城的数据 qihuo 99 e6137591-778d-43de-aff1-6af73110bf2c
Time taken: 0.27 seconds, Fetched: 2 row(s)
hive> select data['key'] from test_wyr.jsontest4;
OK
qihuo_mypage_mallInfo
qihuo_mypage_mallInfo
Time taken: 15.039 seconds, Fetched: 2 row(s)