本文地址:http://blog.csdn.net/hblfyla/article/details/72722609
最近在做第三方数据源数据的Hive表映射。以往需要先写MR清洗数据,然后再映射为Hive表;现在改用GitHub上的JsonSerDe直接解析JSON数据,并映射为Hive表。
GitHub源码地址:https://github.com/KunBetter/JsonSerDe
文档地址:https://github.com/rcongiu/Hive-JSON-Serde
原始数据:
{"id":{"oid":"592238e9e4b03574f7a895ed"},"createTimeStr":"2017-05-2209:03:37","rawData":{"success":"true","report_data":{"application_check":[{"check_points":{"key_value":"喻俊"},"app_point":"user_name"},{"check_points":{"region":"吴县市","financial_blacklist":{"arised":false,"black_type":[]},"court_blacklist":{"arised":false,"black_type":[]},"age":22,"key_value":"320586199407255631","province":"江苏省","gender":"男","city":"苏州市"},"app_point":"id_card"},{"check_points":{"financial_blacklist":{"arised":false,"black_type":[]},"website":"江苏移动","check_name":"用户姓名与运营商提供的姓名[毛建生]匹配失败","key_value":"13812614445","reliability":"实名认证","reg_time":"2009-07-0117:24:19","check_idcard":"运营商未提供身份证号码","check_ebusiness":"无法判断该号码的电商使用情况(无电商数据)"},"app_point":"cell_phone"},{"check_points":{"check_addr":"无法定位居住地址(未提供居住地址)","key_value":"","check_ebusiness":"无法判断该居住地址的电商使用情况(无电商数据)"},"app_point":"home_addr"},{"check_points":{"key_value":"","check_mobile":"无法判断该家庭电话的通话情况(无家庭电话)"},"app_point":"home_phone"}]},"note":"1111"},"idCard":"320586199407255631"}
{
"id": {
"oid": "592238e9e4b03574f7a895ed"
},
"createTimeStr": "2017-05-2209:03:37",
"rawData": {
"success": "true",
"report_data": {
"application_check": [
{
"check_points": {
"key_value": "喻俊"
},
"app_point": "user_name"
},
{
"check_points": {
"region": "吴县市",
"financial_blacklist": {
"arised": false,
"black_type": []
},
"court_blacklist": {
"arised": false,
"black_type": []
},
"age": 22,
"key_value": "320586199407255631",
"province": "江苏省",
"gender": "男",
"city": "苏州市"
},
"app_point": "id_card"
},
{
"check_points": {
"financial_blacklist": {
"arised": false,
"black_type": []
},
"website": "江苏移动",
"check_name": "用户姓名与运营商提供的姓名[毛建生]匹配失败",
"key_value": "13812614445",
"reliability": "实名认证",
"reg_time": "2009-07-0117:24:19",
"check_idcard": "运营商未提供身份证号码",
"check_ebusiness": "无法判断该号码的电商使用情况(无电商数据)"
},
"app_point": "cell_phone"
},
{
"check_points": {
"check_addr": "无法定位居住地址(未提供居住地址)",
"key_value": "",
"check_ebusiness": "无法判断该居住地址的电商使用情况(无电商数据)"
},
"app_point": "home_addr"
},
{
"check_points": {
"key_value": "",
"check_mobile": "无法判断该家庭电话的通话情况(无家庭电话)"
},
"app_point": "home_phone"
}
]
},
"note": "1111"
},
"idCard": "320586199407255631"
}
建表语句及load语句:
-- Recreate the external, ds-partitioned table that exposes one JSON document per
-- line through the openx JsonSerDe.
--
-- Column types mirror the nesting of the sample document:
--   id       -> {"oid": ...}
--   rawData  -> {"success", "report_data": {"application_check": [ ... ]}, "note"}
-- check_points is declared as map<string,string> because its key set differs
-- from one application_check entry to the next, and because it is read with
-- check_points['city'] style lookups.
--
-- NOTE(review): the sample document has no top-level "name" key, so the name
-- column is presumably always NULL; it is kept so the table layout is unchanged.
-- NOTE(review): the "$.xxx" SERDEPROPERTIES are kept as in the original; the
-- openx SerDe documents "mapping.<column>" properties instead, so these entries
-- may have no effect -- confirm against the SerDe version in use.
drop table if exists juxinli_01;
create external table juxinli_01 (
    id struct<oid:string>,
    createTimeStr string,
    rawData struct<
        success:string,
        report_data:struct<
            application_check:array<
                struct<
                    check_points:map<string,string>,
                    app_point:string
                >
            >
        >,
        note:string
    >,
    idCard string,
    name string
) PARTITIONED BY (ds string)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES (
    "id"="$.id",
    "createTimeStr"="$.createTimeStr",
    "rawData"="$.rawData",
    "idCard"="$.idCard" );
-- Load the sample file from the local filesystem into partition ds='2017-05-24'
-- of juxinli_01; OVERWRITE replaces any data already in that partition.
LOAD DATA LOCAL INPATH '/root/Downloads/txt/juxinli.txt'
OVERWRITE INTO TABLE juxinli_01
PARTITION (ds='2017-05-24');
查询语句【1】:
-- Query 1: project check_points out of the application_check array.
-- Selecting a field through an array of structs returns that field for each
-- array element, one array per row.
SELECT
    rawData.report_data.application_check.check_points
FROM juxinli_01;
查询语句【2】:
-- Query 2: read the 'city' entry of check_points from the second
-- application_check element (Hive array indexes start at 0, so [1] is the
-- "id_card" entry in the sample document).
SELECT
    rawData.report_data.application_check[1].check_points['city']
FROM juxinli_01;