es.output.json=true
,如此每个document将返回一个json{
"netsed": {
"mappings": {
"test": {
"properties": {
"Nested2": {
"properties": {
"Iterm3": {
"type": "float"
},
"Iterm4": {
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
},
"type": "text"
}
}
},
"nested1": {
"properties": {
"item1": {
"fields": {
"keyword": {
"ignore_above": 256,
"type": "keyword"
}
},
"type": "text"
},
"item2": {
"type": "long"
}
}
}
}
}
}
}
}
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 1,
"hits": [
{
"_index": "netsed",
"_type": "test",
"_id": "2",
"_score": 1,
"_source": {
"nested1": {
"item1": "Big",
"item2": 10
},
"Nested2": {
"Iterm3": 2.3,
"Iterm4": "small"
}
}
}
]
}
}
es_metadata
={"_index":"netsed","_type":"test","_id":"2","sort":["0"],"_score":"null"}
-- Register the Elasticsearch connector and the JSON SerDe/UDF jars for this session.
add jar elasticsearch-hadoop-6.1.2.jar;
add jar json-udf-1.3.8-jar-with-dependencies.jar;
add jar json-serde-1.3.8-jar-with-dependencies.jar;
-- Hive table over the Elasticsearch resource netsed/test that exposes the two
-- nested objects as struct columns.
-- A Hive struct type must list its fields (struct<name:type,...>); a bare
-- `struct` does not parse.
-- NOTE(review): the field lists follow the sample document (item1/item2 and
-- Iterm3/Iterm4, lower-cased). They are declared as string because the JSON
-- SerDe hands back non-boolean values as strings - confirm the intended types.
-- NOTE(review): ${nodes} is presumably substituted with the Elasticsearch
-- host list before the statement runs - confirm how it is supplied.
CREATE TABLE x (
`es_metadata` string,
`nested1` struct<item1:string,item2:string>,
`nested2` struct<iterm3:string,iterm4:string>)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
TBLPROPERTIES(
'es.output.json' = 'true',
'es.resource.read' = 'netsed/test',
'es.nodes'='${nodes}',
'es.read.metadata' = 'true',
'es.read.metadata.field' = 'es_metadata',
'es.field.read.empty.as.null'='false',
'es.mapping.names' = 'nested2:Nested2,nested1:nested1'
);
-- Session setup: make the Elasticsearch storage handler and the openx JSON
-- SerDe/UDF classes available.
add jar elasticsearch-hadoop-6.1.2.jar;
add jar json-udf-1.3.8-jar-with-dependencies.jar;
add jar json-serde-1.3.8-jar-with-dependencies.jar;

-- External table over the Elasticsearch resource netsed/test.
-- Each nested object is exposed as a plain string column.
-- The SerDe runs with "case.insensitive" = "false", so the explicit
-- "mapping.*" entries tie each Hive column to its exact JSON key.
-- NOTE(review): ${nodes} is presumably substituted with the Elasticsearch
-- host list before the statement runs - confirm how it is supplied.
CREATE EXTERNAL TABLE IF NOT EXISTS x (
    `es_metadata` string,
    `Nested2`     string COMMENT 'NULL',
    `nested1`     string COMMENT 'NULL'
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES (
    "case.insensitive" = "false",
    "mapping.nested2"="Nested2",
    "mapping.nested1"="nested1"
)
STORED BY 'org.elasticsearch.hadoop.hive.EsStorageHandler'
TBLPROPERTIES (
    'es.output.json' = 'true',
    'es.resource.read' = 'netsed/test',
    'es.nodes'='${nodes}',
    'es.read.metadata' = 'true',
    'es.read.metadata.field' = 'es_metadata',
    'es.field.read.empty.as.null'='false',
    'es.mapping.names' = 'nested2:Nested2,nested1:nested1'
);
"case.insensitive" = "false"
,默认为true;这个配置的含义是json中的key是否大小写敏感,不敏感时json中key都会被转为小写,如: //设置"case.insensitive" = "true"时,Iterm3和Iterm4被转为了小写
{"_index":"netsed","_type":"test","_id":"2","sort":["0"],"_score":"null"} {"iterm4":"small","iterm3":"2.3"} {"item2":"10","item1":"Big"}
//Hive-JSON-Serde 源代码
/**
 * Builds mappings between hive columns and json attributes.
 *
 * Scans the table properties for keys that start with {@code PFX}. For each
 * such key, the part after the prefix becomes the map key and the property
 * value becomes the map value.
 *
 * @param tbl table properties to scan; keys that are not Strings are skipped
 * @param isCaseInsensitive when true, the property value is lower-cased
 *        before it is stored
 * @return map from the property-key suffix to the (possibly lower-cased)
 *         property value
 */
private Map<String, String> getMappings(Properties tbl, boolean isCaseInsensitive) {
final int prefixLength = PFX.length();
final Map<String, String> mappings = new HashMap<String, String>();
for (Object key : tbl.keySet()) {
// Properties is a Hashtable<Object,Object>, so non-String keys are possible.
if (!(key instanceof String)) {
continue;
}
String propertyName = (String) key;
if (propertyName.startsWith(PFX)) {
String fieldTo = tbl.getProperty(propertyName);
mappings.put(propertyName.substring(prefixLength),
isCaseInsensitive ? fieldTo.toLowerCase() : fieldTo);
}
}
return mappings;
}
"case.insensitive" = "false"
时,需要手动指定映射关系"mapping.nested2"="Nested2","mapping.nested1"="nested1"
例: Iterm3,Iterm4此时是大写
{"_index":"netsed","_type":"test","_id":"2","sort":["0"],"_score":"null"} {"Iterm3":"2.3","Iterm4":"small"} {"item2":"10","item1":"Big"}
官方解释:
Case Sensitivity in mappings
Since hive is case insensitive, all JSON keys are by default lowercased, to accommodate situations where the same JSON key is in a different case. However, this may not be what you want: you may need to treat the same key with different case as two different ones. You’ll then have to use mappings, since hive does not support case sensitive columns, and you’ll also have to tell the SerDe not to be case insensitive (the default).
-- Maps two JSON keys that differ only in case onto two distinct Hive columns.
--   "case.insensitive" = "false" : the SerDe treats JSON keys as case-sensitive
--   "mapping.time1" = "time"     : lowercase key 'time' is read into column time1
--   "mapping.time2" = "Time"     : capitalised key 'Time' is read into column time2
CREATE TABLE mytable (
    time1 string,
    time2 string)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES (
    "case.insensitive" = "false",
    "mapping.time1" = "time",
    "mapping.time2" = "Time"
);
-- Data: { "time" : "2012-10-22", "Time": "2012-11-22"}
SELECT
    time1,
    time2
FROM mytable;
除了bool类型(以及字符串"null"),其他类型的值均会被转为string类型
,如{"Iterm3":"2.3","Iterm4":"small"}
中Iterm3在es中是float类型,在这里被转为了string;这是由于Hive-JSON-Serde反序列化时调用了 org.openx.data.jsonserde.json 包中的以下方法:
/**
 * Converts a string into a boolean or the JSON null marker when it spells
 * one of those literals; every other string is returned unchanged.
 *
 * Matching of "true", "false" and "null" ignores case. Numeric text is not
 * parsed: a value such as "2.3" is returned as the same String.
 *
 * @param string A String; must not be null.
 * @return {@code Boolean.TRUE}, {@code Boolean.FALSE},
 *         {@code JSONObject.NULL}, or the input string itself.
 */
public static Object stringToValue(String string) {
// Dereferencing the argument first keeps the fail-fast behaviour on null.
if (!string.isEmpty()) {
if ("true".equalsIgnoreCase(string)) {
return Boolean.TRUE;
} else if ("false".equalsIgnoreCase(string)) {
return Boolean.FALSE;
} else if ("null".equalsIgnoreCase(string)) {
return JSONObject.NULL;
}
}
// Empty strings and anything that is not a recognised literal pass through.
return string;
}