如果只是返回String,那么直接继承UDF即可;如果想要返回MAP/LIST/STRUCT,则需要继承GenericUDF。
如下代码示例将URL中的参数解析成一个MAP并返回:
import java.util.LinkedHashMap; import java.util.Map; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; import org.apache.hadoop.io.Text; public class UrlParamsToMap extends GenericUDF { private final Map sortMap = new LinkedHashMap(); private StringObjectInspector urlOI; @Override public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { if (arguments.length != 1) { throw new UDFArgumentException("UrlParamsToMap param must be 1 argu."); } urlOI = (StringObjectInspector) arguments[0]; return ObjectInspectorFactory.getStandardMapObjectInspector( PrimitiveObjectInspectorFactory.writableStringObjectInspector, PrimitiveObjectInspectorFactory.writableStringObjectInspector); } @Override public Object evaluate(DeferredObject[] deferredObjects) throws HiveException { Object urlObj = deferredObjects[0].get(); Text url = (Text) urlOI.getPrimitiveWritableObject(urlObj); getParamsMap(url.toString(), sortMap); return sortMap; } public Map getParamsMap(String url, Map sortMap) { Map defaultMap = new LinkedHashMap(); if (StringUtils.isBlank(url)) { return defaultMap; } String[] urlSplits = url.split("\\?"); if (null == urlSplits || urlSplits.length != 2) { return defaultMap; } String urlParamStr = urlSplits[1]; if (StringUtils.isBlank(urlParamStr)) { return defaultMap; } String[] paramSplits = urlParamStr.split("&"); if (null == paramSplits || paramSplits.length == 0) { return defaultMap; } for (String kvStr : paramSplits) { if (StringUtils.isBlank(kvStr)) { 
continue; } String[] kvs = kvStr.split("="); if (null != kvs && kvs.length == 2) { if (StringUtils.isNotBlank(kvs[0]) && StringUtils.isNotBlank(kvs[1])) { sortMap.put(new Text(kvs[0]), new Text(kvs[1])); } } } return sortMap; } @Override public String getDisplayString(String[] strings) { return "map(" + strings[0] + ")"; } }
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
|
import
java
.
util
.
LinkedHashMap
;
import
java
.
util
.
Map
;
import
org
.
apache
.
commons
.
lang
.
StringUtils
;
import
org
.
apache
.
hadoop
.
hive
.
ql
.
exec
.
UDFArgumentException
;
import
org
.
apache
.
hadoop
.
hive
.
ql
.
metadata
.
HiveException
;
import
org
.
apache
.
hadoop
.
hive
.
ql
.
udf
.
generic
.
GenericUDF
;
import
org
.
apache
.
hadoop
.
hive
.
serde2
.
objectinspector
.
ObjectInspector
;
import
org
.
apache
.
hadoop
.
hive
.
serde2
.
objectinspector
.
ObjectInspectorFactory
;
import
org
.
apache
.
hadoop
.
hive
.
serde2
.
objectinspector
.
primitive
.
PrimitiveObjectInspectorFactory
;
import
org
.
apache
.
hadoop
.
hive
.
serde2
.
objectinspector
.
primitive
.
StringObjectInspector
;
import
org
.
apache
.
hadoop
.
io
.
Text
;
/**
 * Generic UDF that parses the query string of a URL into a map of
 * parameter name to parameter value, both held as {@link Text}.
 *
 * <p>Takes exactly one string argument. A NULL argument yields NULL. A URL
 * without a usable query string yields an empty map.
 */
public class UrlParamsToMap extends GenericUDF {

    /** Result map handed back by {@link #evaluate}; reused and cleared on every call. */
    private final Map<Text, Text> sortMap = new LinkedHashMap<Text, Text>();

    /** Inspector for the single string argument, set in {@link #initialize}. */
    private StringObjectInspector urlOI;

    /**
     * Validates the argument list and declares the return type.
     *
     * @param arguments inspectors of the call arguments; exactly one string inspector is accepted
     * @return a standard map inspector with writable-string keys and values
     * @throws UDFArgumentException if the argument count is not 1 or the argument is not a string
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        if (arguments.length != 1) {
            throw new UDFArgumentException("UrlParamsToMap param must be 1 argu.");
        }
        // Report a wrong argument type as a UDF argument error instead of a ClassCastException.
        if (!(arguments[0] instanceof StringObjectInspector)) {
            throw new UDFArgumentException(
                    "UrlParamsToMap param must be a string, but got " + arguments[0].getTypeName());
        }
        urlOI = (StringObjectInspector) arguments[0];
        return ObjectInspectorFactory.getStandardMapObjectInspector(
                PrimitiveObjectInspectorFactory.writableStringObjectInspector,
                PrimitiveObjectInspectorFactory.writableStringObjectInspector);
    }

    /**
     * Parses the URL argument into the result map.
     *
     * @param deferredObjects the call arguments; only index 0 is read
     * @return the parameter map, or {@code null} when the argument is NULL
     * @throws HiveException if the deferred argument cannot be obtained
     */
    @Override
    public Object evaluate(DeferredObject[] deferredObjects) throws HiveException {
        Object urlObj = deferredObjects[0].get();
        if (urlObj == null) {
            return null;
        }
        Text url = urlOI.getPrimitiveWritableObject(urlObj);
        if (url == null) {
            return null;
        }
        // The map is shared between calls: without clearing it, pairs parsed from an
        // earlier URL would still be present in the result for this one.
        sortMap.clear();
        getParamsMap(url.toString(), sortMap);
        return sortMap;
    }

    /**
     * Adds the {@code key=value} pairs found after the {@code ?} of {@code url} to {@code sortMap}.
     *
     * <p>The URL is used only when splitting it on {@code ?} yields exactly two parts. The second
     * part is split on {@code &}, and each piece is kept only when splitting it on {@code =}
     * yields exactly two non-blank parts. Everything else is skipped silently.
     *
     * @param url     the URL to parse; may be null or blank
     * @param sortMap the map that receives the parsed pairs; existing entries are not removed
     * @return {@code sortMap} once the parameter list has been processed, or a new empty map when
     *         the URL is blank or has no usable query string (in which case {@code sortMap} is
     *         left untouched)
     */
    public Map<Text, Text> getParamsMap(String url, Map<Text, Text> sortMap) {
        Map<Text, Text> defaultMap = new LinkedHashMap<Text, Text>();
        if (StringUtils.isBlank(url)) {
            return defaultMap;
        }

        String[] urlSplits = url.split("\\?");
        if (urlSplits.length != 2) {
            return defaultMap;
        }

        String urlParamStr = urlSplits[1];
        if (StringUtils.isBlank(urlParamStr)) {
            return defaultMap;
        }

        String[] paramSplits = urlParamStr.split("&");
        // split() drops trailing empty strings, so a query made only of '&' gives an empty array.
        if (paramSplits.length == 0) {
            return defaultMap;
        }

        for (String kvStr : paramSplits) {
            if (StringUtils.isBlank(kvStr)) {
                continue;
            }
            String[] kvs = kvStr.split("=");
            if (kvs.length == 2
                    && StringUtils.isNotBlank(kvs[0])
                    && StringUtils.isNotBlank(kvs[1])) {
                sortMap.put(new Text(kvs[0]), new Text(kvs[1]));
            }
        }
        return sortMap;
    }

    /**
     * Builds the display string for this call.
     *
     * @param strings display strings of the call arguments; only index 0 is used
     * @return {@code "map(" + strings[0] + ")"}
     */
    @Override
    public String getDisplayString(String[] strings) {
        return "map(" + strings[0] + ")";
    }
}
|
转载请注明来自:疯狂的蚂蚁www.crazyant.net