这个问题着实是个大坑啊,使用mr解析用户ip没有问题,然后,想直接使用相同代码在hive中添加udf函数,不断地掉坑啊,踩坑踩了两天,总算是解决了。
1、编写代码
public class IP extends UDF {

    private static final Logger logger = Logger.getLogger(IP.class);

    public static boolean enableFileWatch = false;

    // Offset of the data section inside IpParse.dat (== index length read from the 4-byte header).
    private static int offset;
    // First-octet lookup table: index[a] is the starting slot of the index range for IPs a.x.x.x.
    private static int[] index = new int[256];
    private static ByteBuffer dataBuffer;
    private static ByteBuffer indexBuffer;
    private static Path ipFile = new Path("/file/IpParse.dat");
    private static Path maxMindFile = new Path("/hive_udf/GeoIP2-City.mmdb");
    private static DatabaseReader reader;
    private static ReentrantLock lock = new ReentrantLock();
    // Set once both databases have been loaded; checked without the lock on the fast path.
    private static volatile boolean loaded = false;

    /**
     * Resolves an IPv4 address to a tab-separated location string
     * ("country\tprovince\tcity\t..."). Chinese addresses come from the local
     * IpParse.dat index; every other country falls back to the MaxMind GeoIP2
     * city database.
     *
     * @param ip dotted-quad IPv4 string; null or malformed input is treated as "0.0.0.0"
     * @return tab-separated location, or the exception message on failure
     */
    public String evaluate(String ip) {
        try {
            // Load both databases once per JVM. The original code re-read and
            // re-parsed both files on every single call, i.e. once per row.
            ensureLoaded();

            // Normalize bad input instead of failing the whole query.
            if (ip == null) {
                ip = "0.0.0.0";
            } else {
                String[] parts = ip.split("\\.", -1);
                if (parts.length != 4) {
                    ip = "0.0.0.0";
                }
            }

            int firstOctet = Integer.parseInt(ip.substring(0, ip.indexOf(".")));
            long ipValue = ip2long(ip);

            // Scan the 8-byte index records for the first entry whose end IP
            // covers this address. Record layout (per the reads below):
            // 4-byte end IP (big endian), 3-byte data offset (little endian),
            // 1-byte data length.
            int maxCompLen = offset - 1028;
            long dataOffset = -1;
            int dataLength = -1;
            for (int pos = index[firstOctet] * 8 + 1024; pos < maxCompLen; pos += 8) {
                if (int2long(indexBuffer.getInt(pos)) >= ipValue) {
                    dataOffset = bytesToLong((byte) 0, indexBuffer.get(pos + 6),
                            indexBuffer.get(pos + 5), indexBuffer.get(pos + 4));
                    dataLength = 0xFF & indexBuffer.get(pos + 7);
                    break;
                }
            }
            if (dataOffset < 0 || dataLength < 0) {
                // Previously this fell through to a cryptic NegativeArraySizeException;
                // the outer catch below turns this message into the return value.
                throw new IllegalStateException("no index entry found for ip " + ip);
            }

            byte[] areaBytes = new byte[dataLength];
            dataBuffer.position(offset + (int) dataOffset - 1024);
            dataBuffer.get(areaBytes, 0, dataLength);

            // NOTE(review): uses the platform default charset, as the original
            // did; the "中国" comparison below depends on it — confirm the .dat
            // file encoding before switching to an explicit UTF-8 decode.
            String local = new String(areaBytes);
            String[] locals = local.split("\t", -1);
            if (locals.length < 5) {
                local += "\t"; // pad so downstream consumers always see enough columns
            }

            if (!locals[0].equals("中国")) {
                // Non-Chinese IP: fall back to the MaxMind city database.
                try {
                    InetAddress addr = InetAddress.getByName(ip);
                    CityResponse city = reader.city(addr);
                    StringBuffer sb = new StringBuffer();
                    sb.append(city.getCountry().getNames().get("zh-CN")).append("\t");
                    sb.append(city.getMostSpecificSubdivision().getName()).append("\t");
                    sb.append(city.getCity().getName()).append("\t");
                    sb.append("\t");
                    local = sb.toString();
                } catch (Exception e) {
                    // Best effort: keep the IpParse.dat result if the lookup fails.
                    logger.warn("GeoIP2 lookup failed for " + ip, e);
                }
            }
            return local;
        } catch (Exception ex) {
            logger.error("evaluate failed for ip " + ip, ex);
            return ex.getMessage();
        }
    }

    /**
     * Lazily loads the IpParse.dat index and the MaxMind GeoIP2 database,
     * exactly once per JVM, guarded by {@code lock} (double-checked via the
     * volatile {@code loaded} flag). Also closes both input streams, which the
     * original code leaked.
     */
    private static void ensureLoaded() throws Exception {
        if (loaded) {
            return;
        }
        lock.lock();
        try {
            if (loaded) {
                return;
            }
            Configuration conf = new Configuration();

            // DatabaseReader.Builder(InputStream) copies the database into
            // memory during build(), so the stream can be closed immediately.
            InputStream mmIn = FileSystem.getLocal(conf).open(maxMindFile);
            try {
                reader = new DatabaseReader.Builder(mmIn).build();
            } finally {
                mmIn.close();
            }

            FSDataInputStream fin = ipFile.getFileSystem(conf).open(ipFile);
            try {
                dataBuffer = ByteBuffer.allocate(fin.available());
                byte[] chunk = new byte[4096];
                while (fin.available() > 0) {
                    int read = fin.read(chunk);
                    dataBuffer.put(chunk, 0, read);
                }
            } finally {
                fin.close();
            }

            dataBuffer.position(0);
            int indexLength = dataBuffer.getInt();
            byte[] indexBytes = new byte[indexLength];
            // NOTE(review): only indexLength - 4 bytes are copied (the 4-byte
            // length header is excluded), so the tail of indexBytes stays
            // zeroed — preserved exactly as the original did it.
            dataBuffer.get(indexBytes, 0, indexLength - 4);
            indexBuffer = ByteBuffer.wrap(indexBytes);

            // The 256-entry first-octet table is little endian ...
            indexBuffer.order(ByteOrder.LITTLE_ENDIAN);
            offset = indexLength;
            for (int i = 0; i < 256; i++) {
                index[i] = indexBuffer.getInt();
            }
            // ... while the per-range index records are big endian.
            indexBuffer.order(ByteOrder.BIG_ENDIAN);

            loaded = true;
        } finally {
            lock.unlock();
        }
    }

    /** Assembles four bytes (most significant first) into an unsigned 32-bit value. */
    private static long bytesToLong(byte a, byte b, byte c, byte d) {
        return int2long((((a & 0xff) << 24) | ((b & 0xff) << 16) | ((c & 0xff) << 8) | (d & 0xff)));
    }

    /** Packs a dotted-quad IPv4 string into a (possibly negative) signed 32-bit int. */
    private static int str2Ip(String ip) {
        String[] ss = ip.split("\\.");
        int a = Integer.parseInt(ss[0]);
        int b = Integer.parseInt(ss[1]);
        int c = Integer.parseInt(ss[2]);
        int d = Integer.parseInt(ss[3]);
        return (a << 24) | (b << 16) | (c << 8) | d;
    }

    /** Converts a dotted-quad IPv4 string to its unsigned 32-bit value. */
    private static long ip2long(String ip) {
        return int2long(str2Ip(ip));
    }

    /** Reinterprets a signed 32-bit int as an unsigned value held in a long. */
    private static long int2long(int i) {
        long l = i & 0x7fffffffL;
        if (i < 0) {
            l |= 0x080000000L;
        }
        return l;
    }
}
2、代码写完之后,使用本地运行和在linux下运行,都没有问题,但是放到hive中,就会报错。
Caused by: java.lang.NoSuchMethodError: com.fasterxml.jackson.databind.node.ObjectNode.&lt;init&gt;(Lcom/fasterxml/jackson/databind/node/JsonNodeFactory;Ljava/util/Map;)V
at com.maxmind.db.Decoder.decodeMap(Decoder.java:285)
at com.maxmind.db.Decoder.decodeByType(Decoder.java:154)
at com.maxmind.db.Decoder.decode(Decoder.java:147)
at com.maxmind.db.Decoder.decodeMap(Decoder.java:281)
at com.maxmind.db.Decoder.decodeByType(Decoder.java:154)
at com.maxmind.db.Decoder.decode(Decoder.java:147)
at com.maxmind.db.Decoder.decode(Decoder.java:87)
at com.maxmind.db.Reader.&lt;init&gt;(Reader.java:132)
at com.maxmind.db.Reader.&lt;init&gt;(Reader.java:116)
at com.maxmind.geoip2.DatabaseReader.&lt;init&gt;(DatabaseReader.java:35)
at com.maxmind.geoip2.DatabaseReader.&lt;init&gt;(DatabaseReader.java:23)
at com.maxmind.geoip2.DatabaseReader$Builder.build(DatabaseReader.java:129)
at com.bankofamerica.gisds.ExtractEnterpriseData.ExtractEnterpriseDB(ExtractEnterpriseData.java:27)
at com.package.name.App.evaluate(App.java:73)
3、解决问题
看到这个错误,考虑是jar冲突,因为在解析过程中需要使用hadoop的jar包引用,因此在所有引用中删除掉jackson_databind,无济于事,还是顽强报相同的错误。
com.fasterxml.jackson.core
jackson-databind
com.fasterxml.jackson.core
jackson-annotations
4、我引用的GeoIP2为2.9.0版本,这是问题的所在
com.maxmind.geoip2
geoip2
2.9.0
com.fasterxml.jackson.core
jackson-databind
com.fasterxml.jackson.core
jackson-annotations
5、将版本改为2.5.0,问题解决。
6、导入hive
delete jar /hive_udf/tt.jar;
add jar /hive_udf/tt.jar;
add file /hive_udf/GeoIP2-City.mmdb;
add file /hive_udf/IpParse.dat;
CREATE TEMPORARY FUNCTION pars_IP AS 'com.dewmobile.ParsIP.IP';
SELECT pars_IP('121.12.12.12');
7、运行:
8、pom.xml
org.apache.hadoop
hadoop-mapreduce-client-core
2.3.0
org.apache.hadoop
hadoop-common
2.3.0
com.maxmind.geoip2
geoip2
2.5.0
org.apache.hive
hive-exec
1.1.1
maven-assembly-plugin
jar-with-dependencies
make-assembly
package
single