某人需要一个用于计算移动平均的 Hive UDF。
http://www.wangxiao.cn/UserT/GetHtml.aspx?id=/1093/1093-1-4-1-2-2008-83196083.html
移动平均
hive> add jar ./MovingAvgUDF.jar;
Added ./MovingAvgUDF.jar to class path
Added resource: ./MovingAvgUDF.jar
hive> create temporary function MovingAvgUDF as 'com.bi.udf.MovingAvgUDF';
OK
Time taken: 0.641 seconds
hive> select col1 from a;
Automatically selecting local only mode for query
Total MapReduce jobs = 1
Launching Job 1 out of 1
Number of reduce tasks is set to 0 since there's no reduce operator
Execution log at: /tmp/xxx/hive.log
Job running in-process (local Hadoop)
Hadoop job information for null: number of mappers: 0; number of reducers: 0
2013-03-06 16:07:25,053 null map = 100%, reduce = 0%
Ended Job = job_local_0001
2013-03-06 04:07:25 End of local task; Time Taken: 3.063 sec.
OK
["12","23","23","21","45","65","23"]
Time taken: 4.574 seconds
hive> select MovingAvgUDF(col1, 5) from a;
Automatically selecting local only mode for query
Total MapReduce jobs = 1
Launching Job 1 out of 1
Number of reduce tasks is set to 0 since there's no reduce operator
Execution log at: /tmp/xxx/hive.log
Job running in-process (local Hadoop)
Hadoop job information for null: number of mappers: 0; number of reducers: 0
2013-03-06 16:06:48,340 null map = 0%, reduce = 0%
2013-03-06 16:06:49,342 null map = 100%, reduce = 0%
Ended Job = job_local_0001
2013-03-06 04:06:49 End of local task; Time Taken: 3.99 sec.
OK
[12.0,17.5,19.333333333333332,19.75,24.8,35.4,35.4]
Time taken: 6.623 seconds
hive> select MovingAvgUDF(col1, 2) from a;
Automatically selecting local only mode for query
Total MapReduce jobs = 1
Launching Job 1 out of 1
Number of reduce tasks is set to 0 since there's no reduce operator
Execution log at: /tmp/xxx/hive.log
Job running in-process (local Hadoop)
Hadoop job information for null: number of mappers: 0; number of reducers: 0
2013-03-06 16:33:43,930 null map = 100%, reduce = 0%
Ended Job = job_local_0001
2013-03-06 04:33:44 End of local task; Time Taken: 2.848 sec.
OK
[12.0,17.5,23.0,22.0,33.0,55.0,44.0]
Time taken: 5.001 seconds
package com.bi.udf;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
@Description(
name = "moving_avg",
value = " return the moving average of a time series for a given timewindow"
)
public class MovingAvgUDF extends GenericUDF {
private ListObjectInspector listInspector;
private IntObjectInspector dayWindowInspector;
private List parseDoubleList(ListobjList) {
List arrList = new ArrayList();
for (Object obj : objList) {
Object dblObj = ((PrimitiveObjectInspector) (listInspector.getListElementObjectInspector())).getPrimitiveJavaObject(obj);
if (dblObj instanceof Number) {
Number dblNum = (Number) dblObj;
arrList.add(dblNum.doubleValue());
} else {
// // Try to coerce it otherwise
String dblStr = (dblObj.toString());
try {
Double dblCoerce = Double.parseDouble(dblStr);
arrList.add(dblCoerce);
} catch (NumberFormatException formatExc) {
formatExc.printStackTrace();
}
}
}
return arrList;
}
public List evaluate(ListtimeSeriesObj, int dayWindow) {
List timeSeries = this.parseDoubleList(timeSeriesObj);
List mvnAvgTimeSeries = new ArrayList(timeSeries.size());
double mvnTotal = 0.0;
for (int i = 0; i < timeSeries.size(); ++i) {
mvnTotal += timeSeries.get(i);
if (i >= dayWindow) {
mvnTotal -= timeSeries.get(i - dayWindow);
double mvnAvg = mvnTotal / ((double) dayWindow);
mvnAvgTimeSeries.add(mvnAvg);
} else {
if (i > 0) {
double mvnAvg = mvnTotal / ((double) i + 1.0);
mvnAvgTimeSeries.add(mvnAvg);
} else {
mvnAvgTimeSeries.add(mvnTotal); // /
}
}
}
return mvnAvgTimeSeries;
}
@Override
public Object evaluate(DeferredObject[] arg0) throws HiveException {
List argList = listInspector.getList(arg0[0].get());
int dayWindow = dayWindowInspector.get(arg0[1].get());
if (argList != null)
return evaluate(argList, dayWindow);
else
return null;
}
@Override
public String getDisplayString(String[] arg0) {
return "moving_avg(" + arg0[0] + ", " + arg0[1] + ")";
}
@Override
public ObjectInspector initialize(ObjectInspector[] arg0)
throws UDFArgumentException {
if (arg0.length != 2) {
throw new UDFArgumentLengthException("The function MovingAvgUDF(List, length) needs two arguments.");
}
this.listInspector = (ListObjectInspector) arg0[0];
this.dayWindowInspector = (IntObjectInspector) arg0[1];
return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaDoubleObjectInspector);
}
}
青春就应该这样绽放 游戏测试:三国时期谁是你最好的兄弟!! 你不得不信的星座秘密