一些 Hive UDF / UDAF / UDTF 例子，来源于 Hive 源码 (Example Hive UDFs, UDAFs, and UDTFs, taken from the Apache Hive source code)
1
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.contrib.udf.example;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
/**
* UDFExampleAdd.
*
*/
@Description(name = "example_add", value = "_FUNC_(expr) - Example UDF that returns the sum")
public class UDFExampleAdd extends UDF {

  /**
   * Sums a variable number of Integer arguments, skipping nulls.
   *
   * @param a the values to add; null elements are ignored
   * @return the sum, or 0 when no non-null arguments are given
   */
  public Integer evaluate(Integer... a) {
    int total = 0;
    for (Integer element : a) {
      if (element != null) {
        total += element;
      }
    }
    return total;
  }

  /**
   * Sums a variable number of Double arguments, skipping nulls.
   *
   * @param a the values to add; null elements are ignored
   * @return the sum, or 0.0 when no non-null arguments are given
   */
  public Double evaluate(Double... a) {
    double total = 0;
    for (Double element : a) {
      if (element != null) {
        total += element;
      }
    }
    return total;
  }
}
2
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.contrib.udf.example;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
/**
* UDFExampleArraySum.
*
*/
@Description(name = "example_arraysum", value = "_FUNC_(expr) - Example UDF that returns the sum")
public class UDFExampleArraySum extends UDF {

  /**
   * Sums the non-null elements of a list of doubles.
   *
   * @param a the input list; may be null
   * @return null when the input list is null, otherwise the sum of its
   *         non-null elements (0.0 for an empty list)
   */
  public Double evaluate(List<Double> a) {
    if (a == null) {
      return null;
    }
    double total = 0;
    // Enhanced for loop instead of an index loop over get(i).
    for (Double e : a) {
      if (e != null) {
        total += e;
      }
    }
    return total;
  }
}
3
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.contrib.udf.example;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
/**
* UDFExampleFormat.
*
*/
@Description(name = "example_format", value = "_FUNC_(expr) - Example UDF that returns a formatted String")
public class UDFExampleFormat extends UDF {

  /**
   * Applies {@link String#format(String, Object...)} to the given format
   * string and arguments.
   *
   * @param format a java.util.Formatter-style format string
   * @param args the values substituted into the format specifiers
   * @return the formatted string
   * @throws java.util.IllegalFormatException if the format string is invalid
   *         or incompatible with the supplied arguments
   */
  public String evaluate(String format, Object... args) {
    return String.format(format, args);
  }
}
4
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.contrib.udf.example;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Map;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
/**
* UDFExampleMapConcat.
*
*/
@Description(name = "example_mapconcat",
    value = "_FUNC_(expr) - Example UDF that returns contents of Map as a formatted String")
public class UDFExampleMapConcat extends UDF {

  /**
   * Renders a map as a string of "(key:value)" pairs.
   *
   * The pairs are sorted lexicographically before concatenation so the
   * output is deterministic regardless of the map's iteration order.
   *
   * @param a the map to render; may be null
   * @return the concatenated "(key:value)" pairs, or null for a null map
   */
  public String evaluate(Map<String, String> a) {
    if (a == null) {
      return null;
    }
    ArrayList<String> r = new ArrayList<String>(a.size());
    for (Map.Entry<String, String> entry : a.entrySet()) {
      r.add("(" + entry.getKey() + ":" + entry.getValue() + ")");
    }
    Collections.sort(r);
    StringBuilder sb = new StringBuilder();
    for (String piece : r) {
      sb.append(piece);
    }
    return sb.toString();
  }
}
5
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.contrib.udf.example;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
/**
* UDFExampleStructPrint.
*
*/
@Description(name = "example_structprint",
    value = "_FUNC_(obj) - Example UDF that returns contents of an object")
public class UDFExampleStructPrint extends UDF {

  /**
   * Renders a struct value as "(0:v0)(1:v1)..." with fields indexed by
   * position.
   *
   * @param a the struct value; Hive passes struct values in the standard
   *          object representation, which is a List of field objects
   * @return the formatted string, or null for a null input
   */
  public String evaluate(Object a) {
    if (a == null) {
      return null;
    }
    // Standard struct representation is a List of the field values.
    @SuppressWarnings("unchecked")
    List<Object> s = (List<Object>) a;
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < s.size(); i++) {
      // Append pieces individually instead of concatenating a temporary
      // String for each field.
      sb.append('(').append(i).append(':').append(s.get(i)).append(')');
    }
    return sb.toString();
  }
}
1
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.contrib.udaf.example;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;
/**
* This is a simple UDAF that calculates average.
*
* It should be very easy to follow and can be used as an example for writing
* new UDAFs.
*
* Note that Hive internally uses a different mechanism (called GenericUDAF) to
* implement built-in aggregation functions, which are harder to program but
* more efficient.
*
*/
@Description(name = "example_avg",
    value = "_FUNC_(col) - Example UDAF to compute average")
public final class UDAFExampleAvg extends UDAF {

  /**
   * Intermediate aggregation state for the average: a running sum and count.
   *
   * A dedicated state class is only needed when the partial result cannot be
   * represented by a single primitive; such a class may also carry
   * collection-typed fields (ArrayList, HashMap) when required.
   */
  public static class UDAFAvgState {
    private long mCount;
    private double mSum;
  }

  /**
   * The evaluator doing the actual work. Hive automatically scans the UDAF's
   * inner classes for implementations of UDAFEvaluator.
   */
  public static class UDAFExampleAvgEvaluator implements UDAFEvaluator {
    UDAFAvgState state;

    public UDAFExampleAvgEvaluator() {
      super();
      state = new UDAFAvgState();
      init();
    }

    /** Clears the running sum and count so the evaluator can be reused. */
    public void init() {
      state.mSum = 0;
      state.mCount = 0;
    }

    /**
     * Consumes one input row; null values are skipped. The parameter type
     * must match how the UDAF is invoked from HiveQL.
     *
     * @return always true
     */
    public boolean iterate(Double o) {
      if (o == null) {
        return true;
      }
      state.mSum += o;
      state.mCount++;
      return true;
    }

    /**
     * Ends a partial aggregation and returns the state. A primitive state
     * would be returned as its boxed Java class instead.
     */
    public UDAFAvgState terminatePartial() {
      // SQL semantics: the average of zero rows is null.
      if (state.mCount == 0) {
        return null;
      }
      return state;
    }

    /**
     * Folds a partial aggregation into this evaluator's state. The parameter
     * type must match the return type of terminatePartial().
     *
     * @return always true
     */
    public boolean merge(UDAFAvgState o) {
      if (o == null) {
        return true;
      }
      state.mSum += o.mSum;
      state.mCount += o.mCount;
      return true;
    }

    /** Ends the aggregation and returns the final average. */
    public Double terminate() {
      // SQL semantics: the average of zero rows is null.
      if (state.mCount == 0) {
        return null;
      }
      return Double.valueOf(state.mSum / state.mCount);
    }
  }

  private UDAFExampleAvg() {
    // prevent instantiation
  }
}
2
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.contrib.udaf.example;
import java.util.ArrayList;
import java.util.Collections;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;
/**
* This is a simple UDAF that concatenates all arguments from different rows
* into a single string.
*
* It should be very easy to follow and can be used as an example for writing
* new UDAFs.
*
* Note that Hive internally uses a different mechanism (called GenericUDAF) to
* implement built-in aggregation functions, which are harder to program but
* more efficient.
*/
@Description(name = "example_group_concat",
    value = "_FUNC_(col) - Example UDAF that concatenates all arguments from different rows into a single string")
public class UDAFExampleGroupConcat extends UDAF {

  /**
   * The evaluator doing the actual work. Hive automatically scans the UDAF's
   * inner classes for implementations of UDAFEvaluator.
   */
  public static class UDAFExampleGroupConcatEvaluator implements UDAFEvaluator {
    ArrayList<String> data;

    public UDAFExampleGroupConcatEvaluator() {
      super();
      data = new ArrayList<String>();
    }

    /** Discards any rows accumulated so far. */
    public void init() {
      data.clear();
    }

    /**
     * Consumes one row: all of the row's String arguments are concatenated
     * into a single buffered entry. A null argument array is skipped.
     * Accepting String[] lets the UDAF take any number of String arguments.
     *
     * @return always true
     */
    public boolean iterate(String[] o) {
      if (o == null) {
        return true;
      }
      StringBuilder row = new StringBuilder();
      for (String piece : o) {
        row.append(piece);
      }
      data.add(row.toString());
      return true;
    }

    /** Returns the buffered rows as the partial-aggregation state. */
    public ArrayList<String> terminatePartial() {
      return data;
    }

    /**
     * Folds a partial aggregation (the return value of terminatePartial())
     * into this evaluator's buffer.
     *
     * @return always true
     */
    public boolean merge(ArrayList<String> o) {
      if (o == null) {
        return true;
      }
      data.addAll(o);
      return true;
    }

    /**
     * Produces the final result: all buffered rows sorted lexicographically
     * and concatenated together.
     */
    public String terminate() {
      Collections.sort(data);
      StringBuilder result = new StringBuilder();
      for (String row : data) {
        result.append(row);
      }
      return result.toString();
    }
  }
}
3
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.contrib.udaf.example;
import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
@Description(name = "example_max", value = "_FUNC_(expr) - Returns the maximum value of expr")
public class UDAFExampleMax extends UDAF {
// One evaluator per supported argument type; Hive selects the evaluator whose
// iterate() parameter type matches the argument via reflection.
/**
 * Maximum of short values. mEmpty tracks whether any non-null value has been
 * seen, so the aggregate of zero rows is null (SQL semantics) rather than 0.
 */
static public class MaxShortEvaluator implements UDAFEvaluator {
private short mMax;
private boolean mEmpty;
public MaxShortEvaluator() {
super();
init();
}
/** Resets to the "no values seen" state. */
public void init() {
mMax = 0;
mEmpty = true;
}
/** Folds one value into the running maximum; nulls are ignored. Always returns true. */
public boolean iterate(ShortWritable o) {
if (o != null) {
if (mEmpty) {
// The first value seen becomes the initial maximum.
mMax = o.get();
mEmpty = false;
} else {
mMax = (short) Math.max(mMax, o.get());
}
}
return true;
}
/** Returns the partial maximum, or null if no values were seen. */
public ShortWritable terminatePartial() {
return mEmpty ? null : new ShortWritable(mMax);
}
/** Merging a partial maximum is identical to iterating over it. */
public boolean merge(ShortWritable o) {
return iterate(o);
}
/** Returns the final maximum, or null if no values were seen. */
public ShortWritable terminate() {
return mEmpty ? null : new ShortWritable(mMax);
}
}
/** Maximum of int values; same structure as MaxShortEvaluator. */
static public class MaxIntEvaluator implements UDAFEvaluator {
private int mMax;
private boolean mEmpty;
public MaxIntEvaluator() {
super();
init();
}
public void init() {
mMax = 0;
mEmpty = true;
}
public boolean iterate(IntWritable o) {
if (o != null) {
if (mEmpty) {
mMax = o.get();
mEmpty = false;
} else {
mMax = Math.max(mMax, o.get());
}
}
return true;
}
public IntWritable terminatePartial() {
return mEmpty ? null : new IntWritable(mMax);
}
public boolean merge(IntWritable o) {
return iterate(o);
}
public IntWritable terminate() {
return mEmpty ? null : new IntWritable(mMax);
}
}
/** Maximum of long values; same structure as MaxShortEvaluator. */
static public class MaxLongEvaluator implements UDAFEvaluator {
private long mMax;
private boolean mEmpty;
public MaxLongEvaluator() {
super();
init();
}
public void init() {
mMax = 0;
mEmpty = true;
}
public boolean iterate(LongWritable o) {
if (o != null) {
if (mEmpty) {
mMax = o.get();
mEmpty = false;
} else {
mMax = Math.max(mMax, o.get());
}
}
return true;
}
public LongWritable terminatePartial() {
return mEmpty ? null : new LongWritable(mMax);
}
public boolean merge(LongWritable o) {
return iterate(o);
}
public LongWritable terminate() {
return mEmpty ? null : new LongWritable(mMax);
}
}
/** Maximum of float values; same structure as MaxShortEvaluator. */
static public class MaxFloatEvaluator implements UDAFEvaluator {
private float mMax;
private boolean mEmpty;
public MaxFloatEvaluator() {
super();
init();
}
public void init() {
mMax = 0;
mEmpty = true;
}
public boolean iterate(FloatWritable o) {
if (o != null) {
if (mEmpty) {
mMax = o.get();
mEmpty = false;
} else {
mMax = Math.max(mMax, o.get());
}
}
return true;
}
public FloatWritable terminatePartial() {
return mEmpty ? null : new FloatWritable(mMax);
}
public boolean merge(FloatWritable o) {
return iterate(o);
}
public FloatWritable terminate() {
return mEmpty ? null : new FloatWritable(mMax);
}
}
/** Maximum of double values; same structure as MaxShortEvaluator. */
static public class MaxDoubleEvaluator implements UDAFEvaluator {
private double mMax;
private boolean mEmpty;
public MaxDoubleEvaluator() {
super();
init();
}
public void init() {
mMax = 0;
mEmpty = true;
}
public boolean iterate(DoubleWritable o) {
if (o != null) {
if (mEmpty) {
mMax = o.get();
mEmpty = false;
} else {
mMax = Math.max(mMax, o.get());
}
}
return true;
}
public DoubleWritable terminatePartial() {
return mEmpty ? null : new DoubleWritable(mMax);
}
public boolean merge(DoubleWritable o) {
return iterate(o);
}
public DoubleWritable terminate() {
return mEmpty ? null : new DoubleWritable(mMax);
}
}
/**
 * Lexicographic maximum of Text values.
 *
 * NOTE(review): terminatePartial() returns the internal mutable Text instance
 * rather than a copy, and later iterate() calls mutate it via set(). This
 * mirrors the upstream example, but looks fragile if the partial result is
 * held after the evaluator continues — confirm the caller serializes it first.
 */
static public class MaxStringEvaluator implements UDAFEvaluator {
private Text mMax;
private boolean mEmpty;
public MaxStringEvaluator() {
super();
init();
}
public void init() {
mMax = null;
mEmpty = true;
}
public boolean iterate(Text o) {
if (o != null) {
if (mEmpty) {
// Copy the first value; Hive may reuse the Text object it passes in.
mMax = new Text(o);
mEmpty = false;
} else if (mMax.compareTo(o) < 0) {
mMax.set(o);
}
}
return true;
}
public Text terminatePartial() {
return mEmpty ? null : mMax;
}
public boolean merge(Text o) {
return iterate(o);
}
public Text terminate() {
return mEmpty ? null : mMax;
}
}
}
4
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.contrib.udaf.example;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;
/**
* The utility class for UDAFMaxN and UDAFMinN.
*/
public final class UDAFExampleMaxMinNUtil {

  /**
   * This class stores the information during an aggregation.
   *
   * Note that this class has to have a public constructor, so that Hive can
   * serialize/deserialize it using reflection.
   */
  public static class State {
    ArrayList<Double> a; // Sorted buffer holding the current best (max/min) N values.
    int n; // The N requested by the caller.
  }

  /**
   * The base class of the UDAFEvaluator for UDAFMaxN and UDAFMinN.
   * Subclasses only override getAscending() to pick the direction.
   */
  public abstract static class Evaluator implements UDAFEvaluator {

    private State state;

    public Evaluator() {
      state = new State();
      init();
    }

    /** Resets the aggregation state. */
    public void init() {
      state.a = new ArrayList<Double>();
      state.n = 0;
    }

    /** Returns true in UDAFMaxN, and false in UDAFMinN. */
    protected abstract boolean getAscending();

    /**
     * Iterates through one row of original data.
     *
     * Inserts the value when the buffer is not yet full, or when it beats the
     * current worst kept element (the last one in the sorted buffer). Null
     * values are ignored.
     *
     * @param o the row's value; may be null
     * @param n the number of extreme values to keep
     * @return always true
     */
    public boolean iterate(Double o, int n) {
      boolean ascending = getAscending();
      // Remember N so merge/terminate know the buffer's intended capacity.
      state.n = n;
      if (o != null) {
        boolean doInsert = state.a.size() < n;
        if (!doInsert) {
          Double last = state.a.get(state.a.size() - 1);
          if (ascending) {
            doInsert = o < last;
          } else {
            doInsert = o > last;
          }
        }
        if (doInsert) {
          binaryInsert(state.a, o, ascending);
          if (state.a.size() > n) {
            // Evict the displaced worst element to keep at most n entries.
            state.a.remove(state.a.size() - 1);
          }
        }
      }
      return true;
    }

    /**
     * Gets the partial aggregation result.
     * SQL semantics: the max/min N of zero rows is null.
     */
    public State terminatePartial() {
      return state.a.size() == 0 ? null : state;
    }

    /**
     * Folds a partial aggregation into this evaluator's state by merging the
     * two sorted buffers and keeping the best n elements.
     *
     * @return always true
     */
    public boolean merge(State o) {
      if (o != null) {
        state.n = o.n;
        state.a = sortedMerge(o.a, state.a, getAscending(), o.n);
      }
      return true;
    }

    /**
     * Terminates the max N lookup and returns the final result, or null if no
     * rows were aggregated.
     */
    public ArrayList<Double> terminate() {
      return state.a.size() == 0 ? null : state.a;
    }
  }

  /**
   * Returns a comparator based on whether the order is ascending or not.
   * Has a dummy parameter to make sure generics can infer the type correctly.
   */
  static <T extends Comparable<T>> Comparator<T> getComparator(boolean ascending, T dummy) {
    Comparator<T> comp;
    if (ascending) {
      comp = new Comparator<T>() {
        public int compare(T o1, T o2) {
          return o1.compareTo(o2);
        }
      };
    } else {
      // Descending order: reverse the natural comparison.
      comp = new Comparator<T>() {
        public int compare(T o1, T o2) {
          return o2.compareTo(o1);
        }
      };
    }
    return comp;
  }

  /**
   * Inserts an element into an ascending/descending list, keeping it sorted.
   *
   * @param ascending if true, the list is sorted in ascending order,
   *                  otherwise it is in descending order
   */
  static <T extends Comparable<T>> void binaryInsert(List<T> list, T value, boolean ascending) {
    int position = Collections.binarySearch(list, value, getComparator(ascending, (T) null));
    if (position < 0) {
      // Not found: binarySearch returns (-(insertion point) - 1); recover it.
      position = (-position) - 1;
    }
    list.add(position, value);
  }

  /**
   * Merges two sorted lists and keeps the first n elements.
   *
   * BUG FIX: the original alternating two-pointer loop only consumed an
   * element on a strict comparison in both branches, so when the two heads
   * were equal neither pointer advanced and the loop never terminated. Ties
   * now take from a2 ("<= 0" below), which keeps the output sorted and
   * guarantees progress.
   *
   * @param ascending if true, the lists are sorted in ascending order,
   *                  otherwise they are in descending order
   */
  static <T extends Comparable<T>> ArrayList<T> sortedMerge(List<T> a1, List<T> a2,
      boolean ascending, int n) {
    Comparator<T> comparator = getComparator(ascending, (T) null);
    int n1 = a1.size();
    int n2 = a2.size();
    int p1 = 0; // The current element in a1
    int p2 = 0; // The current element in a2
    ArrayList<T> output = new ArrayList<T>(n);
    while (output.size() < n && (p1 < n1 || p2 < n2)) {
      if (p1 < n1) {
        if (p2 == n2 || comparator.compare(a1.get(p1), a2.get(p2)) < 0) {
          output.add(a1.get(p1++));
        }
      }
      if (output.size() == n) {
        break;
      }
      if (p2 < n2) {
        // "<= 0" (not "< 0") so that a tie still advances a pointer.
        if (p1 == n1 || comparator.compare(a2.get(p2), a1.get(p1)) <= 0) {
          output.add(a2.get(p2++));
        }
      }
    }
    return output;
  }

  // No instantiation.
  private UDAFExampleMaxMinNUtil() {
  }
}
5
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.contrib.udaf.example;
import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
@Description(name = "example_min", value = "_FUNC_(expr) - Returns the minimum value of expr")
public class UDAFExampleMin extends UDAF {
// One evaluator per supported argument type; Hive selects the evaluator whose
// iterate() parameter type matches the argument via reflection.
/**
 * Minimum of short values. mEmpty tracks whether any non-null value has been
 * seen, so the aggregate of zero rows is null (SQL semantics) rather than 0.
 */
static public class MinShortEvaluator implements UDAFEvaluator {
private short mMin;
private boolean mEmpty;
public MinShortEvaluator() {
super();
init();
}
/** Resets to the "no values seen" state. */
public void init() {
mMin = 0;
mEmpty = true;
}
/** Folds one value into the running minimum; nulls are ignored. Always returns true. */
public boolean iterate(ShortWritable o) {
if (o != null) {
if (mEmpty) {
// The first value seen becomes the initial minimum.
mMin = o.get();
mEmpty = false;
} else {
mMin = (short) Math.min(mMin, o.get());
}
}
return true;
}
/** Returns the partial minimum, or null if no values were seen. */
public ShortWritable terminatePartial() {
return mEmpty ? null : new ShortWritable(mMin);
}
/** Merging a partial minimum is identical to iterating over it. */
public boolean merge(ShortWritable o) {
return iterate(o);
}
/** Returns the final minimum, or null if no values were seen. */
public ShortWritable terminate() {
return mEmpty ? null : new ShortWritable(mMin);
}
}
/** Minimum of int values; same structure as MinShortEvaluator. */
static public class MinIntEvaluator implements UDAFEvaluator {
private int mMin;
private boolean mEmpty;
public MinIntEvaluator() {
super();
init();
}
public void init() {
mMin = 0;
mEmpty = true;
}
public boolean iterate(IntWritable o) {
if (o != null) {
if (mEmpty) {
mMin = o.get();
mEmpty = false;
} else {
mMin = Math.min(mMin, o.get());
}
}
return true;
}
public IntWritable terminatePartial() {
return mEmpty ? null : new IntWritable(mMin);
}
public boolean merge(IntWritable o) {
return iterate(o);
}
public IntWritable terminate() {
return mEmpty ? null : new IntWritable(mMin);
}
}
/** Minimum of long values; same structure as MinShortEvaluator. */
static public class MinLongEvaluator implements UDAFEvaluator {
private long mMin;
private boolean mEmpty;
public MinLongEvaluator() {
super();
init();
}
public void init() {
mMin = 0;
mEmpty = true;
}
public boolean iterate(LongWritable o) {
if (o != null) {
if (mEmpty) {
mMin = o.get();
mEmpty = false;
} else {
mMin = Math.min(mMin, o.get());
}
}
return true;
}
public LongWritable terminatePartial() {
return mEmpty ? null : new LongWritable(mMin);
}
public boolean merge(LongWritable o) {
return iterate(o);
}
public LongWritable terminate() {
return mEmpty ? null : new LongWritable(mMin);
}
}
/** Minimum of float values; same structure as MinShortEvaluator. */
static public class MinFloatEvaluator implements UDAFEvaluator {
private float mMin;
private boolean mEmpty;
public MinFloatEvaluator() {
super();
init();
}
public void init() {
mMin = 0;
mEmpty = true;
}
public boolean iterate(FloatWritable o) {
if (o != null) {
if (mEmpty) {
mMin = o.get();
mEmpty = false;
} else {
mMin = Math.min(mMin, o.get());
}
}
return true;
}
public FloatWritable terminatePartial() {
return mEmpty ? null : new FloatWritable(mMin);
}
public boolean merge(FloatWritable o) {
return iterate(o);
}
public FloatWritable terminate() {
return mEmpty ? null : new FloatWritable(mMin);
}
}
/** Minimum of double values; same structure as MinShortEvaluator. */
static public class MinDoubleEvaluator implements UDAFEvaluator {
private double mMin;
private boolean mEmpty;
public MinDoubleEvaluator() {
super();
init();
}
public void init() {
mMin = 0;
mEmpty = true;
}
public boolean iterate(DoubleWritable o) {
if (o != null) {
if (mEmpty) {
mMin = o.get();
mEmpty = false;
} else {
mMin = Math.min(mMin, o.get());
}
}
return true;
}
public DoubleWritable terminatePartial() {
return mEmpty ? null : new DoubleWritable(mMin);
}
public boolean merge(DoubleWritable o) {
return iterate(o);
}
public DoubleWritable terminate() {
return mEmpty ? null : new DoubleWritable(mMin);
}
}
/**
 * Lexicographic minimum of Text values.
 *
 * NOTE(review): terminatePartial() returns the internal mutable Text instance
 * rather than a copy, and later iterate() calls mutate it via set(). This
 * mirrors the upstream example, but looks fragile if the partial result is
 * held after the evaluator continues — confirm the caller serializes it first.
 */
static public class MinStringEvaluator implements UDAFEvaluator {
private Text mMin;
private boolean mEmpty;
public MinStringEvaluator() {
super();
init();
}
public void init() {
mMin = null;
mEmpty = true;
}
public boolean iterate(Text o) {
if (o != null) {
if (mEmpty) {
// Copy the first value; Hive may reuse the Text object it passes in.
mMin = new Text(o);
mEmpty = false;
} else if (mMin.compareTo(o) > 0) {
mMin.set(o);
}
}
return true;
}
public Text terminatePartial() {
return mEmpty ? null : mMin;
}
public boolean merge(Text o) {
return iterate(o);
}
public Text terminate() {
return mEmpty ? null : mMin;
}
}
}
1
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.contrib.udtf.example;
import java.util.ArrayList;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
/**
* GenericUDTFCount2 outputs the number of rows seen, twice. It's output twice
* to test outputting of rows on close with lateral view.
*
*/
@Description(name = "udtfCount2",
    value = "_FUNC_(col) - UDTF that outputs the number of rows seen, twice.")
public class GenericUDTFCount2 extends GenericUDTF {

  // Running count of rows seen by process(); boxed so the same object can be
  // placed into the reusable forward buffer.
  private transient Integer count = Integer.valueOf(0);
  // Reused single-column output row.
  private transient Object forwardObj[] = new Object[1];

  /**
   * Emits the final count twice when the operator is closed; this exercises
   * row emission from close() under LATERAL VIEW.
   */
  @Override
  public void close() throws HiveException {
    forwardObj[0] = count;
    forward(forwardObj);
    forward(forwardObj);
  }

  /**
   * Declares a single int output column named "col1". The input arguments are
   * accepted but ignored.
   */
  @Override
  public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
    ArrayList<String> fieldNames = new ArrayList<String>();
    ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
    fieldNames.add("col1");
    fieldOIs.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);
    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames,
        fieldOIs);
  }

  /** Counts the row; nothing is emitted until close(). */
  @Override
  public void process(Object[] args) throws HiveException {
    count = Integer.valueOf(count.intValue() + 1);
  }
}
2
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.contrib.udtf.example;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
/**
* GenericUDTFExplode2.
*
*/
@Description(name = "explode2",
    value = "_FUNC_(a) - like explode, but outputs two identical columns (for testing purposes)")
public class GenericUDTFExplode2 extends GenericUDTF {

  private transient ListObjectInspector listOI = null;

  /** Nothing to flush: every row is forwarded from process(). */
  @Override
  public void close() throws HiveException {
  }

  /**
   * Validates that exactly one array-typed argument is supplied and declares
   * two output columns ("col1", "col2"), both typed as the array's element
   * type.
   *
   * @throws UDFArgumentException if the argument count or category is wrong
   */
  @Override
  public StructObjectInspector initialize(ObjectInspector[] args)
      throws UDFArgumentException {
    if (args.length != 1) {
      throw new UDFArgumentException("explode() takes only one argument");
    }
    if (args[0].getCategory() != ObjectInspector.Category.LIST) {
      throw new UDFArgumentException("explode() takes an array as a parameter");
    }
    listOI = (ListObjectInspector) args[0];
    ArrayList<String> fieldNames = new ArrayList<String>();
    ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
    fieldNames.add("col1");
    fieldNames.add("col2");
    fieldOIs.add(listOI.getListElementObjectInspector());
    fieldOIs.add(listOI.getListElementObjectInspector());
    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames,
        fieldOIs);
  }

  // Reused two-column output row.
  private transient Object forwardObj[] = new Object[2];

  /**
   * Forwards one output row per element of the input array, duplicating the
   * element into both columns.
   */
  @Override
  public void process(Object[] o) throws HiveException {
    // BUG FIX: the original "List> list" declaration is malformed and does
    // not compile; use a wildcard list, matching ListObjectInspector.getList.
    List<?> list = listOI.getList(o[0]);
    for (Object r : list) {
      forwardObj[0] = r;
      forwardObj[1] = r;
      forward(forwardObj);
    }
  }

  @Override
  public String toString() {
    return "explode";
  }
}