Hive UDF / UDAF / UDTF Examples

A collection of Hive UDF, UDAF, and UDTF examples, taken from the Hive source code (the contrib module).


  • UDF


1. UDFExampleAdd


    
    
    
    
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.contrib.udf.example;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;

/**
 * UDFExampleAdd.
 *
 */
@Description(name = "example_add", value = "_FUNC_(expr) - Example UDAF that returns the sum")
public class UDFExampleAdd extends UDF {

  public Integer evaluate(Integer... a) {
    int total = 0;
    for (Integer element : a) {
      if (element != null) {
        total += element;
      }
    }
    return total;
  }

  public Double evaluate(Double... a) {
    double total = 0;
    for (Double element : a) {
      if (element != null) {
        total += element;
      }
    }
    return total;
  }
}
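A minimal HiveQL sketch of how this could be registered and called from the Hive CLI; the jar path is a placeholder and not part of the original post:

  ADD JAR /path/to/hive-contrib-examples.jar;
  CREATE TEMPORARY FUNCTION example_add AS 'org.apache.hadoop.hive.contrib.udf.example.UDFExampleAdd';
  -- sums the non-NULL arguments; an Integer and a Double overload are provided
  SELECT example_add(1, 2, 3);

The same ADD JAR / CREATE TEMPORARY FUNCTION pattern applies to every example below, so later sketches only show the registration and a sample call.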


2. UDFExampleArraySum


    
    
    
    
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.contrib.udf.example;

import java.util.List;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;

/**
 * UDFExampleArraySum.
 *
 */
@Description(name = "example_arraysum", value = "_FUNC_(expr) - Example UDAF that returns the sum")
public class UDFExampleArraySum extends UDF {

  public Double evaluate(List<Double> a) {
    if (a == null) {
      return null;
    }
    double total = 0;
    for (int i = 0; i < a.size(); i++) {
      Double e = a.get(i);
      if (e != null) {
        total += e;
      }
    }
    return total;
  }
}
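Hedged usage sketch (the literal array is made up for illustration):

  CREATE TEMPORARY FUNCTION example_arraysum AS 'org.apache.hadoop.hive.contrib.udf.example.UDFExampleArraySum';
  -- sums the non-NULL elements of an array<double>; a NULL array yields NULL
  SELECT example_arraysum(array(CAST(1.0 AS DOUBLE), CAST(2.5 AS DOUBLE)));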



3. UDFExampleFormat

    
    
    
    
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.contrib.udf.example;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;

/**
 * UDFExampleFormat.
 *
 */
@Description(name = "example_format", value = "_FUNC_(expr) - Example UDAF that returns formated String")
public class UDFExampleFormat extends UDF {

  public String evaluate(String format, Object... args) {
    return String.format(format, args);
  }
}
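Hedged usage sketch (literal values chosen here for illustration):

  CREATE TEMPORARY FUNCTION example_format AS 'org.apache.hadoop.hive.contrib.udf.example.UDFExampleFormat';
  -- delegates directly to java.lang.String.format(format, args)
  SELECT example_format('%s=%d', 'answer', 42);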




4. UDFExampleMapConcat

    
    
    
    
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.contrib.udf.example;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Map;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;

/**
 * UDFExampleMapConcat.
 *
 */
@Description(name = "example_mapconcat",
    value = "_FUNC_(expr) - Example UDAF that returns contents of Map as a formated String")
public class UDFExampleMapConcat extends UDF {

  public String evaluate(Map<String, String> a) {
    if (a == null) {
      return null;
    }
    ArrayList<String> r = new ArrayList<String>(a.size());
    for (Map.Entry<String, String> entry : a.entrySet()) {
      r.add("(" + entry.getKey() + ":" + entry.getValue() + ")");
    }
    Collections.sort(r);
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < r.size(); i++) {
      sb.append(r.get(i));
    }
    return sb.toString();
  }
}
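Hedged usage sketch (the map literal is made up):

  CREATE TEMPORARY FUNCTION example_mapconcat AS 'org.apache.hadoop.hive.contrib.udf.example.UDFExampleMapConcat';
  -- renders each entry as "(key:value)" and concatenates them in sorted order
  SELECT example_mapconcat(map('a', '1', 'b', '2'));   -- expected to give (a:1)(b:2)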



5. UDFExampleStructPrint



    
    
    
    
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.contrib.udf.example;

import java.util.List;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;

/**
 * UDFExampleStructPrint.
 *
 */
@Description(name = "example_structprint",
    value = "_FUNC_(obj) - Example UDAF that returns contents of an object")
public class UDFExampleStructPrint extends UDF {

  public String evaluate(Object a) {
    if (a == null) {
      return null;
    }
    List<Object> s = (List<Object>) a;
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < s.size(); i++) {
      sb.append("(" + i + ":" + s.get(i) + ")");
    }
    return sb.toString();
  }
}
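Hedged usage sketch: a Hive struct reaches the UDF as a List of its field values, so the call below should print something like (0:1)(1:x). The struct literal is made up:

  CREATE TEMPORARY FUNCTION example_structprint AS 'org.apache.hadoop.hive.contrib.udf.example.UDFExampleStructPrint';
  SELECT example_structprint(named_struct('a', 1, 'b', 'x'));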



  • UDAF


1. UDAFExampleAvg



    
    
    
    
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.contrib.udaf.example;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;

/**
 * This is a simple UDAF that calculates average.
 *
 * It should be very easy to follow and can be used as an example for writing
 * new UDAFs.
 *
 * Note that Hive internally uses a different mechanism (called GenericUDAF) to
 * implement built-in aggregation functions, which are harder to program but
 * more efficient.
 *
 */
@Description(name = "example_avg",
    value = "_FUNC_(col) - Example UDAF to compute average")
public final class UDAFExampleAvg extends UDAF {

  /**
   * The internal state of an aggregation for average.
   *
   * Note that this is only needed if the internal state cannot be represented
   * by a primitive.
   *
   * The internal state can also contains fields with types like
   * ArrayList and HashMap if needed.
   */
  public static class UDAFAvgState {
    private long mCount;
    private double mSum;
  }

  /**
   * The actual class for doing the aggregation. Hive will automatically look
   * for all internal classes of the UDAF that implements UDAFEvaluator.
   */
  public static class UDAFExampleAvgEvaluator implements UDAFEvaluator {

    UDAFAvgState state;

    public UDAFExampleAvgEvaluator() {
      super();
      state = new UDAFAvgState();
      init();
    }

    /**
     * Reset the state of the aggregation.
     */
    public void init() {
      state.mSum = 0;
      state.mCount = 0;
    }

    /**
     * Iterate through one row of original data.
     *
     * The number and type of arguments need to the same as we call this UDAF
     * from Hive command line.
     *
     * This function should always return true.
     */
    public boolean iterate(Double o) {
      if (o != null) {
        state.mSum += o;
        state.mCount++;
      }
      return true;
    }

    /**
     * Terminate a partial aggregation and return the state. If the state is a
     * primitive, just return primitive Java classes like Integer or String.
     */
    public UDAFAvgState terminatePartial() {
      // This is SQL standard - average of zero items should be null.
      return state.mCount == 0 ? null : state;
    }

    /**
     * Merge with a partial aggregation.
     *
     * This function should always have a single argument which has the same
     * type as the return value of terminatePartial().
     */
    public boolean merge(UDAFAvgState o) {
      if (o != null) {
        state.mSum += o.mSum;
        state.mCount += o.mCount;
      }
      return true;
    }

    /**
     * Terminates the aggregation and return the final result.
     */
    public Double terminate() {
      // This is SQL standard - average of zero items should be null.
      return state.mCount == 0 ? null : Double.valueOf(state.mSum / state.mCount);
    }
  }

  private UDAFExampleAvg() {
    // prevent instantiation
  }
}
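Hedged usage sketch; the table and column names (employees, dept, salary) are placeholders:

  CREATE TEMPORARY FUNCTION example_avg AS 'org.apache.hadoop.hive.contrib.udaf.example.UDAFExampleAvg';
  -- per-group average; empty groups return NULL
  SELECT dept, example_avg(salary) FROM employees GROUP BY dept;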

2. UDAFExampleGroupConcat


    
    
    
    
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.contrib.udaf.example;

import java.util.ArrayList;
import java.util.Collections;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;

/**
 * This is a simple UDAF that concatenates all arguments from different rows
 * into a single string.
 *
 * It should be very easy to follow and can be used as an example for writing
 * new UDAFs.
 *
 * Note that Hive internally uses a different mechanism (called GenericUDAF) to
 * implement built-in aggregation functions, which are harder to program but
 * more efficient.
 */
@Description(name = "example_group_concat",
    value = "_FUNC_(col) - Example UDAF that concatenates all arguments from different rows into a single string")
public class UDAFExampleGroupConcat extends UDAF {

  /**
   * The actual class for doing the aggregation. Hive will automatically look
   * for all internal classes of the UDAF that implements UDAFEvaluator.
   */
  public static class UDAFExampleGroupConcatEvaluator implements UDAFEvaluator {

    ArrayList<String> data;

    public UDAFExampleGroupConcatEvaluator() {
      super();
      data = new ArrayList<String>();
    }

    /**
     * Reset the state of the aggregation.
     */
    public void init() {
      data.clear();
    }

    /**
     * Iterate through one row of original data.
     *
     * This UDF accepts arbitrary number of String arguments, so we use
     * String[]. If it only accepts a single String, then we should use a single
     * String argument.
     *
     * This function should always return true.
     */
    public boolean iterate(String[] o) {
      if (o != null) {
        StringBuilder sb = new StringBuilder();
        for (String element : o) {
          sb.append(element);
        }
        data.add(sb.toString());
      }
      return true;
    }

    /**
     * Terminate a partial aggregation and return the state.
     */
    public ArrayList<String> terminatePartial() {
      return data;
    }

    /**
     * Merge with a partial aggregation.
     *
     * This function should always have a single argument which has the same
     * type as the return value of terminatePartial().
     *
     * This function should always return true.
     */
    public boolean merge(ArrayList<String> o) {
      if (o != null) {
        data.addAll(o);
      }
      return true;
    }

    /**
     * Terminates the aggregation and return the final result.
     */
    public String terminate() {
      Collections.sort(data);
      StringBuilder sb = new StringBuilder();
      for (int i = 0; i < data.size(); i++) {
        sb.append(data.get(i));
      }
      return sb.toString();
    }
  }
}
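Hedged usage sketch; table and column names are placeholders:

  CREATE TEMPORARY FUNCTION example_group_concat AS 'org.apache.hadoop.hive.contrib.udaf.example.UDAFExampleGroupConcat';
  -- concatenates the values of each group, sorted before the final concat
  SELECT dept, example_group_concat(name) FROM employees GROUP BY dept;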


3. UDAFExampleMax

    
    
    
    
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.contrib.udaf.example;

import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

@Description(name = "example_max", value = "_FUNC_(expr) - Returns the maximum value of expr")
public class UDAFExampleMax extends UDAF {

  static public class MaxShortEvaluator implements UDAFEvaluator {
    private short mMax;
    private boolean mEmpty;

    public MaxShortEvaluator() {
      super();
      init();
    }

    public void init() {
      mMax = 0;
      mEmpty = true;
    }

    public boolean iterate(ShortWritable o) {
      if (o != null) {
        if (mEmpty) {
          mMax = o.get();
          mEmpty = false;
        } else {
          mMax = (short) Math.max(mMax, o.get());
        }
      }
      return true;
    }

    public ShortWritable terminatePartial() {
      return mEmpty ? null : new ShortWritable(mMax);
    }

    public boolean merge(ShortWritable o) {
      return iterate(o);
    }

    public ShortWritable terminate() {
      return mEmpty ? null : new ShortWritable(mMax);
    }
  }

  static public class MaxIntEvaluator implements UDAFEvaluator {
    private int mMax;
    private boolean mEmpty;

    public MaxIntEvaluator() {
      super();
      init();
    }

    public void init() {
      mMax = 0;
      mEmpty = true;
    }

    public boolean iterate(IntWritable o) {
      if (o != null) {
        if (mEmpty) {
          mMax = o.get();
          mEmpty = false;
        } else {
          mMax = Math.max(mMax, o.get());
        }
      }
      return true;
    }

    public IntWritable terminatePartial() {
      return mEmpty ? null : new IntWritable(mMax);
    }

    public boolean merge(IntWritable o) {
      return iterate(o);
    }

    public IntWritable terminate() {
      return mEmpty ? null : new IntWritable(mMax);
    }
  }

  static public class MaxLongEvaluator implements UDAFEvaluator {
    private long mMax;
    private boolean mEmpty;

    public MaxLongEvaluator() {
      super();
      init();
    }

    public void init() {
      mMax = 0;
      mEmpty = true;
    }

    public boolean iterate(LongWritable o) {
      if (o != null) {
        if (mEmpty) {
          mMax = o.get();
          mEmpty = false;
        } else {
          mMax = Math.max(mMax, o.get());
        }
      }
      return true;
    }

    public LongWritable terminatePartial() {
      return mEmpty ? null : new LongWritable(mMax);
    }

    public boolean merge(LongWritable o) {
      return iterate(o);
    }

    public LongWritable terminate() {
      return mEmpty ? null : new LongWritable(mMax);
    }
  }

  static public class MaxFloatEvaluator implements UDAFEvaluator {
    private float mMax;
    private boolean mEmpty;

    public MaxFloatEvaluator() {
      super();
      init();
    }

    public void init() {
      mMax = 0;
      mEmpty = true;
    }

    public boolean iterate(FloatWritable o) {
      if (o != null) {
        if (mEmpty) {
          mMax = o.get();
          mEmpty = false;
        } else {
          mMax = Math.max(mMax, o.get());
        }
      }
      return true;
    }

    public FloatWritable terminatePartial() {
      return mEmpty ? null : new FloatWritable(mMax);
    }

    public boolean merge(FloatWritable o) {
      return iterate(o);
    }

    public FloatWritable terminate() {
      return mEmpty ? null : new FloatWritable(mMax);
    }
  }

  static public class MaxDoubleEvaluator implements UDAFEvaluator {
    private double mMax;
    private boolean mEmpty;

    public MaxDoubleEvaluator() {
      super();
      init();
    }

    public void init() {
      mMax = 0;
      mEmpty = true;
    }

    public boolean iterate(DoubleWritable o) {
      if (o != null) {
        if (mEmpty) {
          mMax = o.get();
          mEmpty = false;
        } else {
          mMax = Math.max(mMax, o.get());
        }
      }
      return true;
    }

    public DoubleWritable terminatePartial() {
      return mEmpty ? null : new DoubleWritable(mMax);
    }

    public boolean merge(DoubleWritable o) {
      return iterate(o);
    }

    public DoubleWritable terminate() {
      return mEmpty ? null : new DoubleWritable(mMax);
    }
  }

  static public class MaxStringEvaluator implements UDAFEvaluator {
    private Text mMax;
    private boolean mEmpty;

    public MaxStringEvaluator() {
      super();
      init();
    }

    public void init() {
      mMax = null;
      mEmpty = true;
    }

    public boolean iterate(Text o) {
      if (o != null) {
        if (mEmpty) {
          mMax = new Text(o);
          mEmpty = false;
        } else if (mMax.compareTo(o) < 0) {
          mMax.set(o);
        }
      }
      return true;
    }

    public Text terminatePartial() {
      return mEmpty ? null : mMax;
    }

    public boolean merge(Text o) {
      return iterate(o);
    }

    public Text terminate() {
      return mEmpty ? null : mMax;
    }
  }
}
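Hedged usage sketch; Hive selects the evaluator (short/int/long/float/double/string) that matches the argument type. Table and column names are placeholders:

  CREATE TEMPORARY FUNCTION example_max AS 'org.apache.hadoop.hive.contrib.udaf.example.UDAFExampleMax';
  SELECT dept, example_max(salary) FROM employees GROUP BY dept;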

4. UDAFExampleMaxMinNUtil


    
    
    
    
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.contrib.udaf.example;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;

/**
 * The utility class for UDAFMaxN and UDAFMinN.
 */
public final class UDAFExampleMaxMinNUtil {

  /**
   * This class stores the information during an aggregation.
   *
   * Note that this class has to have a public constructor, so that Hive can
   * serialize/deserialize this class using reflection.
   */
  public static class State {
    ArrayList<Double> a; // This ArrayList holds the max/min N
    int n; // This is the N
  }

  /**
   * The base class of the UDAFEvaluator for UDAFMaxN and UDAFMinN.
   * We just need to override the getAscending function to make it work.
   */
  public abstract static class Evaluator implements UDAFEvaluator {

    private State state;

    public Evaluator() {
      state = new State();
      init();
    }

    /**
     * Reset the state.
     */
    public void init() {
      state.a = new ArrayList<Double>();
      state.n = 0;
    }

    /**
     * Returns true in UDAFMaxN, and false in UDAFMinN.
     */
    protected abstract boolean getAscending();

    /**
     * Iterate through one row of original data.
     * This function will update the internal max/min buffer if the internal buffer is not full,
     * or the new row is larger/smaller than the current max/min n.
     */
    public boolean iterate(Double o, int n) {
      boolean ascending = getAscending();
      state.n = n;
      if (o != null) {
        boolean doInsert = state.a.size() < n;
        if (!doInsert) {
          Double last = state.a.get(state.a.size() - 1);
          if (ascending) {
            doInsert = o < last;
          } else {
            doInsert = o > last;
          }
        }
        if (doInsert) {
          binaryInsert(state.a, o, ascending);
          if (state.a.size() > n) {
            state.a.remove(state.a.size() - 1);
          }
        }
      }
      return true;
    }

    /**
     * Get partial aggregation results.
     */
    public State terminatePartial() {
      // This is SQL standard - max_n of zero items should be null.
      return state.a.size() == 0 ? null : state;
    }

    /**
     * Two pointers are created to track the maximal elements in both o and MaxNArray.
     * The smallest element is added into tempArrayList
     * Consider the sizes of o and MaxNArray may be different.
     */
    public boolean merge(State o) {
      if (o != null) {
        state.n = o.n;
        state.a = sortedMerge(o.a, state.a, getAscending(), o.n);
      }
      return true;
    }

    /**
     * Terminates the max N lookup and return the final result.
     */
    public ArrayList<Double> terminate() {
      // This is SQL standard - return state.MaxNArray, or null if the size is zero.
      return state.a.size() == 0 ? null : state.a;
    }
  }

  /**
   * Returns a comparator based on whether the order is ascending or not.
   * Has a dummy parameter to make sure generics can infer the type correctly.
   */
  static <T extends Comparable<T>> Comparator<T> getComparator(boolean ascending, T dummy) {
    Comparator<T> comp;
    if (ascending) {
      comp = new Comparator<T>() {
        public int compare(T o1, T o2) {
          return o1.compareTo(o2);
        }
      };
    } else {
      comp = new Comparator<T>() {
        public int compare(T o1, T o2) {
          return o2.compareTo(o1);
        }
      };
    }
    return comp;
  }

  /**
   * Insert an element into an ascending/descending array, and keep the order.
   * @param ascending
   *          if true, the array is sorted in ascending order,
   *          otherwise it is in descending order.
   */
  static <T extends Comparable<T>> void binaryInsert(List<T> list, T value, boolean ascending) {
    int position = Collections.binarySearch(list, value, getComparator(ascending, (T) null));
    if (position < 0) {
      position = (-position) - 1;
    }
    list.add(position, value);
  }

  /**
   * Merge two ascending/descending array and keep the first n elements.
   * @param ascending
   *          if true, the array is sorted in ascending order,
   *          otherwise it is in descending order.
   */
  static <T extends Comparable<T>> ArrayList<T> sortedMerge(List<T> a1, List<T> a2,
      boolean ascending, int n) {
    Comparator<T> comparator = getComparator(ascending, (T) null);
    int n1 = a1.size();
    int n2 = a2.size();
    int p1 = 0; // The current element in a1
    int p2 = 0; // The current element in a2
    ArrayList<T> output = new ArrayList<T>(n);
    while (output.size() < n && (p1 < n1 || p2 < n2)) {
      if (p1 < n1) {
        if (p2 == n2 || comparator.compare(a1.get(p1), a2.get(p2)) < 0) {
          output.add(a1.get(p1++));
        }
      }
      if (output.size() == n) {
        break;
      }
      if (p2 < n2) {
        if (p1 == n1 || comparator.compare(a2.get(p2), a1.get(p1)) < 0) {
          output.add(a2.get(p2++));
        }
      }
    }
    return output;
  }

  // No instantiation.
  private UDAFExampleMaxMinNUtil() {
  }
}
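This file is only a helper; as its Javadoc says, it backs the "max N" / "min N" aggregates. Assuming their companion classes (UDAFExampleMaxN in the same package, an assumption not shown in this post) are also on the classpath, usage would look roughly like:

  CREATE TEMPORARY FUNCTION example_max_n AS 'org.apache.hadoop.hive.contrib.udaf.example.UDAFExampleMaxN';
  -- top-3 salaries per group as an array<double>; table/column names are placeholders
  SELECT dept, example_max_n(salary, 3) FROM employees GROUP BY dept;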


5. UDAFExampleMin

    
    
    
    
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.contrib.udaf.example;

import org.apache.hadoop.hive.ql.exec.UDAF;
import org.apache.hadoop.hive.ql.exec.UDAFEvaluator;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

@Description(name = "example_min", value = "_FUNC_(expr) - Returns the minimum value of expr")
public class UDAFExampleMin extends UDAF {

  static public class MinShortEvaluator implements UDAFEvaluator {
    private short mMin;
    private boolean mEmpty;

    public MinShortEvaluator() {
      super();
      init();
    }

    public void init() {
      mMin = 0;
      mEmpty = true;
    }

    public boolean iterate(ShortWritable o) {
      if (o != null) {
        if (mEmpty) {
          mMin = o.get();
          mEmpty = false;
        } else {
          mMin = (short) Math.min(mMin, o.get());
        }
      }
      return true;
    }

    public ShortWritable terminatePartial() {
      return mEmpty ? null : new ShortWritable(mMin);
    }

    public boolean merge(ShortWritable o) {
      return iterate(o);
    }

    public ShortWritable terminate() {
      return mEmpty ? null : new ShortWritable(mMin);
    }
  }

  static public class MinIntEvaluator implements UDAFEvaluator {
    private int mMin;
    private boolean mEmpty;

    public MinIntEvaluator() {
      super();
      init();
    }

    public void init() {
      mMin = 0;
      mEmpty = true;
    }

    public boolean iterate(IntWritable o) {
      if (o != null) {
        if (mEmpty) {
          mMin = o.get();
          mEmpty = false;
        } else {
          mMin = Math.min(mMin, o.get());
        }
      }
      return true;
    }

    public IntWritable terminatePartial() {
      return mEmpty ? null : new IntWritable(mMin);
    }

    public boolean merge(IntWritable o) {
      return iterate(o);
    }

    public IntWritable terminate() {
      return mEmpty ? null : new IntWritable(mMin);
    }
  }

  static public class MinLongEvaluator implements UDAFEvaluator {
    private long mMin;
    private boolean mEmpty;

    public MinLongEvaluator() {
      super();
      init();
    }

    public void init() {
      mMin = 0;
      mEmpty = true;
    }

    public boolean iterate(LongWritable o) {
      if (o != null) {
        if (mEmpty) {
          mMin = o.get();
          mEmpty = false;
        } else {
          mMin = Math.min(mMin, o.get());
        }
      }
      return true;
    }

    public LongWritable terminatePartial() {
      return mEmpty ? null : new LongWritable(mMin);
    }

    public boolean merge(LongWritable o) {
      return iterate(o);
    }

    public LongWritable terminate() {
      return mEmpty ? null : new LongWritable(mMin);
    }
  }

  static public class MinFloatEvaluator implements UDAFEvaluator {
    private float mMin;
    private boolean mEmpty;

    public MinFloatEvaluator() {
      super();
      init();
    }

    public void init() {
      mMin = 0;
      mEmpty = true;
    }

    public boolean iterate(FloatWritable o) {
      if (o != null) {
        if (mEmpty) {
          mMin = o.get();
          mEmpty = false;
        } else {
          mMin = Math.min(mMin, o.get());
        }
      }
      return true;
    }

    public FloatWritable terminatePartial() {
      return mEmpty ? null : new FloatWritable(mMin);
    }

    public boolean merge(FloatWritable o) {
      return iterate(o);
    }

    public FloatWritable terminate() {
      return mEmpty ? null : new FloatWritable(mMin);
    }
  }

  static public class MinDoubleEvaluator implements UDAFEvaluator {
    private double mMin;
    private boolean mEmpty;

    public MinDoubleEvaluator() {
      super();
      init();
    }

    public void init() {
      mMin = 0;
      mEmpty = true;
    }

    public boolean iterate(DoubleWritable o) {
      if (o != null) {
        if (mEmpty) {
          mMin = o.get();
          mEmpty = false;
        } else {
          mMin = Math.min(mMin, o.get());
        }
      }
      return true;
    }

    public DoubleWritable terminatePartial() {
      return mEmpty ? null : new DoubleWritable(mMin);
    }

    public boolean merge(DoubleWritable o) {
      return iterate(o);
    }

    public DoubleWritable terminate() {
      return mEmpty ? null : new DoubleWritable(mMin);
    }
  }

  static public class MinStringEvaluator implements UDAFEvaluator {
    private Text mMin;
    private boolean mEmpty;

    public MinStringEvaluator() {
      super();
      init();
    }

    public void init() {
      mMin = null;
      mEmpty = true;
    }

    public boolean iterate(Text o) {
      if (o != null) {
        if (mEmpty) {
          mMin = new Text(o);
          mEmpty = false;
        } else if (mMin.compareTo(o) > 0) {
          mMin.set(o);
        }
      }
      return true;
    }

    public Text terminatePartial() {
      return mEmpty ? null : mMin;
    }

    public boolean merge(Text o) {
      return iterate(o);
    }

    public Text terminate() {
      return mEmpty ? null : mMin;
    }
  }
}




  • UDTF


1. GenericUDTFCount2

    
    
    
    
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.contrib.udtf.example;

import java.util.ArrayList;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

/**
 * GenericUDTFCount2 outputs the number of rows seen, twice. It's output twice
 * to test outputting of rows on close with lateral view.
 *
 */
@Description(name = "udtfCount2",
    value = "_FUNC_(col) - UDF outputs the number of rows seen, twice.")
public class GenericUDTFCount2 extends GenericUDTF {

  private transient Integer count = Integer.valueOf(0);
  private transient Object forwardObj[] = new Object[1];

  @Override
  public void close() throws HiveException {
    forwardObj[0] = count;
    forward(forwardObj);
    forward(forwardObj);
  }

  @Override
  public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
    ArrayList<String> fieldNames = new ArrayList<String>();
    ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
    fieldNames.add("col1");
    fieldOIs.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);
    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
  }

  @Override
  public void process(Object[] args) throws HiveException {
    count = Integer.valueOf(count.intValue() + 1);
  }
}
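Hedged usage sketch: because the rows are emitted in close(), the UDTF is typically used through LATERAL VIEW, as its Javadoc notes. The table name src is a placeholder:

  CREATE TEMPORARY FUNCTION udtfCount2 AS 'org.apache.hadoop.hive.contrib.udtf.example.GenericUDTFCount2';
  -- emits the total row count of src twice, as column col1
  SELECT t.col1 FROM src LATERAL VIEW udtfCount2() t AS col1;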



2. GenericUDTFExplode2

    
    
    
    
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.contrib.udtf.example;

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

/**
 * GenericUDTFExplode2.
 *
 */
@Description(name = "explode2",
    value = "_FUNC_(a) - like explode, but outputs two identical columns (for testing purposes)")
public class GenericUDTFExplode2 extends GenericUDTF {

  private transient ListObjectInspector listOI = null;

  @Override
  public void close() throws HiveException {
  }

  @Override
  public StructObjectInspector initialize(ObjectInspector[] args)
      throws UDFArgumentException {
    if (args.length != 1) {
      throw new UDFArgumentException("explode() takes only one argument");
    }
    if (args[0].getCategory() != ObjectInspector.Category.LIST) {
      throw new UDFArgumentException("explode() takes an array as a parameter");
    }
    listOI = (ListObjectInspector) args[0];

    ArrayList<String> fieldNames = new ArrayList<String>();
    ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
    fieldNames.add("col1");
    fieldNames.add("col2");
    fieldOIs.add(listOI.getListElementObjectInspector());
    fieldOIs.add(listOI.getListElementObjectInspector());
    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
  }

  private transient Object forwardObj[] = new Object[2];

  @Override
  public void process(Object[] o) throws HiveException {
    List list = listOI.getList(o[0]);
    for (Object r : list) {
      forwardObj[0] = r;
      forwardObj[1] = r;
      forward(forwardObj);
    }
  }

  @Override
  public String toString() {
    return "explode";
  }
}
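Hedged usage sketch; the array literal and the table name src are placeholders:

  CREATE TEMPORARY FUNCTION explode2 AS 'org.apache.hadoop.hive.contrib.udtf.example.GenericUDTFExplode2';
  -- each array element is emitted once, duplicated into columns col1 and col2
  SELECT t.col1, t.col2 FROM src LATERAL VIEW explode2(array(1, 2, 3)) t AS col1, col2;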


