<wbr><wbr><wbr>用到的测试表为:</wbr></wbr></wbr>
hive> desc row_number_test;
OK
id1     int
id2     string
age     int
score   double
name    string
<wbr></wbr>
hive> select * from row_number_test;
OK
2    t04    25    60.0    youlia
1    t01    20    85.0    liujiannan
1    t02    24    70.0    zengqiu
2    t03    30    88.0    hongqu
2    t03    27    70.0    yongqi
1    t02    19    75.0    wangdong
1    t02    24    70.0    zengqiu
<wbr></wbr>
使用时要先在子查询中进行分区与排序,比如oracle中这样一句SQL:
select row_number() over (partition by id1 order by age desc) from row_number_test;
转换为hive语句应该是:
select row_number(id1) from  --partition by的字段传到row_number函数中去
    (select * from row_number_test distribute by id1 sort by id1,age desc) a;
<wbr></wbr>
如果partition by 两个字段:
select row_number() over (partition by id1,id2 order by score) from row_number_test;
转换为hive语句应该是:
select row_number(id1,id2)  --partition by的字段传到row_number函数中去
    from (select * from row_number_test distribute by id1,id2 sort by id1,id2,score) a;
<wbr></wbr>
展示一下查询结果:
1.
select id1,id2,age,score,name,row_number(id1) rn from (select * from row_number_test distribute by id1 sort by id1,age desc) a;
<wbr></wbr>
OK
2    t03    30    88.0    hongqu        1
2    t03    27    70.0    yongqi        2
2    t04    25    60.0    youlia        3
1    t02    24    70.0    zengqiu       1
1    t02    24    70.0    zengqiu       2
1    t01    20    85.0    liujiannan    3
1    t02    19    75.0    wangdong      4
<wbr></wbr>
2.
select id1,id2,age,score,name,row_number(id1,id2) rn from (select * from row_number_test distribute by id1,id2 sort by id1,id2,score) a;
<wbr></wbr>
OK
2    t04    25    60.0    youlia        1
1    t02    24    70.0    zengqiu       1
2    t03    27    70.0    yongqi        1
1    t02    24    70.0    zengqiu       2
1    t02    19    75.0    wangdong      3
1    t01    20    85.0    liujiannan    1
2    t03    30    88.0    hongqu        2
<wbr></wbr>
下面是代码,只实现了接收1个参数和2个参数的evaluate方法,参数再多的照搬代码就可以了,代码仅供参考:
package org.rowincrement;

import java.util.ArrayList;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

/**
 * Table-generating function that, for an integer argument {@code n}, forwards
 * {@code n} single-column rows holding the values {@code 1..n}.
 */
public class RowIncrement extends GenericUDTF {

    /** Reusable one-element row buffer passed to {@code forward}. */
    Object[] result = new Object[1];

    /** No resources are held, so there is nothing to release. */
    @Override
    public void close() throws HiveException {
    }

    /**
     * Validates the call signature and declares the output schema.
     *
     * @param args object inspectors of the call arguments
     * @return a struct inspector with a single int column named {@code col1}
     * @throws UDFArgumentLengthException if the number of arguments is not exactly one
     * @throws UDFArgumentException if the argument's type name is not {@code "int"}
     */
    @Override
    public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
        if (args.length != 1) {
            throw new UDFArgumentLengthException("RowIncrement takes only one argument");
        }
        if (!args[0].getTypeName().equals("int")) {
            throw new UDFArgumentException("RowIncrement only takes an integer as a parameter");
        }

        ArrayList<String> fieldNames = new ArrayList<String>();
        ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
        fieldNames.add("col1");
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);
        return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
    }

    /**
     * Parses the first argument as an integer {@code n} and forwards one row
     * per value in {@code 1..n}.
     *
     * @param args call arguments; only {@code args[0]} is read
     * @throws HiveException if the argument cannot be parsed or forwarding fails;
     *         the underlying exception is attached as the cause
     */
    @Override
    public void process(Object[] args) throws HiveException {
        try {
            int n = Integer.parseInt(args[0].toString());
            for (int i = 0; i < n; i++) {
                result[0] = i + 1;
                forward(result);
            }
        } catch (Exception e) {
            // Keep the original message but chain the cause so the real
            // failure (bad number format, null argument, downstream error)
            // is not lost in the task logs.
            throw new HiveException("RowIncrement has an exception", e);
        }
    }

    /**
     * Manual smoke test that prints the result of repeated {@code evaluate} calls.
     *
     * NOTE(review): Row_number is not defined in this file; presumably it is a
     * sibling class in this package - confirm it exists before building.
     */
    public static void main(String args[]) {
        Row_number t = new Row_number();
        System.out.println(t.evaluate(123));
        System.out.println(t.evaluate(123));
        System.out.println(t.evaluate(123));
        System.out.println(t.evaluate(1234));
        System.out.println(t.evaluate(1234));
        System.out.println(t.evaluate(1234));
        System.out.println(t.evaluate(1235));
    }
}