在 Vertica 中实现 MySQL 函数 substring_index:
package com.yy.vertica; import java.util.Arrays; import java.util.Collections; import java.util.List; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.math.NumberUtils; import com.vertica.sdk.BlockReader; import com.vertica.sdk.BlockWriter; import com.vertica.sdk.ColumnTypes; import com.vertica.sdk.DestroyInvocation; import com.vertica.sdk.ScalarFunction; import com.vertica.sdk.ScalarFunctionFactory; import com.vertica.sdk.ServerInterface; import com.vertica.sdk.SizedColumnTypes; import com.vertica.sdk.UdfException; import com.vertica.sdk.VerticaType; /** * vertica udf : substring_index * */ public class UDFSubStringIndexFactory extends ScalarFunctionFactory { @Override public ScalarFunction createScalarFunction(ServerInterface arg0) { return new SubStringIndex(); } public class SubStringIndex extends ScalarFunction{ @Override public void processBlock(ServerInterface serverInterface, BlockReader argReader, BlockWriter argWriter) throws UdfException, DestroyInvocation { //see how many arguments were passed in int numCols = argReader.getNumCols(); //check argument nums if (numCols != 3) { throw new UdfException(0, "Must supply 3 arguments:String input, String stripChars, int index"); } //make sure input columns : String input, String stripChars, int index SizedColumnTypes inTypes = argReader.getTypeMetaData(); VerticaType firstParamType = inTypes.getColumnType(0); VerticaType secondParamType = inTypes.getColumnType(1); VerticaType thirdParamType = inTypes.getColumnType(2); if (!firstParamType.isStringType() && !secondParamType.isStringType() && !thirdParamType.isInt()) { throw new UdfException(0, "make sure input columns is : String input, String stripChars, int index"); } String paramString = argReader.getString(0); String stripChars = argReader.getString(1); int index = NumberUtils.toInt(Long.toString(argReader.getLong(2))); argWriter.setString(evaluate(paramString, stripChars, index)); } public String evaluate(String input, String 
stripChars, int index) { String[] al = StringUtils.split(input, stripChars); if (al == null || stripChars == null || index == 0) { return null; } int indexAbs = Math.abs(index)>=al.length ? al.length: Math.abs(index); String[] result = new String[indexAbs]; List<String> tmp = Arrays.asList(al); if (index > 0) { System.arraycopy(tmp.toArray(), 0, result, 0, indexAbs); return StringUtils.join(result, stripChars); } // 反向取值 Collections.reverse(tmp); System.arraycopy(tmp.toArray(), 0, result, 0, indexAbs); List<String> res = Arrays.asList(result); Collections.reverse(res); return StringUtils.join(res, stripChars); } } @Override public void getPrototype(ServerInterface serverInterface, ColumnTypes argTypes, ColumnTypes returnType) { // Accepts any number and type or arguments. The ScalarFunction // class handles parsing the arguments. argTypes.addVarchar(); argTypes.addVarchar(); argTypes.addInt(); returnType.addVarchar(); } @Override public void getReturnType(ServerInterface srvInterface, final SizedColumnTypes argTypes, SizedColumnTypes returnType){ VerticaType type = argTypes.getColumnType(0); returnType.addVarchar(type.getStringLength()); } }
结果输出
Vmart=> select substring_index('a/b/c','/',2);
-[ RECORD 1 ]---+----
substring_index | a/b

Vmart=> select substring_index('a/b/c','/',3);
-[ RECORD 1 ]---+------
substring_index | a/b/c

Vmart=> select substring_index('a/b/c','/',1);
-[ RECORD 1 ]---+--
substring_index | a
注意点:
abstract void com.vertica.sdk.UDXFactory.getReturnType(ServerInterface srvInterface, SizedColumnTypes argTypes, SizedColumnTypes returnType) throws UdfException [pure virtual]
Function to tell Vertica what the return types (and length/precision if necessary) of this UDX are.
For CHAR/VARCHAR types, specify the max length,
For NUMERIC types, specify the precision and scale.
For Time types (with or without time zone), specify the precision, -1 means unspecified/don’t care
For IntervalYM/IntervalDS types, specify the precision and range
For all other types, no length/precision specification needed
字符串返回值需要指定返回长度。
创建自定义函数分两个步骤:
1、创建lib
2、创建function
Vmart=> SELECT SET_CONFIG_PARAMETER('JavaBinaryForUDx','/usr/bin/java');
Vmart=> CREATE LIBRARY verticaextlib AS '/home/dbadmin/verticaext.jar' LANGUAGE 'Java';
CREATE LIBRARY
Vmart=> CREATE FUNCTION substring_index AS LANGUAGE 'Java' NAME 'com.yy.vertica.UDFSubStringIndexFactory' LIBRARY verticaextlib;
CREATE FUNCTION