Vertica实现mysql函数substring_index:
package com.yy.vertica;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.math.NumberUtils;
import com.vertica.sdk.BlockReader;
import com.vertica.sdk.BlockWriter;
import com.vertica.sdk.ColumnTypes;
import com.vertica.sdk.DestroyInvocation;
import com.vertica.sdk.ScalarFunction;
import com.vertica.sdk.ScalarFunctionFactory;
import com.vertica.sdk.ServerInterface;
import com.vertica.sdk.SizedColumnTypes;
import com.vertica.sdk.UdfException;
import com.vertica.sdk.VerticaType;
/**
 * Vertica scalar UDx factory for {@code substring_index(input, delimiter, count)},
 * modelled on MySQL's {@code SUBSTRING_INDEX}.
 *
 * <p>The function takes two VARCHAR arguments and one INTEGER argument and
 * returns a VARCHAR that is always a substring of the first argument.
 */
public class UDFSubStringIndexFactory extends ScalarFunctionFactory
{
    /**
     * Creates the function instance that evaluates blocks of rows.
     *
     * @param arg0 server interface supplied by Vertica (unused)
     * @return a new {@link SubStringIndex}
     */
    @Override
    public ScalarFunction createScalarFunction(ServerInterface arg0) {
        return new SubStringIndex();
    }

    /**
     * Row-by-row implementation of {@code substring_index}.
     */
    public class SubStringIndex extends ScalarFunction {

        /**
         * Validates the argument types once, then evaluates every row of the block.
         *
         * @param serverInterface server interface supplied by Vertica (unused)
         * @param argReader reader positioned on the first input row of the block
         * @param argWriter writer positioned on the first output row of the block
         * @throws UdfException if the argument count or argument types are wrong
         * @throws DestroyInvocation propagated from the SDK
         */
        @Override
        public void processBlock(ServerInterface serverInterface, BlockReader argReader, BlockWriter argWriter) throws UdfException, DestroyInvocation {
            // Check the number of arguments that were passed in.
            int numCols = argReader.getNumCols();
            if (numCols != 3) {
                throw new UdfException(0, "Must supply 3 arguments:String input, String stripChars, int index");
            }

            // Expected input columns: String input, String stripChars, int index.
            SizedColumnTypes inTypes = argReader.getTypeMetaData();
            VerticaType firstParamType = inTypes.getColumnType(0);
            VerticaType secondParamType = inTypes.getColumnType(1);
            VerticaType thirdParamType = inTypes.getColumnType(2);
            // Reject the call if ANY argument has the wrong type (the checks must be OR-ed).
            if (!firstParamType.isStringType() || !secondParamType.isStringType() || !thirdParamType.isInt()) {
                throw new UdfException(0, "make sure input columns is : String input, String stripChars, int index");
            }

            // A block holds many rows: consume each input row and emit exactly one output row for it.
            do {
                String result = null;
                boolean anyNull = argReader.isStringNull(0)
                        || argReader.isStringNull(1)
                        || argReader.isLongNull(2);
                if (!anyNull) {
                    // Clamp instead of truncating so that a huge count still means "more
                    // occurrences than exist" rather than wrapping to an unrelated value.
                    long rawIndex = argReader.getLong(2);
                    int index = (int) Math.max(Integer.MIN_VALUE, Math.min(Integer.MAX_VALUE, rawIndex));
                    result = evaluate(argReader.getString(0), argReader.getString(1), index);
                }

                if (result == null) {
                    argWriter.setStringNull();
                } else {
                    argWriter.setString(result);
                }
                argWriter.next();
            } while (argReader.next());
        }

        /**
         * Returns the part of {@code input} before the {@code index}-th occurrence of
         * the delimiter (counting from the left when {@code index} is positive), or
         * the part after the {@code |index|}-th occurrence counting from the right
         * when {@code index} is negative.
         *
         * <p>The delimiter is matched as a whole string, and empty segments produced
         * by leading, trailing or adjacent delimiters are preserved. If the delimiter
         * occurs fewer than {@code |index|} times, or is empty, the whole input is
         * returned.
         *
         * @param input string to cut; may be {@code null}
         * @param stripChars delimiter string; may be {@code null}
         * @param index number of delimiter occurrences to count; sign selects the direction
         * @return the selected substring, or {@code null} when {@code input} or
         *         {@code stripChars} is {@code null} or {@code index} is 0
         */
        public String evaluate(String input, String stripChars, int index) {
            // NOTE(review): MySQL appears to return '' for count = 0; NULL is kept here
            // so existing callers of this UDx see the same result as before.
            if (input == null || stripChars == null || index == 0) {
                return null;
            }
            if (stripChars.isEmpty()) {
                // An empty delimiter can never be "found"; treat it as no occurrences.
                return input;
            }

            int delimLength = stripChars.length();
            int pos = -1;

            if (index > 0) {
                int from = 0;
                for (int found = 0; found < index; found++) {
                    pos = input.indexOf(stripChars, from);
                    if (pos < 0) {
                        return input; // fewer occurrences than requested
                    }
                    from = pos + delimLength;
                }
                return input.substring(0, pos);
            }

            // Negative index: count occurrences from the right.
            // Widen before negating so that Integer.MIN_VALUE does not overflow.
            long wanted = -(long) index;
            int from = input.length() - delimLength;
            for (long found = 0; found < wanted; found++) {
                // lastIndexOf yields -1 for a negative start, which covers "ran off the left edge".
                pos = input.lastIndexOf(stripChars, from);
                if (pos < 0) {
                    return input; // fewer occurrences than requested
                }
                from = pos - delimLength;
            }
            return input.substring(pos + delimLength);
        }
    }

    /**
     * Declares the SQL signature: {@code (VARCHAR, VARCHAR, INTEGER) -> VARCHAR}.
     *
     * @param serverInterface server interface supplied by Vertica (unused)
     * @param argTypes receives the argument types
     * @param returnType receives the return type
     */
    @Override
    public void getPrototype(ServerInterface serverInterface, ColumnTypes argTypes, ColumnTypes returnType) {
        argTypes.addVarchar();
        argTypes.addVarchar();
        argTypes.addInt();
        returnType.addVarchar();
    }

    /**
     * Sizes the VARCHAR result. The result is always a substring of the first
     * argument, so the first argument's declared length is used as the bound.
     *
     * @param srvInterface server interface supplied by Vertica (unused)
     * @param argTypes sized types of the actual arguments
     * @param returnType receives the sized return type
     */
    @Override
    public void getReturnType(ServerInterface srvInterface, final SizedColumnTypes argTypes, SizedColumnTypes returnType){
        VerticaType type = argTypes.getColumnType(0);
        returnType.addVarchar(type.getStringLength());
    }
}
结果输出
Vmart=> select substring_index('a/b/c','/',2);
-[ RECORD 1 ]---+----
substring_index | a/b
Vmart=> select substring_index('a/b/c','/',3);
-[ RECORD 1 ]---+------
substring_index | a/b/c
Vmart=> select substring_index('a/b/c','/',1);
-[ RECORD 1 ]---+--
substring_index | a
注意点:
abstract void com.vertica.sdk.UDXFactory.getReturnType ( ServerInterface srvInterface, SizedColumnTypes argTypes,
SizedColumnTypes returnType ) throws UdfException [pure virtual]
Function to tell Vertica what the return types (and length/precision if necessary) of this UDX are.
For CHAR/VARCHAR types, specify the max length,
For NUMERIC types, specify the precision and scale.
For Time types (with or without time zone), specify the precision, -1 means unspecified/don’t care
For IntervalYM/IntervalDS types, specify the precision and range
For all other types, no length/precision specification needed
字符串返回值需要指定返回长度。
创建自定义函数分两个步骤:
1、创建lib
2、创建function
Vmart=> SELECT SET_CONFIG_PARAMETER('JavaBinaryForUDx','/usr/bin/java');
Vmart=> create LIBRARY verticaextlib as '/home/dbadmin/verticaext.jar' language 'Java';
CREATE LIBRARY
Vmart=> create function substring_index as language 'Java' name 'com.yy.vertica.UDFSubStringIndexFactory' library verticaextlib;
CREATE FUNCTION