import java.util.ArrayList;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import cascading.flow.FlowProcess;
import cascading.operation.BaseOperation;
import cascading.operation.Function;
import cascading.operation.FunctionCall;
import cascading.operation.OperationCall;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;
/**
 * Cascading function modelled on the SQL {@code NVL} expression: every argument value that is
 * {@code null} or equal to the empty string is replaced by a configured default value, and every
 * other value is passed through unchanged.
 *
 * <p>Defaults are positional: the default at index {@code i} is used for the argument at index
 * {@code i}.
 *
 * @author zilzhang
 */
public class NVL extends BaseOperation<Tuple> implements Function<Tuple> {
  private static final Logger LOG = LoggerFactory.getLogger(NVL.class);

  /** Default values, indexed by argument position. Entries may be {@code null}. */
  private final List<Object> defaultValue = new ArrayList<Object>();

  /**
   * Creates a single-argument NVL whose default value is {@code null}.
   *
   * <p>Without an explicit default there is nothing to substitute, so a {@code null} or empty
   * argument yields {@code null}. Registering that {@code null} default keeps {@code operate}
   * from indexing into an empty default list.
   *
   * @param fieldName the name of the resulting field
   */
  public NVL(String fieldName) {
    this(fieldName, (Object) null);
  }

  /**
   * Creates a single-argument NVL.
   *
   * @param fieldName the name of the resulting field
   * @param defaultValue the value emitted when the argument is {@code null} or empty
   */
  public NVL(String fieldName, Object defaultValue) {
    super(1, new Fields(fieldName));
    this.defaultValue.add(defaultValue);
  }

  /**
   * Creates a multi-field NVL. Each declared field takes the default value at the same position.
   *
   * @param declaredFields the fields produced by this function
   * @param defaultValue one default per declared field, in the same order
   * @throws IllegalArgumentException if {@code defaultValue} is {@code null} or its length
   *     differs from the number of declared fields
   */
  public NVL(Fields declaredFields, Object... defaultValue) {
    super(declaredFields);
    // Validate explicitly: an assert is skipped unless the JVM runs with -ea, which would let a
    // mismatch go unnoticed until operate() fails on the first null value.
    if (defaultValue == null) {
      throw new IllegalArgumentException("defaultValue array must not be null");
    }
    if (declaredFields.size() != defaultValue.length) {
      throw new IllegalArgumentException(
          "expected one default value per declared field: " + declaredFields.size()
              + " field(s) declared but " + defaultValue.length + " default value(s) given");
    }
    for (Object object : defaultValue) {
      this.defaultValue.add(object);
    }
  }

  /**
   * Installs the reusable result tuple as the operation context.
   *
   * @param flowProcess the current flow process (unused)
   * @param functionCall the call whose context receives the result tuple
   */
  @Override
  public void prepare(FlowProcess flowProcess, OperationCall<Tuple> functionCall) {
    // The initial size is irrelevant: operate() clears the tuple before filling it.
    functionCall.setContext(Tuple.size(1));
  }

  /**
   * Emits one tuple in which every {@code null} or empty-string argument is replaced by the
   * default value at the same position.
   *
   * @param flowProcess the current flow process (unused)
   * @param functionCall supplies the arguments, the reusable result tuple and the collector
   */
  @Override
  public void operate(FlowProcess flowProcess, FunctionCall<Tuple> functionCall) {
    Tuple result = functionCall.getContext();
    result.clear();
    TupleEntry argument = functionCall.getArguments();
    for (int i = 0; i < argument.size(); i++) {
      Object value = argument.getObject(i);
      if (value == null || value.equals("")) {
        result.add(defaultValue.get(i));
      } else {
        result.add(value);
      }
    }
    functionCall.getOutputCollector().add(result);
  }
}
// Usage examples
// 1. (Single field) Return -99 when the field value is null or empty:
//    pipe = new Each(pipe, new Fields("columnname"), new NVL("columnname", -99), Fields.SWAP);
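// 2. (Multiple fields) Replace each null or empty field with the default at the same position
//    (-99 for column1, "defaultvalue" for column2):
//    pipe = new Each(pipe, new Fields("column1", "column2"),
//        new NVL(new Fields("column1", "column2"), -99, "defaultvalue"), Fields.SWAP);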