Cascading 自定义NVL(Coalesce) 函数

import java.util.ArrayList;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import cascading.flow.FlowProcess;
import cascading.operation.BaseOperation;
import cascading.operation.Function;
import cascading.operation.FunctionCall;
import cascading.operation.OperationCall;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;

/*
 * The NVL expression or function is very common used in SQL (ANSI SQL-2003-compliant). It returns
 * the value input if it is not null , else return the default value
 *
 * @author zilzhang
 */
public class NVL extends BaseOperation<Tuple> implements Function<Tuple> {
  private static final Logger LOG = LoggerFactory.getLogger(NVL.class);

  private List<Object> defaultValue = new ArrayList<Object>();

  public NVL(String fieldName) {
    super(1, new Fields(fieldName));
  }

  /**
   *
   * @param fieldName the name of resulting field
   * @param defaultValue the default value
   */
  public NVL(String fieldName, Object defaultValue) {
    super(1, new Fields(fieldName));
    this.defaultValue.add(defaultValue);
  }

  /**
   * The declared fileds should be the same position with the default value
   *
   * @param declaredFields
   * @param defaultValue
   */
  public NVL(Fields declaredFields, Object... defaultValue) {
    super(declaredFields);
    assert declaredFields.size() == defaultValue.length;
    for (Object object : defaultValue) {
      this.defaultValue.add(object);
    }
  }

  @Override
  public void prepare(FlowProcess flowProcess, OperationCall<Tuple> functionCall) {
    functionCall.setContext(Tuple.size(1));
  }

  @Override
  public void operate(FlowProcess flowProcess, FunctionCall<Tuple> functionCall) {
    Tuple result = functionCall.getContext();
    result.clear();
    TupleEntry argument = functionCall.getArguments();
    Object value = null;
    for (int i = 0; i < argument.size(); i++) {
      value = argument.getObject(i);
      if (value == null || value.equals("")) {
        result.add(defaultValue.get(i));
      } else {
        result.add(value);
      }
    }
    functionCall.getOutputCollector().add(result);
  }

}

 

//使用举例

1. (单字段)如果字段值是空返回 -99

pipe = new Each(pipe, new Fields("columnname"), new NVL(
            "columnname", -99), Fields.SWAP);

2. (多字段)每个字段单独判断:column1 为空时返回 -99,column2 为空时返回 "defaultvalue"

pipe = new Each(pipe, new Fields("column1", "column2"), new NVL(new Fields("column1", "column2"), -99, "defaultvalue"), Fields.SWAP);

 

 

你可能感兴趣的:(function,NVL,cascading)