Cascading 基本Pipe - Each 使用
Each 说白了就是对 Pipe 中的每一行记录逐条进行处理。话不多说,直接上代码。
package cascading;
import cascading.flow.FlowConnector;
import cascading.flow.FlowDef;
import cascading.flow.hadoop.HadoopFlowConnector;
import cascading.operation.Insert;
import cascading.pipe.Each;
import cascading.pipe.Pipe;
import cascading.scheme.Scheme;
import cascading.scheme.hadoop.TextDelimited;
import cascading.tap.SinkMode;
import cascading.tap.Tap;
import cascading.tap.hadoop.Hfs;
import cascading.tuple.Fields;
/**
 * Demonstrates the Cascading {@code Each} pipe: reads a ";"-delimited file
 * with columns (ID, Name), appends a constant "sourceflag" column to every
 * record via {@code Insert}, and writes the result back out on Hadoop.
 *
 * <p>Usage: {@code EachPipe <inputPath> <outputPath>}
 */
public class EachPipe {
public static void main(String[] args) {
// Fail fast with a usage message instead of an opaque
// ArrayIndexOutOfBoundsException when paths are missing.
if (args.length < 2) {
System.err.println("Usage: EachPipe <inputPath> <outputPath>");
System.exit(1);
}
// Define the input file's columns and field delimiter.
Scheme inScheme = new TextDelimited(new Fields("ID", "Name"), ";");
// Output scheme declares the inserted "sourceflag" column as well;
// the original reused inScheme, which only declared ID/Name and so did
// not cover the column this example exists to demonstrate.
// NOTE(review): confirm the field order matches the tuple produced by
// the Each step below.
Scheme outScheme = new TextDelimited(new Fields("ID", "Name", "sourceflag"), ";");
// Source tap: the delimited input file at args[0].
Tap inTap = new Hfs(inScheme, args[0]);
// Sink tap: args[1], replacing any previous output.
Tap outTap = new Hfs(outScheme, args[1], SinkMode.REPLACE);
Pipe inputPipe = new Pipe("inputPipe");
// Each applies Insert to every tuple, appending a constant
// "sourceflag" column with value 1; Fields.ALL keeps the original
// fields plus the inserted one in the outgoing tuple.
inputPipe = new Each(inputPipe,
new Insert(new Fields("sourceflag"), 1), Fields.ALL);
// Wire the single pipe between source and sink.
FlowDef flowDef = FlowDef.flowDef().addSource(inputPipe, inTap)
.addTailSink(inputPipe, outTap);
// Connect and run the flow on Hadoop; complete() blocks until done.
FlowConnector flowConnector = new HadoopFlowConnector();
flowConnector.connect(flowDef).complete();
}
}