Storm的学习(三)

前面的两篇博客只是简单介绍了Storm以及常用关键词的概念,现在我打算详细说一说Storm的Trident部分。这一部分也是后面将要介绍的机器学习算法实现的基础,因此需要仔细讲一讲。下面进入正题:
Trident是Storm的高层抽象,支持稳定的流处理(传统的Storm是非稳定的)。所谓稳定的意思就是:由数据源头进入Storm拓扑的每条数据只会被处理一次(即 exactly-once 语义)。
Trident也有自己的拓扑,称作Trident topology;也有自己的元组数据,称作TridentTuple。每个元组会映射为多个字段(和传统的Storm类似),可以使用getValueByField(String)按字段名取值,或者使用getValue(int)按位置取值。下面先从几个常用的操作开始:
1 Trident functions(函数)
函数的作用是对原始输入的数据进行处理,并且输出一个或者多个元组(比如:我们在使用机器学习算法的时候,需要将原始的数据变为能够处理的数据,如数组、矩阵等等,就可以使用这种方法)。下面我来写一个小的Demo解释一下,场景:输入一个tuple,有两个字段,现在要对两个字段进行求和操作(这里需要注意,function新产生的字段会追加在原有字段的后面):

package com.tradient.function;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;

import backtype.storm.task.TopologyContext;
import backtype.storm.tuple.Fields;
import storm.trident.operation.TridentCollector;
import storm.trident.spout.IBatchSpout;

/**
 * Trident batch spout used by the function demo.
 *
 * <p>Each batch holds {@code batchSize} tuples; every tuple carries two random
 * integers in the range [0, 5) under the fields {@code num1} and {@code num2}.
 * A generated batch is remembered under its batch id until {@link #ack(long)}
 * is called for that id, so a repeated {@link #emitBatch(long, TridentCollector)}
 * call with the same id re-emits the stored tuples instead of generating new ones.
 */
public class functionDemoSpout implements IBatchSpout{

    private static final long serialVersionUID = 10L;

    /** Exclusive upper bound of the generated random integers. */
    private static final int NUMBER_BOUND = 5;

    /** Number of tuples generated for each new batch. */
    private final int batchSize;

    /** Batches that were emitted but not yet acknowledged, keyed by batch id. */
    private final HashMap<Long, List<List<Object>>> batchesMap = new HashMap<Long, List<List<Object>>>();

    /**
     * Creates the spout.
     *
     * @param batchsize number of tuples to generate per batch
     */
    public functionDemoSpout(int batchsize) {
        this.batchSize = batchsize;
    }

    /**
     * Builds one record made of two random integers in [0, {@value #NUMBER_BOUND}).
     *
     * @param rand random source shared by all records of the batch being generated
     * @return a two-element list: the values for {@code num1} and {@code num2}
     */
    private List<Object> recordGenerator(Random rand){
        List<Object> record = new ArrayList<>();
        record.add(rand.nextInt(NUMBER_BOUND));
        record.add(rand.nextInt(NUMBER_BOUND));
        return record;
    }

    /** No initialization is required. */
    @Override
    public void open(Map conf, TopologyContext context) {
        // nothing to set up
    }

    /**
     * Emits the batch identified by {@code batchId}, generating and storing it
     * first when no batch is stored under that id.
     *
     * @param batchId   id of the batch to emit
     * @param collector collector that receives every tuple of the batch
     */
    @Override
    public void emitBatch(long batchId, TridentCollector collector) {
        System.out.println("batchId: "+batchId);
        List<List<Object>> batches = this.batchesMap.get(batchId);
        if(batches == null){
            // Unknown id: generate the batch once and remember it until it is acked.
            // A single Random serves the whole batch instead of one per record.
            final Random rand = new Random();
            batches = new ArrayList<List<Object>>();
            for(int i=0;i<this.batchSize;i++){
                batches.add(this.recordGenerator(rand));
            }
            this.batchesMap.put(batchId, batches);
        }
        System.out.println(batches);
        for(List<Object> record:batches){
            collector.emit(record);
        }
    }

    /**
     * Drops the stored tuples of an acknowledged batch.
     *
     * @param batchId id of the acknowledged batch
     */
    @Override
    public void ack(long batchId) {
        this.batchesMap.remove(batchId);
        System.out.println("remove: "+batchId);
    }

    /** No resources to release. */
    @Override
    public void close() {
        // nothing to clean up
    }

    /**
     * @return {@code null}; this spout supplies no component-specific configuration
     */
    @Override
    public Map getComponentConfiguration() {
        return null;
    }

    /**
     * @return the two output fields, {@code num1} and {@code num2}
     */
    @Override
    public Fields getOutputFields() {
        return new Fields("num1","num2");
    }

}
package com.tradient.function;

import backtype.storm.tuple.Values;
import storm.trident.operation.BaseFunction;
import storm.trident.operation.TridentCollector;
import storm.trident.tuple.TridentTuple;

/**
 * Trident function that adds the two integers of the incoming tuple.
 *
 * <p>For every input tuple it emits two values: a text of the form
 * {@code "a+b"} and the integer sum {@code a + b}.
 */
public class functionDemo extends BaseFunction{

    private static final long serialVersionUID = 5L;

    /**
     * Reads the integers at positions 0 and 1 and emits their textual
     * expression together with their sum.
     *
     * @param tuple     input tuple; positions 0 and 1 are read as integers
     * @param collector receives the single emitted (expression, sum) pair
     */
    @Override
    public void execute(TridentTuple tuple, TridentCollector collector) {
        final int left = tuple.getInteger(0);
        final int right = tuple.getInteger(1);
        final String expression = left + "+" + right;
        collector.emit(new Values(expression, left + right));
    }

}
package com.tradient.function;

import storm.trident.operation.BaseFunction;
import storm.trident.operation.TridentCollector;
import storm.trident.tuple.TridentTuple;

/**
 * Trident function that writes every tuple it receives to standard output.
 * It never emits anything to the collector.
 */
public class printfunction extends BaseFunction {

    /**
     * Prints the tuple's string form on its own line.
     *
     * @param tuple     tuple to print
     * @param collector unused; nothing is emitted
     */
    @Override
    public void execute(TridentTuple tuple, TridentCollector collector) {
        final String rendered = String.valueOf(tuple);
        System.out.println(rendered);
    }

}
package com.tradient.function;

import storm.trident.TridentTopology;
import storm.trident.operation.builtin.Count;

import com.storm.trident.demo01.FakeTweetSpout;
import com.storm.trident.demo01.TridentUtility;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.generated.StormTopology;
import backtype.storm.tuple.Fields;

/**
 * Demo topology: random number pairs -> sum function -> print function,
 * run on an in-process {@link LocalCluster}.
 */
public class tridentfunctiontopology {

    /**
     * Wires the stream "function": a {@code functionDemoSpout} with batch size 5,
     * shuffled, then {@code functionDemo} reading (num1, num2) and appending
     * (info, sum), then {@code printfunction} reading (info, sum) with declared
     * output fields (out1, out2). The parallelism hint is 20.
     *
     * @return the built Storm topology
     */
    public static StormTopology buildTopology(){
        final TridentTopology topology = new TridentTopology();
        final Fields operands = new Fields("num1","num2");
        final Fields sumFields = new Fields("info","sum");
        final Fields printFields = new Fields("out1","out2");

        topology.newStream("function", new functionDemoSpout(5))
                .shuffle()
                .each(operands, new functionDemo(), sumFields)
                .each(sumFields, new printfunction(), printFields)
                .parallelismHint(20);

        return topology.build();
    }

    /**
     * Submits the topology under the name "Count" to a local cluster with
     * max spout pending set to 10.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        final Config config = new Config();
        config.setMaxSpoutPending(10);
        // The cluster is created before the topology is built, as in the original flow.
        new LocalCluster().submitTopology("Count", config, buildTopology());
    }
}

2 Trident filters
过滤器(filter)的作用是保留满足条件的元组,去除不满足条件的元组。比如在上面的例子当中加入一个过滤器:当两个数的和是偶数的时候输出,否则去除。

package com.tradient.function;

import storm.trident.operation.BaseFilter;
import storm.trident.tuple.TridentTuple;

/**
 * Trident filter that keeps a tuple only when the integer at position 1
 * is even.
 */
public class CheckEvenSumFilter extends BaseFilter{

    /**
     * @param tuple tuple to test; position 1 is read as an integer
     * @return {@code true} when that integer is divisible by 2, {@code false} otherwise
     */
    @Override
    public boolean isKeep(TridentTuple tuple) {
        final int candidate = tuple.getInteger(1);
        return candidate % 2 == 0;
    }

}
package com.tradient.function;

import storm.trident.TridentTopology;
import storm.trident.operation.builtin.Count;

import com.storm.trident.demo01.FakeTweetSpout;
import com.storm.trident.demo01.TridentUtility;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.generated.StormTopology;
import backtype.storm.tuple.Fields;

/**
 * Demo topology: random number pairs -> sum function -> even-sum filter ->
 * print function, run on an in-process {@link LocalCluster}.
 */
public class tridentfunctiontopology {

    /**
     * Wires the stream "function": a {@code functionDemoSpout} with batch size 5,
     * shuffled, then {@code functionDemo} reading (num1, num2) and appending
     * (info, sum), then {@code CheckEvenSumFilter} applied to (info, sum), then
     * {@code printfunction} reading (info, sum) with declared output fields
     * (out1, out2). The parallelism hint is 20.
     *
     * @return the built Storm topology
     */
    public static StormTopology buildTopology(){
        final TridentTopology topology = new TridentTopology();
        final Fields operands = new Fields("num1","num2");
        final Fields sumFields = new Fields("info","sum");
        final Fields printFields = new Fields("out1","out2");

        topology.newStream("function", new functionDemoSpout(5))
                .shuffle()
                .each(operands, new functionDemo(), sumFields)
                .each(sumFields, new CheckEvenSumFilter())
                .each(sumFields, new printfunction(), printFields)
                .parallelismHint(20);

        return topology.build();
    }

    /**
     * Submits the topology under the name "Count" to a local cluster with
     * max spout pending set to 10.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        final Config config = new Config();
        config.setMaxSpoutPending(10);
        // The cluster is created before the topology is built, as in the original flow.
        new LocalCluster().submitTopology("Count", config, buildTopology());
    }
}

3 Trident projection
projection(一般译作"投影"),它的作用是从输入的多个字段中只保留指定的个别字段并输出,其余字段被丢弃。文字不太好描述,请看下图就明白了:
Storm的学习(三)_第1张图片

你可能感兴趣的:(storm,trident)