Storm的学习(六)

这篇博客是最后一次讲解Storm的技术部分,之后终于可以说一说我最喜欢的机器学习部分了。这一篇打算写一个小Demo,例子来自官网。下面的例子先做词频统计,然后通过DRPC查询词频统计的结果:

package Trident.Tutorial;

import backtype.storm.Config;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import storm.trident.TridentState;
import storm.trident.TridentTopology;
import storm.trident.operation.builtin.Count;
import storm.trident.operation.builtin.FilterNull;
import storm.trident.operation.builtin.MapGet;
import storm.trident.operation.builtin.Sum;
import storm.trident.testing.FixedBatchSpout;
import storm.trident.testing.MemoryMapState;
import storm.trident.testing.Split;

/**
 * Trident word-count demo.
 *
 * <p>Wires two streams into one {@link TridentTopology}: a stream that splits
 * sentences into words and keeps a per-word count in a {@link MemoryMapState},
 * and a DRPC stream registered under the function name {@code "words"} that
 * looks up those counts and sums them. The topology is then submitted through
 * {@link StormSubmitter} under the name {@code "demo01"}.
 */
public class test01 {

    /**
     * Builds the topology and submits it.
     *
     * @param args command-line arguments (unused)
     * @throws AlreadyAliveException declared by {@code StormSubmitter.submitTopology}
     * @throws InvalidTopologyException declared by {@code StormSubmitter.submitTopology}
     */
    public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException {
        FixedBatchSpout sentenceSpout = createSentenceSpout();

        TridentTopology trident = new TridentTopology();
        TridentState counts = addWordCountStream(trident, sentenceSpout);
        addWordQueryStream(trident, counts);

        Config conf = new Config();
        conf.setDebug(true);
        StormSubmitter.submitTopology("demo01", conf, trident.build());
    }

    /** Creates the test spout that emits four fixed sentences in the "sentence" field. */
    private static FixedBatchSpout createSentenceSpout() {
        FixedBatchSpout sentenceSpout = new FixedBatchSpout(
                new Fields("sentence"),
                3, // batch-size argument of FixedBatchSpout
                new Values("the cow jumped over the moon"),
                new Values("the man went to the store and bought some candy"),
                new Values("four score and seven years ago"),
                new Values("how many apples can you eat"));
        // Cycle mode is switched on so the fixed sentences keep being replayed.
        sentenceSpout.setCycle(true);
        return sentenceSpout;
    }

    /**
     * Registers the counting stream on the given topology and returns the
     * state object that holds the per-word counts.
     */
    private static TridentState addWordCountStream(TridentTopology trident, FixedBatchSpout sentenceSpout) {
        return trident
                // Open a stream fed by the spout.
                .newStream("spout1", sentenceSpout)
                // Split every sentence tuple into individual "word" tuples.
                .each(new Fields("sentence"), new Split(), new Fields("word"))
                // Group tuples so that equal words end up together.
                .groupBy(new Fields("word"))
                // Count each group and persist the result in the in-memory map state.
                .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count"))
                // Parallelism hint for this part of the topology.
                .parallelismHint(6);
    }

    /**
     * Registers the DRPC stream "words", which answers a request with the
     * summed counts of the words contained in the request arguments.
     */
    private static void addWordQueryStream(TridentTopology trident, TridentState counts) {
        trident
                // DRPC stream; the request payload arrives in the "args" field.
                .newDRPCStream("words")
                // Split the request arguments into words.
                .each(new Fields("args"), new Split(), new Fields("word"))
                // Group by word before querying the state.
                .groupBy(new Fields("word"))
                // Look up the stored count for each word.
                .stateQuery(counts, new Fields("word"), new MapGet(), new Fields("count"))
                // Drop words for which the lookup returned null.
                .each(new Fields("count"), new FilterNull())
                // Add the remaining counts up into the final "sum" field.
                .aggregate(new Fields("count"), new Sum(), new Fields("sum"));
    }

}

将上面的代码打成Jar包并提交到Storm集群运行,可以得到如下结果:
Storm的学习(六)_第1张图片

package Trident.Tutorial;

import org.apache.thrift7.TException;

import backtype.storm.generated.DRPCExecutionException;
import backtype.storm.utils.DRPCClient;

/**
 * Minimal DRPC client for the word-count demo.
 *
 * <p>Connects to a DRPC server at {@code localhost:3772}, invokes the DRPC
 * function {@code "words"} with the argument {@code "apples"} and prints the
 * returned string to standard output.
 */
public class Client01 {

    /**
     * Executes one DRPC request and prints its result.
     *
     * @param args command-line arguments (unused)
     * @throws TException if the Thrift call to the DRPC server fails
     * @throws DRPCExecutionException if the DRPC server reports an execution failure
     */
    public static void main(String[] args) throws TException, DRPCExecutionException {
        DRPCClient client = new DRPCClient("localhost", 3772);
        try {
            System.out.println("开始调用....................");
            String result = client.execute("words", "apples");
            System.out.println(result);
        } finally {
            // Release the underlying Thrift transport even when execute() throws.
            client.close();
        }
    }
}

运行上述代码可以得到如下结果:
Storm的学习(六)_第2张图片

你可能感兴趣的:(storm)