我们首先画一个单词计数流程图
需要的jar包是在storm解压下的lib目录里,如果集成其他的保存数据的组件如redis、hdfs就需要到/root/training/apache-storm-1.0.3/external 这里边的jar包
代码:
spout组件
package com.test.demo;
import java.util.Map;
import java.util.Random;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils;
public class WordCountSpout extends BaseRichSpout
{
    private static final long serialVersionUID = 1L;

    /** Collector handed to us in open(); used by nextTuple() to emit into the stream. */
    private SpoutOutputCollector collector;

    // Simulated input data (stands in for a real source such as a log or a queue).
    private String[] datas={"I love Beijing","I love China","Beijing of the captial of China"};

    // Reused RNG: the original allocated a new Random on every nextTuple() call.
    private Random random;

    /**
     * Called repeatedly by Storm; emits one randomly chosen sentence every two seconds.
     */
    @Override
    public void nextTuple() {
        Utils.sleep(2000); // throttle: produce one tuple every two seconds
        // Bound by datas.length instead of the hard-coded 3, so the array can grow safely.
        int nextInt = random.nextInt(datas.length);
        this.collector.emit(new Values(datas[nextInt]));
        System.out.println("采集到的数据是:"+datas[nextInt]);
    }

    /**
     * One-time initialization on the worker; stores the output collector and seeds the RNG.
     */
    @Override
    public void open(Map arg0, TopologyContext arg1, SpoutOutputCollector collector) {
        this.collector=collector;
        this.random=new Random();
    }

    /**
     * Declares the single output field "content"; downstream bolts read it by this name.
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("content"));
    }
}
bolt组件,负责拆分单词的
package com.test.demo;
import java.util.Map;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
public class WordCountSplitBolt extends BaseRichBolt
{
    private static final long serialVersionUID = 1L;

    /** Output collector captured in prepare(); used to forward word tuples. */
    private OutputCollector collector;

    /**
     * Splits each incoming line (field "content") on single spaces and emits
     * one ("word", 1) tuple per token for the downstream counting bolt.
     */
    @Override
    public void execute(Tuple tuple) {
        final String content = tuple.getStringByField("content");
        for (String token : content.split(" ")) {
            this.collector.emit(new Values(token, 1));
        }
    }

    /**
     * One-time setup on the worker; keeps a reference to the collector for execute().
     */
    @Override
    public void prepare(Map arg0, TopologyContext arg1, OutputCollector collector) {
        this.collector = collector;
    }

    /**
     * Each output tuple carries the word plus an initial count of 1.
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word", "count"));
    }
}
bolt组件,负责计数的
package com.test.demo;
import java.util.HashMap;
import java.util.Map;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
public class WordCountTotalBolt extends BaseRichBolt
{
    private static final long serialVersionUID = 1L;

    private OutputCollector collector;

    // Running totals keyed by word. Typed map replaces the original raw
    // Map/HashMap, removing the unchecked casts on get/put.
    private Map<String, Integer> result = new HashMap<String, Integer>();

    /**
     * Accumulates the incoming ("word", count) tuple into the running totals
     * and prints the updated count.
     *
     * NOTE(review): this version emits nothing downstream, so any bolt wired
     * after it (e.g. a Redis store bolt) will receive no data — the document
     * later shows the corrected version that emits the total.
     */
    @Override
    public void execute(Tuple tuple) {
        String word = tuple.getStringByField("word");
        Integer delta = tuple.getIntegerByField("count");
        // Single lookup; the original fetched result.get(word) twice and left
        // one of the copies ("integer") unused.
        Integer count = result.get(word);
        result.put(word, count != null ? count + delta : delta);
        System.out.println("统计结果:"+word+"---"+result.get(word));
    }

    /**
     * One-time setup; stores the collector (unused by this version, see NOTE above).
     */
    @Override
    public void prepare(Map arg0, TopologyContext arg1, OutputCollector collector) {
        this.collector=collector;
    }

    /**
     * Declares ("word", "total") — declared but never actually emitted by this version.
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word","total"));
    }
}
最后编写任务执行
package com.test.demo;
/*import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;*/
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.AlreadyAliveException;
import org.apache.storm.generated.AuthorizationException;
import org.apache.storm.generated.InvalidTopologyException;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;
public class WordCountTopology
{
public static void main(String[] args) {
//创建一个topology
TopologyBuilder builder=new TopologyBuilder();
//指定任务spout组件
builder.setSpout("myspout", new WordCountSpout());
//指定拆分bolt组件,并指定分组方式与上一级组件进行串联
builder.setBolt("mysplit", new WordCountSplitBolt()).shuffleGrouping("myspout");
//指定计数的bolt组件
builder.setBolt("mycount", new WordCountTotalBolt()).fieldsGrouping("mysplit", new Fields("word"));
//创建任务
StormTopology topology = builder.createTopology();
//配置参数 ,可以为spout或者是bolt传递参数,open和prepare就是和这里对应
Config conf = new Config();
//有两种运行模式
//1、本地模式
// LocalCluster cluster = new LocalCluster();
// cluster.submitTopology("MyWC", conf, topology);
//2、集群模式,直接提交到storm的集群中处理
try
{
//args[0]是topology的别名我们通过提交时路径指定,conf是配置文件,topology是任务
StormSubmitter.submitTopology(args[0], conf, topology);
} catch (Exception e)
{
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
有两种执行方式
一种是本地模式,就是编辑环境中即可执行,一般用于测试,相当于在编辑环境集成了storm
一种是集群模式需要把程序打包并把任务提交到storm的集群上执行
集群模式导出的时候需要选择主类入口
执行命令
storm jar stormdemo1.jar com.test.demo.WordCountTopology MyWC
集成redis的bolt用于保存storm中处理的结果
需要额外导入的jar包有external中的storm-redis-1.0.3.jar,还有redis中的jar包jedis-2.7.0.jar和commons-pool2-2.3.jar
如果不导入redis中的jedis和common的包,会报错误如下
Exception in thread "main" java.lang.NoClassDefFoundError: redis/clients/jedis/JedisCommands
at java.lang.Class.getDeclaredMethods0(Native Method)
at java.lang.Class.privateGetDeclaredMethods(Class.java:2701)
at java.lang.Class.getDeclaredMethod(Class.java:2128)
at java.io.ObjectStreamClass.getPrivateMethod(ObjectStreamClass.java:1475)
at java.io.ObjectStreamClass.access$1700(ObjectStreamClass.java:72)
at java.io.ObjectStreamClass$2.run(ObjectStreamClass.java:498)
at java.io.ObjectStreamClass$2.run(ObjectStreamClass.java:472)
at java.security.AccessController.doPrivileged(Native Method)
at java.io.ObjectStreamClass.(ObjectStreamClass.java:472)
at java.io.ObjectStreamClass.lookup(ObjectStreamClass.java:369)
at java.io.ObjectStreamClass.(ObjectStreamClass.java:468)
at java.io.ObjectStreamClass.lookup(ObjectStreamClass.java:369)
at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1134)
at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:348)
at org.apache.storm.utils.Utils.javaSerialize(Utils.java:232)
at org.apache.storm.topology.TopologyBuilder.createTopology(TopologyBuilder.java:123)
at com.test.demo.WordCountTopology.main(WordCountTopology.java:30)
Caused by: java.lang.ClassNotFoundException: redis.clients.jedis.JedisCommands
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
... 17 more
集成redis的bolt主要在topology中写就可以了
package com.test.demo;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.redis.bolt.RedisStoreBolt;
import org.apache.storm.redis.common.config.JedisPoolConfig;
import org.apache.storm.redis.common.mapper.RedisDataTypeDescription;
import org.apache.storm.redis.common.mapper.RedisDataTypeDescription.RedisDataType;
import org.apache.storm.redis.common.mapper.RedisStoreMapper;
import org.apache.storm.topology.IRichBolt;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.ITuple;
public class WordCountTopology
{
    // Redis connection settings, extracted from createRedisBolt() so they are
    // named and easy to change instead of being inlined magic values.
    private static final String REDIS_HOST = "192.168.112.111";
    private static final int REDIS_PORT = 6379;

    /**
     * Builds the word-count topology with a Redis store bolt appended
     * (spout -> split -> count -> redis) and runs it in local mode.
     */
    public static void main(String[] args) {
        // Assemble the topology.
        TopologyBuilder builder=new TopologyBuilder();
        // Spout that produces raw sentences.
        builder.setSpout("myspout", new WordCountSpout());
        // Splitting bolt, shuffle-grouped over the spout's output.
        builder.setBolt("mysplit", new WordCountSplitBolt()).shuffleGrouping("myspout");
        // Counting bolt; fieldsGrouping("word") routes the same word to the same task.
        builder.setBolt("mycount", new WordCountTotalBolt()).fieldsGrouping("mysplit", new Fields("word"));
        // Redis bolt that persists the running totals.
        builder.setBolt("redisBolt", createRedisBolt()).shuffleGrouping("mycount");
        StormTopology topology = builder.createTopology();
        // Topology configuration; values here reach each component's open()/prepare().
        Config conf = new Config();
        // Mode 1 — local (in-IDE testing):
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("MyWC", conf, topology);
        // Mode 2 — cluster: submit directly to the Storm cluster.
        /*try
        {
            // args[0] is the topology name; conf the config; topology the job.
            StormSubmitter.submitTopology(args[0], conf, topology);
        } catch (Exception e)
        {
            e.printStackTrace();
        }*/
    }

    /**
     * Creates a RedisStoreBolt that writes each ("word", "total") tuple into
     * the Redis hash "myResult" (field = word, value = total).
     */
    private static IRichBolt createRedisBolt() {
        // Configure the Redis endpoint.
        JedisPoolConfig.Builder builder = new JedisPoolConfig.Builder();
        builder.setHost(REDIS_HOST);
        builder.setPort(REDIS_PORT);
        JedisPoolConfig config = builder.build();
        return new RedisStoreBolt(config, new RedisStoreMapper()
        {
            /** Store results as a Redis HASH named "myResult". */
            @Override
            public RedisDataTypeDescription getDataTypeDescription() {
                return new RedisDataTypeDescription(RedisDataType.HASH, "myResult");
            }

            /** Hash value: the running total from the upstream counting bolt. */
            @Override
            public String getValueFromTuple(ITuple tuple) {
                return String.valueOf(tuple.getIntegerByField("total"));
            }

            /** Hash field: the word from the upstream counting bolt. */
            @Override
            public String getKeyFromTuple(ITuple tuple) {
                return tuple.getStringByField("word");
            }
        });
    }
}
我写好了执行也没有问题,但是在redis中使用hgetall myResult的时候获取不到数据,最后发现是在redisBolt的前一个bolt没有输出tuple造成的
package com.test.demo;
import java.util.HashMap;
import java.util.Map;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
public class WordCountTotalBolt extends BaseRichBolt
{
    private static final long serialVersionUID = 1L;

    /** Collector used to forward ("word", total) tuples, e.g. to a Redis store bolt. */
    private OutputCollector collector;

    // Running totals keyed by word. Typed map replaces the original raw
    // Map/HashMap, removing the unchecked casts on get/put.
    private Map<String, Integer> result = new HashMap<String, Integer>();

    /**
     * Accumulates the incoming ("word", count) tuple into the running totals,
     * emits the updated ("word", total) tuple downstream, and prints it.
     */
    @Override
    public void execute(Tuple tuple) {
        String word = tuple.getStringByField("word");
        Integer delta = tuple.getIntegerByField("count");
        // Single lookup; the original fetched result.get(word) twice and left
        // one of the copies ("integer") unused.
        Integer count = result.get(word);
        Integer total = (count != null) ? count + delta : delta;
        result.put(word, total);
        // Emit the updated total so the next bolt (e.g. RedisStoreBolt) receives data.
        collector.emit(new Values(word, total));
        System.out.println("统计结果:"+word+"---"+result.get(word));
    }

    /**
     * One-time setup; stores the collector used by execute() to emit downstream.
     */
    @Override
    public void prepare(Map arg0, TopologyContext arg1, OutputCollector collector) {
        this.collector=collector;
    }

    /**
     * Output schema: the word and its current running total.
     */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("word","total"));
    }
}