要求:
统计单词,每10秒钟输出一个统计结果:
思考:几个spout?
几个bolt?
由BoltA去识别时钟;如果是B,每10s就输出,这不需要。
如果由A去识别时钟,A中各task的前后顺序不一定一样。
代码实现:
package streaming;
import org.apache.commons.collections.map.HashedMap;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import java.util.*;
import java.util.logging.Logger;
public class CountBeatingWindow {
/**
 * Builds the word-count topology and submits it to an in-process cluster
 * under the name "window".
 *
 * <p>Wiring: "spout1" (random words) is fields-grouped on "word" into three
 * "bolt1" tasks, "spout2" (timer marker) is sent to all "bolt1" tasks, and
 * "bolt1" feeds a single "bolt2" through shuffle grouping.
 *
 * @param args unused
 */
public static void main(String[] args) {
    TopologyBuilder topology = new TopologyBuilder();

    // Sources: the word stream and the timer marker stream.
    topology.setSpout("spout1", new WindowSpout());
    topology.setSpout("spout2", new TimeSpout());

    // Per-task counting; every task also receives the timer marker.
    topology.setBolt("bolt1", new WindowCountBoltA(), 3)
            .allGrouping("spout2")
            .fieldsGrouping("spout1", new Fields("word"));

    // Single gatherer that merges the partial results.
    topology.setBolt("bolt2", new GatherBoltB())
            .shuffleGrouping("bolt1");

    Config conf = new Config();
    new LocalCluster().submitTopology("window", conf, topology.createTopology());
}
/**
 * Timer spout: emits the marker string {@code "time_is_over"} once at least ten
 * seconds have passed since the previous emission. The wall clock is polled on
 * every {@link #nextTuple()} call.
 *
 * <p>The marker is declared under the field name "word", the same name the word
 * spout uses, so a consumer can read both streams with one field lookup.
 */
static class TimeSpout extends BaseRichSpout {
    private static final long serialVersionUID = 1L;

    /** Minimum gap between two timer markers, in milliseconds. */
    private static final long WINDOW_MILLIS = 10_000L;

    /** Pause at the end of each poll; must stay well below {@link #WINDOW_MILLIS}. */
    private static final long POLL_MILLIS = 100L;

    /** Wall-clock time (ms) at which the current window started. */
    long prevTime = System.currentTimeMillis();

    /** Payload of the timer marker tuple. */
    String flag = "time_is_over";

    SpoutOutputCollector collector;

    /**
     * Stores the collector and restarts the window.
     *
     * <p>The instance is constructed in {@code main} before the topology is
     * submitted, so the construction-time value of {@code prevTime} can be
     * older than the moment the spout starts running. Resetting it here
     * measures the first window from the actual start.
     */
    @SuppressWarnings("rawtypes")
    @Override
    public void open(Map map, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
        this.collector = spoutOutputCollector;
        this.prevTime = System.currentTimeMillis();
    }

    /**
     * Emits the marker if the window has elapsed, then sleeps for
     * {@link #POLL_MILLIS} so the caller's loop does not spin.
     */
    @Override
    public void nextTuple() {
        long currentTime = System.currentTimeMillis();
        if (currentTime - prevTime >= WINDOW_MILLIS) {
            prevTime = currentTime; // slide the window start forward
            collector.emit(new Values(flag));
        }
        try {
            Thread.sleep(POLL_MILLIS);
        } catch (InterruptedException e) {
            // Keep the interrupt visible to the calling thread instead of
            // swallowing it; the caller decides whether to stop.
            Thread.currentThread().interrupt();
        }
    }

    /** Declares a single output field named "word". */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        outputFieldsDeclarer.declare(new Fields("word"));
    }
}
/**
 * Word spout: on every {@link #nextTuple()} call it emits one word picked at
 * random from a fixed sample sentence, then pauses for 100 ms.
 */
static class WindowSpout extends BaseRichSpout {
    /** Pause after each emitted word, in milliseconds. */
    private static final long EMIT_PAUSE_MILLIS = 100L;

    /**
     * Sample text the words are drawn from. The spelling is left exactly as
     * is because these words are the data that gets counted downstream.
     */
    private static final String SAMPLE_TEXT =
            "Strom integrates with queueing and database " +
            "technologies you already use Storm topology consumes " +
            "streams data processes those streams arbitrarily " +
            "comples ways repartitioning the streams between each stage " +
            "computation howerver needed read more tutorial";

    SpoutOutputCollector collector;
    String[] words;
    Random r;

    /** Stores the collector, splits the sample text on whitespace and creates the random source. */
    @SuppressWarnings("rawtypes")
    @Override
    public void open(Map map, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
        this.collector = spoutOutputCollector;
        this.words = SAMPLE_TEXT.split("\\s+");
        this.r = new Random();
    }

    /** Emits one randomly chosen word downstream and then sleeps briefly. */
    @Override
    public void nextTuple() {
        String word = words[r.nextInt(words.length)];
        collector.emit(new Values(word));
        try {
            Thread.sleep(EMIT_PAUSE_MILLIS);
        } catch (InterruptedException e) {
            // Keep the interrupt visible to the calling thread instead of
            // swallowing it; the caller decides whether to stop.
            Thread.currentThread().interrupt();
        }
    }

    /** Declares a single output field named "word". */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        outputFieldsDeclarer.declare(new Fields("word"));
    }
}
/**
 * Counting bolt: tallies every incoming "word" value and, when the value is the
 * timer marker {@code "time_is_over"}, emits the tallies collected so far as one
 * string and starts over.
 *
 * <p>Emitted format (field "kv"): {@code key,value key,value ...}, ordered by
 * descending count, each pair followed by a single space.
 *
 * <p>NOTE(review): input tuples are never acked. The spouts in this file emit
 * without message ids, so this presumably has no effect on this topology;
 * confirm before wiring in a spout that tracks tuples.
 */
static class WindowCountBoltA extends BaseRichBolt {
    OutputCollector collector;

    /** Value of the timer marker that closes a window. */
    String flag = "time_is_over";

    /** Per-window tallies, keyed by word (the timer marker is tallied too). */
    Map<String, Integer> kv = new HashMap<String, Integer>();

    /** Stores the collector used to emit the per-window result. */
    @SuppressWarnings("rawtypes")
    @Override
    public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
        this.collector = outputCollector;
    }

    /**
     * Counts the tuple's "word" value; on the timer marker, emits and clears
     * the current tallies.
     *
     * @param tuple input carrying a string field named "word"
     */
    @Override
    public void execute(Tuple tuple) {
        String word = tuple.getStringByField("word");

        // The marker itself is counted as well. That keeps the emitted string
        // non-empty even when no words arrived during the window.
        Integer seen = kv.get(word);
        kv.put(word, seen == null ? 1 : seen + 1);

        if (!flag.equals(word)) {
            return;
        }

        // Window closed: serialize the tallies, highest count first.
        StringBuilder sb = new StringBuilder();
        for (Map.Entry<String, Integer> tally : ValueSort.sort(kv)) {
            sb.append(tally.getKey()).append(',').append(tally.getValue()).append(' ');
        }
        collector.emit(new Values(sb.toString()));
        kv.clear(); // next window starts from zero
    }

    /** Declares a single output field named "kv". */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        outputFieldsDeclarer.declare(new Fields("kv"));
    }
}
/**
 * Gathering bolt: merges the "kv" strings produced upstream and, on every third
 * received tuple, logs the ten entries with the highest counts and resets.
 *
 * <p>Expected input format: {@code key,value key,value ...} (space-separated
 * pairs, comma between key and integer value).
 *
 * <p>NOTE(review): the every-third-tuple rule assumes the three partial results
 * of one window arrive before any result of the next window. Nothing in this
 * class enforces that; confirm it holds for the topology in use.
 */
static class GatherBoltB extends BaseRichBolt {
    private static final Logger LOG = Logger.getLogger(CountBeatingWindow.class.getName());

    /** Number of upstream partial results that make up one complete window. */
    private static final int UPSTREAM_TASKS = 3;

    /** Maximum number of entries logged per window. */
    private static final int TOP_N = 10;

    /** Key of the timer marker, which is not a real word and is dropped. */
    private static final String TIMER_FLAG = "time_is_over";

    /** Merged counts for the window currently being assembled. */
    Map<String, Integer> keyCount = new LinkedHashMap<>();

    /** 1-based index of the partial result expected next within the window. */
    int counttime = 1;

    /** No setup required; this bolt emits nothing. */
    @SuppressWarnings("rawtypes")
    @Override
    public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
    }

    /**
     * Merges one partial result and, once {@link #UPSTREAM_TASKS} of them have
     * been received, logs the top entries and starts a new window.
     *
     * @param tuple input carrying a string field named "kv"
     * @throws NumberFormatException if a pair's value part is not an integer
     */
    @Override
    public void execute(Tuple tuple) {
        String s = tuple.getStringByField("kv");
        for (String kv : s.split(" ")) {
            if (kv.isEmpty()) {
                continue; // empty payload or doubled separator
            }
            String[] splitKv = kv.split(",");
            if (splitKv.length != 2) {
                LOG.warning("Skipping malformed entry: " + kv);
                continue;
            }
            keyCount.put(splitKv[0], Integer.parseInt(splitKv[1]));
        }
        keyCount.remove(TIMER_FLAG);

        // Print only after all upstream partial results have been merged.
        if (counttime < UPSTREAM_TASKS) {
            counttime++;
            return;
        }

        // Sort only when there is something to print.
        List<Map.Entry<String, Integer>> ranked = ValueSort.sort(keyCount);
        int limit = Math.min(TOP_N, ranked.size());
        for (int i = 0; i < limit; i++) {
            Map.Entry<String, Integer> top = ranked.get(i);
            LOG.info(top.getKey() + ":" + top.getValue());
        }
        System.out.println("---------");
        keyCount.clear();
        counttime = 1;
    }

    /** Declares no output fields; this bolt is the end of the topology. */
    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
    }
}
}
/**
 * Helper for ranking map entries by their integer values.
 */
class ValueSort {
    /**
     * Returns the entries of {@code map} as a new list ordered from the largest
     * value to the smallest.
     *
     * <p>The list is a fresh {@link ArrayList}; the map itself is not reordered.
     * Entries with equal values keep the order in which the map's entry set
     * iterates them, because {@link Collections#sort} is stable.
     *
     * @param map counts keyed by word; values must be non-null
     * @return the map's entries sorted by descending value
     */
    public static List<Map.Entry<String, Integer>> sort(Map<String, Integer> map) {
        List<Map.Entry<String, Integer>> ranked = new ArrayList<>(map.size());
        ranked.addAll(map.entrySet());

        Comparator<Map.Entry<String, Integer>> highestFirst =
                new Comparator<Map.Entry<String, Integer>>() {
                    @Override
                    public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                        // Operands are swapped to get descending order.
                        return right.getValue().compareTo(left.getValue());
                    }
                };
        Collections.sort(ranked, highestFirst);
        return ranked;
    }
}