streaming 时钟统计单词 java开发

要求:
统计单词,每10秒钟输出一个统计结果:
思考:几个spout?
几个bolt?
streaming 时钟统计单词 java开发_第1张图片
streaming 时钟统计单词 java开发_第2张图片
由BoltA去识别时钟信号;如果改由BoltB来识别时钟、每10s输出一次,则没有必要。
如果由A去识别时钟,A中各个task收到时钟信号的先后顺序不一定一样。
代码实现:

package streaming;

import org.apache.commons.collections.map.HashedMap;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.spout.SpoutOutputCollector;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.topology.base.BaseRichSpout;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

import java.util.*;
import java.util.logging.Logger;

public class CountBeatingWindow {
    /**
     * Builds the word-count topology and submits it to an in-process
     * {@link LocalCluster} under the name "window".
     *
     * <p>Wiring:
     * <ul>
     *   <li>"spout1" ({@link WindowSpout}) and "spout2" ({@link TimeSpout}) are the sources;</li>
     *   <li>"bolt1" ({@link WindowCountBoltA}, parallelism hint 3) subscribes to "spout2"
     *       with an all grouping and to "spout1" with a fields grouping on "word";</li>
     *   <li>"bolt2" ({@link GatherBoltB}) subscribes to "bolt1" with a shuffle grouping.</li>
     * </ul>
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        TopologyBuilder topology = new TopologyBuilder();

        // Sources: the word stream and the clock signal.
        topology.setSpout("spout1", new WindowSpout());
        topology.setSpout("spout2", new TimeSpout());

        // Per-word counting stage.
        topology.setBolt("bolt1", new WindowCountBoltA(), 3)
                .allGrouping("spout2")
                .fieldsGrouping("spout1", new Fields("word"));

        // Final gathering stage.
        topology.setBolt("bolt2", new GatherBoltB())
                .shuffleGrouping("bolt1");

        Config conf = new Config();
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("window", conf, topology.createTopology());
    }
    /**
     * Clock spout: emits the marker string {@code "time_is_over"} on the field
     * "word" once at least 10 seconds have passed since the previous marker
     * (or since {@link #open} was called).
     */
    static class TimeSpout extends BaseRichSpout{
        private static final long serialVersionUID =1L;
        /** Minimum time between two markers, in milliseconds. */
        private static final long WINDOW_MILLIS = 10 * 1000L;
        /** Pause between two clock checks; must stay well below {@link #WINDOW_MILLIS}. */
        private static final long POLL_MILLIS = 100L;

        /** Time of the last emitted marker (or of open()), in epoch milliseconds. */
        long prevTime = System.currentTimeMillis();
        /** Marker value sent downstream when a window ends. */
        String flag ="time_is_over";
        SpoutOutputCollector collector;

        /**
         * Stores the collector and starts the first window.
         *
         * @param map topology configuration; not used
         * @param topologyContext task context; not used
         * @param spoutOutputCollector collector used to emit the marker
         */
        @Override
        public void open(Map map, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
            this.collector=spoutOutputCollector;
            // Measure the first window from the moment the spout is opened rather
            // than from the moment the spout object was constructed.
            this.prevTime = System.currentTimeMillis();
        }

        /**
         * Checks the clock, emits the marker if the window has elapsed, then
         * sleeps for {@link #POLL_MILLIS} milliseconds.
         */
        @Override
        public void nextTuple() {
            long currentTime = System.currentTimeMillis();
            if (currentTime - prevTime >= WINDOW_MILLIS) {
                prevTime = currentTime; // slide the window forward
                collector.emit(new Values(flag));
            }
            try {
                Thread.sleep(POLL_MILLIS);
            } catch (InterruptedException e) {
                // Keep the interrupt visible to the calling thread instead of swallowing it.
                Thread.currentThread().interrupt();
            }
        }

        /**
         * Declares a single output field named "word"; the marker shares the
         * field name used by the word stream.
         */
        @Override
        public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
            outputFieldsDeclarer.declare(new Fields("word"));
        }
    }
    /**
     * Word source: on every call to {@link #nextTuple} emits one word picked at
     * random from a fixed sample sentence, on the field "word".
     */
    static class WindowSpout extends BaseRichSpout{
        /** Pause after each emitted word, in milliseconds. */
        private static final long EMIT_PAUSE_MILLIS = 100L;

        SpoutOutputCollector collector;
        /** Vocabulary, filled in open() by splitting the sample sentence on whitespace. */
        String [] words;
        Random r;

        /**
         * Stores the collector, builds the vocabulary and creates the random generator.
         *
         * @param map topology configuration; not used
         * @param topologyContext task context; not used
         * @param spoutOutputCollector collector used to emit words
         */
        @Override
        public void open(Map map, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
            this.collector=spoutOutputCollector;
            // Sample data; the text is runtime input and is kept exactly as written.
            String line ="Strom integrates with queueing and database " +
                    "technologies you already use Storm topology consumes " +
                    "streams data processes those streams arbitrarily " +
                    "comples ways repartitioning the streams between each stage " +
                    "computation howerver needed read more tutorial";
            words = line.split("\\s+");
            r=new Random();
        }

        /**
         * Emits one randomly chosen word, then sleeps for
         * {@link #EMIT_PAUSE_MILLIS} milliseconds.
         */
        @Override
        public void nextTuple() {
            String word = words[r.nextInt(words.length)];
            collector.emit(new Values(word));
            try {
                Thread.sleep(EMIT_PAUSE_MILLIS);
            } catch (InterruptedException e) {
                // Keep the interrupt visible to the calling thread instead of swallowing it.
                Thread.currentThread().interrupt();
            }
        }

        /** Declares a single output field named "word". */
        @Override
        public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
            outputFieldsDeclarer.declare(new Fields("word"));
        }
    }
    /**
     * Counting bolt: keeps a per-value counter for everything received on the
     * field "word". When the received value equals the marker
     * {@code "time_is_over"}, it emits all counters as one string on the field
     * "kv" and starts over with an empty table.
     *
     * <p>Payload format: {@code key,count key,count ...} — entries are ordered by
     * count (largest first) and each entry is followed by a single space.
     */
    static class WindowCountBoltA extends BaseRichBolt{
        OutputCollector collector;
        /** Marker value that ends a counting window. */
        String flag = "time_is_over";
        /** Counters for the current window, keyed by received value. */
        Map<String,Integer> kv=new HashMap<String,Integer>();

        /**
         * Stores the collector used to emit results and ack inputs.
         *
         * @param map topology configuration; not used
         * @param topologyContext task context; not used
         * @param outputCollector collector for this bolt
         */
        @Override
        public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
            this.collector=outputCollector;
        }

        /**
         * Counts the incoming value and, if it is the marker, flushes the window.
         *
         * @param tuple input carrying a string in the field "word"
         */
        @Override
        public void execute(Tuple tuple) {
            String word = tuple.getStringByField("word");

            // The marker is counted like any other value. Its entry therefore always
            // appears in the payload, so the emitted string is never empty; the
            // gathering bolt removes the "time_is_over" key again.
            Integer seen = kv.get(word);
            kv.put(word, seen == null ? 1 : seen + 1);

            if (flag.equals(word)) {
                StringBuilder sb = new StringBuilder();
                for (Map.Entry<String,Integer> entry : ValueSort.sort(kv)) {
                    sb.append(entry.getKey()).append(',').append(entry.getValue()).append(' ');
                }
                collector.emit(new Values(sb.toString()));
                kv.clear(); // the next window starts from zero
            }

            // Acknowledge the input as processed.
            collector.ack(tuple);
        }

        /** Declares a single output field named "kv". */
        @Override
        public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
            outputFieldsDeclarer.declare(new Fields("kv"));
        }
    }
    /**
     * Gathering bolt: merges the partial count strings received on the field
     * "kv" and, after every {@link #UPSTREAM_TASKS}-th message, logs up to
     * {@link #TOP_N} entries with the highest counts and resets its state.
     *
     * <p>Expected payload format: {@code key,count key,count ...} separated by
     * single spaces.
     */
    static class GatherBoltB extends BaseRichBolt{
        /**
         * Number of partial results that make up one window. Must equal the
         * parallelism hint given to "bolt1" when the topology is built (3).
         */
        private static final int UPSTREAM_TASKS = 3;
        /** Maximum number of entries logged per window. */
        private static final int TOP_N = 10;

        /** Counts gathered so far for the current window. */
        Map<String,Integer>keyCount=new LinkedHashMap<>();
        /** 1-based index of the partial result currently being processed. */
        int counttime=1;
        private static final Logger Log =Logger.getLogger(CountBeatingWindow.class.getName());
        OutputCollector collector;

        /**
         * Stores the collector so that inputs can be acked.
         *
         * @param map topology configuration; not used
         * @param topologyContext task context; not used
         * @param outputCollector collector for this bolt
         */
        @SuppressWarnings("rawtypes")
        @Override
        public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
            this.collector = outputCollector;
        }

        /**
         * Merges one partial result and prints the window once all partial
         * results have been seen.
         *
         * @param tuple input carrying the payload string in the field "kv"
         */
        @Override
        public void execute(Tuple tuple) {
            String s = tuple.getStringByField("kv");
            for (String kv : s.split(" ")) {
                String[] splitKv = kv.split(",");
                if (splitKv.length != 2) {
                    continue; // blank or malformed token: nothing to merge
                }
                try {
                    keyCount.put(splitKv[0], Integer.parseInt(splitKv[1]));
                } catch (NumberFormatException e) {
                    Log.warning("Skipping entry with non-numeric count: " + kv);
                }
            }
            // The clock marker is carried in every payload but is not a word.
            keyCount.remove("time_is_over");

            if (counttime < UPSTREAM_TASKS) {
                // Still waiting for the remaining partial results of this window.
                counttime++;
            } else {
                // Sort only when a window is actually printed.
                int printed = 0;
                for (Map.Entry<String,Integer> entry : ValueSort.sort(keyCount)) {
                    if (printed >= TOP_N) {
                        break;
                    }
                    Log.info(entry.getKey() + ":" + entry.getValue());
                    printed++;
                }
                System.out.println("---------");
                keyCount.clear();
                counttime = 1;
            }

            // Acknowledge the input as processed.
            collector.ack(tuple);
        }

        /** This bolt emits nothing, so no output fields are declared. */
        @Override
        public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {

        }
    }


}
/**
 * Helper for ordering the entries of a map by their values.
 */
class ValueSort {
    /**
     * Returns the entries of {@code map} in a new list, sorted by value with
     * the largest value first. Entries with equal values keep the relative
     * order in which the map's entry set iterates them, because
     * {@link Collections#sort} is stable. The map itself is not reordered.
     *
     * @param map the map whose entries are to be ordered; must not be null and
     *            must not contain null values
     * @param <K> key type
     * @param <V> value type; must be comparable with itself
     * @return a new list holding the map's entries in descending value order
     * @throws NullPointerException if {@code map} is null or a compared value is null
     */
    public static <K, V extends Comparable<? super V>> List<Map.Entry<K, V>> sort(Map<K, V> map) {
        List<Map.Entry<K, V>> list = new ArrayList<>(map.entrySet());

        Collections.sort(list, new Comparator<Map.Entry<K, V>>() {
            @Override
            public int compare(Map.Entry<K, V> o1, Map.Entry<K, V> o2) {
                // Operands are swapped to get descending order; this avoids
                // negating the result of compareTo.
                return o2.getValue().compareTo(o1.getValue());
            }
        });
        return list;
    }
}

你可能感兴趣的:(大数据,java,streaming,api)