Trident is a high-level abstraction for doing real-time computing on top of Storm. It allows you to seamlessly intermix high-throughput (millions of messages per second), stateful stream processing with low-latency distributed querying. If you're familiar with high-level batch processing tools like Pig or Cascading, the concepts of Trident will be very familiar – Trident has joins, aggregations, grouping, functions, and filters. In addition to these, Trident adds primitives for doing stateful, incremental processing on top of any database or persistence store. Trident has consistent, exactly-once semantics, so it is easy to reason about Trident topologies.
package org.ljh.tridentdemo; import backtype.storm.Config; import backtype.storm.LocalCluster; import backtype.storm.LocalDRPC; import backtype.storm.StormSubmitter; import backtype.storm.generated.StormTopology; import backtype.storm.tuple.Fields; import backtype.storm.tuple.Values; import storm.trident.TridentState; import storm.trident.TridentTopology; import storm.trident.operation.BaseFunction; import storm.trident.operation.TridentCollector; import storm.trident.operation.builtin.Count; import storm.trident.operation.builtin.FilterNull; import storm.trident.operation.builtin.MapGet; import storm.trident.operation.builtin.Sum; import storm.trident.testing.FixedBatchSpout; import storm.trident.testing.MemoryMapState; import storm.trident.tuple.TridentTuple; public class TridentWordCount { public static class Split extends BaseFunction { @Override public void execute(TridentTuple tuple, TridentCollector collector) { String sentence = tuple.getString(0); for (String word : sentence.split(" ")) { collector.emit(new Values(word)); } } } public static StormTopology buildTopology(LocalDRPC drpc) { FixedBatchSpout spout = new FixedBatchSpout(new Fields("sentence"), 3, new Values( "the cow jumped over the moon"), new Values( "the man went to the store and bought some candy"), new Values( "four score and seven years ago"), new Values("how many apples can you eat"), new Values( "to be or not to be the person")); spout.setCycle(true); //创建拓扑对象 TridentTopology topology = new TridentTopology(); //这个流程用于统计单词数据,结果将被保存在wordCounts中 TridentState wordCounts = topology.newStream("spout1", spout) .parallelismHint(16) .each(new Fields("sentence"), new Split(), new Fields("word")) .groupBy(new Fields("word")) .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count")).parallelismHint(16); //这个流程用于查询上面的统计结果 topology.newDRPCStream("words", drpc) .each(new Fields("args"), new Split(), new Fields("word")) .groupBy(new Fields("word")) .stateQuery(wordCounts, new 
Fields("word"), new MapGet(), new Fields("count")) .each(new Fields("count"), new FilterNull()) .aggregate(new Fields("count"), new Sum(), new Fields("sum")); return topology.build(); } public static void main(String[] args) throws Exception { Config conf = new Config(); conf.setMaxSpoutPending(20); if (args.length == 0) { LocalDRPC drpc = new LocalDRPC(); LocalCluster cluster = new LocalCluster(); cluster.submitTopology("wordCounter", conf, buildTopology(drpc)); for (int i = 0; i < 100; i++) { System.out.println("DRPC RESULT: " + drpc.execute("words", "cat the dog jumped")); Thread.sleep(1000); } } else { conf.setNumWorkers(3); StormSubmitter.submitTopologyWithProgressBar(args[0], conf, buildTopology(null)); } } }
package com.netease.sytopology; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.util.Arrays; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import storm.kafka.BrokerHosts; import storm.kafka.StringScheme; import storm.kafka.ZkHosts; import storm.kafka.trident.OpaqueTridentKafkaSpout; import storm.kafka.trident.TridentKafkaConfig; import storm.trident.TridentTopology; import storm.trident.operation.BaseFunction; import storm.trident.operation.TridentCollector; import storm.trident.operation.builtin.Count; import storm.trident.testing.MemoryMapState; import storm.trident.tuple.TridentTuple; import backtype.storm.Config; import backtype.storm.StormSubmitter; import backtype.storm.generated.AlreadyAliveException; import backtype.storm.generated.InvalidTopologyException; import backtype.storm.generated.StormTopology; import backtype.storm.spout.SchemeAsMultiScheme; import backtype.storm.tuple.Fields; import backtype.storm.tuple.Values; /* * 本类完成以下内容 */ public class SyTopology { public static final Logger LOG = LoggerFactory.getLogger(SyTopology.class); private final BrokerHosts brokerHosts; public SyTopology(String kafkaZookeeper) { brokerHosts = new ZkHosts(kafkaZookeeper); } public StormTopology buildTopology() { TridentKafkaConfig kafkaConfig = new TridentKafkaConfig(brokerHosts, "ma30", "storm"); kafkaConfig.scheme = new SchemeAsMultiScheme(new StringScheme()); // TransactionalTridentKafkaSpout kafkaSpout = new // TransactionalTridentKafkaSpout(kafkaConfig); OpaqueTridentKafkaSpout kafkaSpout = new OpaqueTridentKafkaSpout(kafkaConfig); TridentTopology topology = new TridentTopology(); // TridentState wordCounts = topology.newStream("kafka4", kafkaSpout). 
each(new Fields("str"), new Split(), new Fields("word")).groupBy(new Fields("word")) .persistentAggregate(new MemoryMapState.Factory(), new Count(), new Fields("count")).parallelismHint(16); // .persistentAggregate(new HazelCastStateFactory(), new Count(), // new Fields("aggregates_words")).parallelismHint(2); return topology.build(); } public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException { String kafkaZk = args[0]; SyTopology topology = new SyTopology(kafkaZk); Config config = new Config(); config.put(Config.TOPOLOGY_TRIDENT_BATCH_EMIT_INTERVAL_MILLIS, 2000); String name = args[1]; String dockerIp = args[2]; config.setNumWorkers(9); config.setMaxTaskParallelism(5); config.put(Config.NIMBUS_HOST, dockerIp); config.put(Config.NIMBUS_THRIFT_PORT, 6627); config.put(Config.STORM_ZOOKEEPER_PORT, 2181); config.put(Config.STORM_ZOOKEEPER_SERVERS, Arrays.asList(dockerIp)); StormSubmitter.submitTopology(name, config, topology.buildTopology()); } static class Split extends BaseFunction { public void execute(TridentTuple tuple, TridentCollector collector) { String sentence = tuple.getString(0); for (String word : sentence.split(",")) { try { FileWriter fw = new FileWriter(new File("/home/data/test/ma30/ma30.txt"),true); fw.write(word); fw.flush(); fw.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } collector.emit(new Values(word)); } } } }
storm jar target/sytopology2-0.0.1-SNAPSHOT.jar com.netease.sytopology.SyTopology test3