The code below has been tested in a local-cluster test environment, but it has not yet been submitted to the Docker cluster for a real run; this post will be updated when the final course project is completed.
Storm cluster
Kafka cluster
ZooKeeper cluster
For how to set up these three clusters, see my other posts.
The assignment requires using a Storm cluster to compute statistics over 10 stock data files. The dataset covers 10 stocks and includes fields such as trade amount, trade price, trade volume, and trade time.
Two tasks are implemented:
1. Real-time statistics of the cumulative trade amount and trade volume for each stock.
2. Real-time statistics of the hourly trade amount and trade volume for each stock.
A Kafka producer reads the files and publishes each row as a message; a KafkaSpout consumes the messages and hands the data to a parent bolt, which preprocesses it and fans it out to two child bolts that implement the two concrete tasks.
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>org.example</groupId>
    <artifactId>kafkaConsume</artifactId>
    <version>1.0-SNAPSHOT</version>
    <name>storm</name>
    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.12</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.13</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>2.4.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.storm</groupId>
            <artifactId>storm-core</artifactId>
            <version>2.2.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.storm</groupId>
            <artifactId>storm-kafka-client</artifactId>
            <version>2.2.0</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.47</version>
        </dependency>
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-csv</artifactId>
            <version>1.8</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.8.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>2.5.5</version>
                <configuration>
                    <archive>
                        <manifest>
                            <mainClass>com.yj.TCPClient.upload.App</mainClass>
                        </manifest>
                    </archive>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
package org.example.kafkaPC;
import org.apache.commons.csv.CSVRecord;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;
import org.example.utils.ReadFile;
import org.example.utils.TimeFormatter;
import java.util.*;
import java.util.stream.IntStream;
public class Producer {
    // Create a Kafka producer, read the CSV files,
    // compute each record's total amount from trade_volume and price,
    // and send the result to Kafka as a message
    public void producer() {
        // Input file paths
        List<String> filePaths = new ArrayList<>();
        filePaths.add("/home/veteran/data/fileData/股票数据/股票数据1.csv");
        filePaths.add("/home/veteran/data/fileData/股票数据/股票数据2.csv");
        filePaths.add("/home/veteran/data/fileData/股票数据/股票数据3.csv");
        filePaths.add("/home/veteran/data/fileData/股票数据/股票数据4.csv");
        filePaths.add("/home/veteran/data/fileData/股票数据/股票数据5.csv");
        filePaths.add("/home/veteran/data/fileData/股票数据/股票数据6.csv");
        filePaths.add("/home/veteran/data/fileData/股票数据/股票数据7.csv");
        filePaths.add("/home/veteran/data/fileData/股票数据/股票数据8.csv");
        filePaths.add("/home/veteran/data/fileData/股票数据/股票数据9.csv");
        filePaths.add("/home/veteran/data/fileData/股票数据/股票数据10.csv");
        // Target topic
        String topic = "stock_1";
        // Configure the producer
        Properties properties = new Properties();
        properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.43.219:9092,192.168.43.219:9093,192.168.43.219:9094");
        properties.put(ProducerConfig.LINGER_MS_CONFIG, 5);
        properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        KafkaProducer<String, String> kProducer = new KafkaProducer<>(properties);
        // File reader and time converter utilities
        ReadFile readFile = new ReadFile();
        TimeFormatter timeFormatter = new TimeFormatter();
        // Process each of the ten files
        IntStream.range(0, filePaths.size()).forEach(index -> {
            Iterable<CSVRecord> records = readFile.readCSV(filePaths.get(index));
            // Send every record to the topic
            for (CSVRecord csvRecord : records) {
                try {
                    // Use stock_code as the message key
                    String key = csvRecord.get("stock_code");
                    // Trade volume of this record
                    double trade_volume = Double.parseDouble(csvRecord.get("trade_volume"));
                    // Trade price of this record
                    float price = Float.parseFloat(csvRecord.get("price"));
                    // Total amount = volume * price
                    String totalPrice = Double.toString(trade_volume * price);
                    // Convert the time field to epoch milliseconds
                    String time = csvRecord.get("time");
                    long convertedTime = timeFormatter.convertTime(time);
                    System.out.println(key + "," + totalPrice + "," + convertedTime);
                    ProducerRecord<String, String> kRecord = new ProducerRecord<>(topic, key, totalPrice + "," + convertedTime);
                    // Send asynchronously through the Kafka producer
                    kProducer.send(kRecord);
                } catch (Exception e) {
                    // Log malformed records and keep going
                    e.printStackTrace();
                }
            }
        });
        // Close the producer (flushes any buffered messages)
        kProducer.close();
    }

    public static void main(String[] args) {
        Producer producer = new Producer();
        producer.producer();
    }
}
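The ReadFile and TimeFormatter helpers imported above are project utilities that are not shown in this post. A minimal sketch of what they might look like follows; the header-based CSV parsing and the yyyy-MM-dd HH:mm:ss time pattern are assumptions inferred from how the producer uses them, not the original implementation.

// Sketch: org/example/utils/ReadFile.java
package org.example.utils;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVRecord;
import java.io.FileReader;
import java.io.IOException;
// Read a CSV file whose first row is a header, so records can be
// looked up by column name (e.g. record.get("stock_code")).
public class ReadFile {
    public Iterable<CSVRecord> readCSV(String filePath) {
        try {
            return CSVFormat.DEFAULT.withFirstRecordAsHeader().parse(new FileReader(filePath));
        } catch (IOException e) {
            throw new RuntimeException("Failed to read " + filePath, e);
        }
    }
}

// Sketch: org/example/utils/TimeFormatter.java
package org.example.utils;
import java.text.ParseException;
import java.text.SimpleDateFormat;
// Convert the time column into epoch milliseconds, matching the long
// arithmetic in the bolts (3600000 ms = one hour). The pattern below
// is an assumption; adjust it to the actual dataset.
public class TimeFormatter {
    private final SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    public long convertTime(String time) throws ParseException {
        return format.parse(time).getTime();
    }
}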
package org.example.storm;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.kafka.spout.KafkaSpout;
import org.apache.storm.kafka.spout.KafkaSpoutConfig;
import org.apache.storm.topology.TopologyBuilder;
public class Topology {
    public static void main(String[] args) throws Exception {
        // Configure the Kafka spout
        KafkaSpoutConfig<String, String> kafkaSpoutConfig = KafkaSpoutConfig
                .builder("192.168.43.219:9092,192.168.43.219:9093,192.168.43.219:9094", "stock_1")
                .setProp(ConsumerConfig.GROUP_ID_CONFIG, "kafkaSpoutTestGroup")
                .build();
        // Build the topology
        TopologyBuilder topologyBuilder = new TopologyBuilder();
        // Spout: consume from Kafka
        topologyBuilder.setSpout("kafka-spout", new KafkaSpout<>(kafkaSpoutConfig), 1);
        // Parent bolt: preprocess and accumulate per-stock statistics
        topologyBuilder.setBolt("process-bolt", new ProcessingBolt()).shuffleGrouping("kafka-spout");
        // Child bolt 1: overall statistics (disabled here; see the note at the end of this post)
        // topologyBuilder.setBolt("next-bolt1", new NextBolt1(), 3).shuffleGrouping("process-bolt");
        // Child bolt 2: hourly statistics
        topologyBuilder.setBolt("next-bolt2", new NextBolt2(), 1).shuffleGrouping("process-bolt");
        // Topology configuration
        Config config = new Config();
        // Submission to the real cluster (disabled during local testing)
        // StormSubmitter.submitTopology("stockStatistic", config, topologyBuilder.createTopology());
        // Run on an in-process local cluster for testing
        LocalCluster cluster = new LocalCluster();
        cluster.submitTopology("LocalReadingFromKafkaApp", config, topologyBuilder.createTopology());
    }
}
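The StormSubmitter line above is commented out while testing locally. When the topology is later submitted to the real cluster, a common pattern is to pick the mode from a command-line argument. A sketch under that assumption (the worker count of 3 is arbitrary, and it requires import org.apache.storm.StormSubmitter;):

// Sketch: switch between cluster and local mode via args
Config config = new Config();
if (args.length > 0 && "cluster".equals(args[0])) {
    config.setNumWorkers(3); // arbitrary worker count
    StormSubmitter.submitTopology("stockStatistic", config, topologyBuilder.createTopology());
} else {
    LocalCluster cluster = new LocalCluster();
    cluster.submitTopology("LocalReadingFromKafkaApp", config, topologyBuilder.createTopology());
}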
package org.example.storm;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;
import java.util.HashMap;
import java.util.Map;
public class ProcessingBolt extends BaseRichBolt {
    private OutputCollector collector;
    // Per-stock cumulative trade count
    private Map<String, Integer> tradeVolumeMap;
    // Per-stock cumulative trade amount
    private Map<String, Double> totalPriceMap;
    // Per-stock earliest trade time seen
    private Map<String, Long> startTimeMap;
    // Per-stock latest trade time seen
    private Map<String, Long> accumulateTimeMap;

    @Override
    public void prepare(Map<String, Object> topoConf, TopologyContext topologyContext, OutputCollector outputCollector) {
        this.collector = outputCollector;
        // Initialise the per-stock accumulators
        tradeVolumeMap = new HashMap<>();
        totalPriceMap = new HashMap<>();
        startTimeMap = new HashMap<>();
        accumulateTimeMap = new HashMap<>();
    }

    @Override
    public void execute(Tuple tuple) {
        // The message key is the stock code; records are classified by it
        String key = tuple.getStringByField("key");
        // The message value has the form "totalAmount,time"
        String value = tuple.getStringByField("value");
        String[] valueList = value.split(",");
        double totalPrice = Double.parseDouble(valueList[0]);
        long time = Long.parseLong(valueList[1]);
        if (tradeVolumeMap.containsKey(key)) {
            // Count one more trade for this stock
            tradeVolumeMap.put(key, tradeVolumeMap.get(key) + 1);
            // Accumulate the trade amount
            totalPriceMap.put(key, totalPriceMap.get(key) + totalPrice);
            if (time > startTimeMap.get(key) && time > accumulateTimeMap.get(key)) {
                // This record is later than any seen so far: advance the latest time
                accumulateTimeMap.put(key, time);
            } else if (time < startTimeMap.get(key)) {
                // This record is earlier than any seen so far: move the start time back
                startTimeMap.put(key, time);
            }
        } else {
            // First record for this stock
            tradeVolumeMap.put(key, 1);
            totalPriceMap.put(key, totalPrice);
            startTimeMap.put(key, time);
            accumulateTimeMap.put(key, time);
        }
        // Pass the accumulator maps on to the child bolts
        collector.emit(new Values(tradeVolumeMap, totalPriceMap, startTimeMap, accumulateTimeMap));
        collector.ack(tuple);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        // Declare the fields emitted on the default stream
        outputFieldsDeclarer.declare(new Fields("tradeVolumeMap", "totalPriceMap", "startTimeMap", "accumulateTimeMap"));
    }
}
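One caveat: ProcessingBolt keeps its per-stock state in plain HashMaps, which is only correct while the bolt runs as a single task, as configured in the topology above. If its parallelism were ever raised, tuples for the same stock would have to be routed to the same task. A sketch of how that wiring could look, using the key field that the Kafka spout emits (this is not what the code above does):

// Sketch: with parallelism > 1, group by the message key so every
// record of a given stock always lands on the same bolt task
topologyBuilder.setBolt("process-bolt", new ProcessingBolt(), 2)
        .fieldsGrouping("kafka-spout", new Fields("key"));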
package org.example.storm;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Tuple;
import java.util.HashMap;
import java.util.Map;
public class NextBolt1 extends BaseRichBolt {
    private OutputCollector collector;
    private long startTime;

    @Override
    public void prepare(Map<String, Object> topoConf, TopologyContext topologyContext, OutputCollector outputCollector) {
        this.collector = outputCollector;
        this.startTime = System.currentTimeMillis();
    }

    @Override
    public void execute(Tuple tuple) {
        // Copy the accumulator maps passed down from the parent bolt
        Map<String, Integer> tradeVolumeMap = new HashMap<>((Map<String, Integer>) tuple.getValueByField("tradeVolumeMap"));
        Map<String, Double> totalPriceMap = new HashMap<>((Map<String, Double>) tuple.getValueByField("totalPriceMap"));
        Map<String, Long> startTimeMap = new HashMap<>((Map<String, Long>) tuple.getValueByField("startTimeMap"));
        Map<String, Long> accumulateTimeMap = new HashMap<>((Map<String, Long>) tuple.getValueByField("accumulateTimeMap"));
        for (Map.Entry<String, Integer> entry : tradeVolumeMap.entrySet()) {
            // Stock code, cumulative trade count and cumulative trade amount
            String stockName = entry.getKey();
            Integer totalVolume = entry.getValue();
            Double totalPrice = totalPriceMap.get(stockName);
            // Per-tuple printing is expensive, so it stays disabled here
            // System.out.printf("[%s] total amount %f, %d trades\n", stockName, totalPrice, totalVolume);
        }
        System.out.printf("Elapsed: %ds\n", (System.currentTimeMillis() - this.startTime) / 1000);
        collector.ack(tuple);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
    }
}
package org.example.storm;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Tuple;
import java.util.HashMap;
import java.util.Map;
public class NextBolt2 extends BaseRichBolt {
    // Accumulators received from the parent bolt
    private Map<String, Integer> tradeVolumeMap;
    private Map<String, Double> totalPriceMap;
    private Map<String, Long> startTimeMap;
    private Map<String, Long> accumulateTimeMap;
    private OutputCollector collector;
    // Snapshots taken at the start of each hourly window
    private Map<String, Long> recordStartTimeMap;
    private Map<String, Integer> recordTradeVolumeMap;
    private Map<String, Double> recordTotalPriceMap;
    private long startTime;

    @Override
    public void prepare(Map<String, Object> topoConf, TopologyContext topologyContext, OutputCollector outputCollector) {
        this.collector = outputCollector;
        this.startTime = System.currentTimeMillis();
        // Initialise the incoming accumulators and the per-window snapshots
        tradeVolumeMap = new HashMap<>();
        totalPriceMap = new HashMap<>();
        startTimeMap = new HashMap<>();
        accumulateTimeMap = new HashMap<>();
        recordStartTimeMap = new HashMap<>();
        recordTradeVolumeMap = new HashMap<>();
        recordTotalPriceMap = new HashMap<>();
    }

    @Override
    public void execute(Tuple tuple) {
        // Receive the accumulator maps from the parent bolt
        tradeVolumeMap = (Map<String, Integer>) tuple.getValueByField("tradeVolumeMap");
        totalPriceMap = (Map<String, Double>) tuple.getValueByField("totalPriceMap");
        startTimeMap = (Map<String, Long>) tuple.getValueByField("startTimeMap");
        accumulateTimeMap = (Map<String, Long>) tuple.getValueByField("accumulateTimeMap");
        // Work on copies so the parent's maps are not read while being modified
        Map<String, Integer> tradeVolumeMapCopy = new HashMap<>(tradeVolumeMap);
        Map<String, Double> totalPriceMapCopy = new HashMap<>(totalPriceMap);
        Map<String, Long> startTimeMapCopy = new HashMap<>(startTimeMap);
        Map<String, Long> accumulateTimeMapCopy = new HashMap<>(accumulateTimeMap);
        for (Map.Entry<String, Integer> entry : tradeVolumeMapCopy.entrySet()) {
            String key = entry.getKey();
            if (recordTradeVolumeMap.containsKey(key)) {
                long accumulateTime = accumulateTimeMapCopy.get(key);
                long recordStartTime = recordStartTimeMap.get(key);
                // Deltas since the last window snapshot
                long volume = tradeVolumeMapCopy.get(key) - recordTradeVolumeMap.get(key);
                double price = totalPriceMapCopy.get(key) - recordTotalPriceMap.get(key);
                // One hour of event time (3600000 ms) has passed: close the window
                if (accumulateTime - recordStartTime >= 3600000L) {
                    // System.out.printf("[%s] %d trades worth %f in the past hour\n", key, volume, price);
                    // Roll the snapshots forward to start the next window
                    recordStartTimeMap.put(key, accumulateTime);
                    recordTradeVolumeMap.put(key, tradeVolumeMapCopy.get(key));
                    recordTotalPriceMap.put(key, totalPriceMapCopy.get(key));
                }
            } else {
                // First sight of this stock: initialise its window snapshot
                recordStartTimeMap.put(key, startTimeMapCopy.get(key));
                recordTradeVolumeMap.put(key, tradeVolumeMapCopy.get(key));
                recordTotalPriceMap.put(key, totalPriceMapCopy.get(key));
            }
        }
        System.out.printf("Elapsed: %ds\n", (System.currentTimeMillis() - this.startTime) / 1000);
        collector.ack(tuple);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
    }
}
The two concrete tasks are implemented by two bolts. When both bolts run at the same time we occasionally need to inspect the results, but printing the output takes far longer than computing the data. During testing, if the two bolts print different amounts of output they consume different amounts of resources, and when the gap grows too large it can trigger thread errors, so the resource allocation between the two bolts has to be controlled.
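One way to keep the printing cost bounded, rather than balancing it between the two bolts by hand, would be to print on a fixed cadence using Storm's tick tuples instead of on every tuple. A sketch (the 10-second interval is an arbitrary choice, and this is not what the code above does):

// Sketch: inside a bolt, request a tick tuple every 10 seconds and
// only print the accumulated state when one arrives
@Override
public Map<String, Object> getComponentConfiguration() {
    Map<String, Object> conf = new HashMap<>();
    conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, 10);
    return conf;
}

@Override
public void execute(Tuple tuple) {
    if (TupleUtils.isTick(tuple)) {  // org.apache.storm.utils.TupleUtils
        // Print the accumulated statistics here, once per interval
        collector.ack(tuple);
        return;
    }
    // ... normal per-tuple accumulation, as in the bolts above ...
    collector.ack(tuple);
}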
The code above has only been tested on a local cluster. After packaging it and submitting it to the real cluster, the spout could not receive any Kafka messages; this problem is still unsolved, and this post will be updated once it is fixed during the later stages of the course project.