Solution:
1. Pull in Storm's Kafka integration, instantiate a TridentKafkaConfig to connect to Kafka, and start four threads, each subscribing to its corresponding log topic.
2. Write the topology with Trident: the first function (the "bolt") determines the level of each log line and emits it, groupBy then groups the tuples of a batch by level, and Count computes the number of log lines per level.
3. Write the computed count and its level into HBase through the HBase client API (the target table must already exist; see the sketch right after this list).
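Step 3 assumes the HBase table already exists. A minimal sketch for creating it, assuming the same table name loglevel, column family result, and ZooKeeper quorum master that appear in the code further below (HTableDescriptor/HColumnDescriptor are the pre-2.x API matching the 1.4.9 client used here):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;

public class CreateLogLevelTable {
    public static void main(String[] args) throws IOException {
        Configuration config = HBaseConfiguration.create();
        config.set("hbase.zookeeper.quorum", "master");
        try (Connection con = ConnectionFactory.createConnection(config);
             Admin admin = con.getAdmin()) {
            TableName table = TableName.valueOf("loglevel");
            if (!admin.tableExists(table)) {
                // one column family "result", matching the Put issued by the HBase function below
                HTableDescriptor desc = new HTableDescriptor(table);
                desc.addFamily(new HColumnDescriptor("result"));
                admin.createTable(desc);
            }
        }
    }
}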
Maven dependencies (Storm 0.9.7, Kafka 0.11)
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-testing-util</artifactId>
    <version>1.4.9</version>
    <scope>test</scope>
</dependency>
<dependency>
    <groupId>org.apache.storm</groupId>
    <artifactId>storm-core</artifactId>
    <version>0.9.7</version>
</dependency>
<dependency>
    <groupId>net.wurstmeister.storm</groupId>
    <artifactId>storm-kafka-0.8-plus</artifactId>
    <version>0.4.0</version>
</dependency>
<dependency>
    <groupId>commons-collections</groupId>
    <artifactId>commons-collections</artifactId>
    <version>3.2.1</version>
</dependency>
<dependency>
    <groupId>com.google.guava</groupId>
    <artifactId>guava</artifactId>
    <version>15.0</version>
</dependency>
<dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka-clients</artifactId>
    <version>0.11.0.0</version>
</dependency>
<dependency>
    <groupId>log4j</groupId>
    <artifactId>log4j</artifactId>
    <version>1.2.14</version>
</dependency>
<dependency>
    <groupId>org.slf4j</groupId>
    <artifactId>log4j-over-slf4j</artifactId>
    <version>1.7.21</version>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-common</artifactId>
    <version>1.4.9</version>
    <scope>compile</scope>
</dependency>
<dependency>
    <groupId>org.apache.hbase</groupId>
    <artifactId>hbase-client</artifactId>
    <version>1.4.9</version>
    <scope>compile</scope>
</dependency>
Storm topology
ZkHosts zkHosts = new ZkHosts("master:2181");
// connect through ZooKeeper and subscribe to the Kafka log topic held in the topic variable
TridentKafkaConfig kafkaConfig = new TridentKafkaConfig(zkHosts, topic);
OpaqueTridentKafkaSpout kafkaSpout = new OpaqueTridentKafkaSpout(kafkaConfig);
TridentTopology tridentTopology = new TridentTopology();
Stream inputStream = tridentTopology.newStream("spout", kafkaSpout);
// tag each log line with its level, group by level, count, and write the result to HBase
inputStream.each(new Fields("bytes"), new SetKey(), new Fields("level"))
        .groupBy(new Fields("level"))
        .aggregate(new Count(), new Fields("count"))
        .each(new Fields("level", "count"), new PutIntoHBase(), new Fields())
        .parallelismHint(1);
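The snippet above only defines the stream; the topology still has to be built and submitted. A minimal continuation for a local test run, assuming Storm 0.9.7's LocalCluster and a topology name of "log-level-count" (both are assumptions, not part of the original code):

Config conf = new Config();
conf.setNumWorkers(1);
// build() turns the Trident stream definition into a regular StormTopology
StormTopology topology = tridentTopology.build();
// run in-process for testing; on a real cluster use StormSubmitter.submitTopology(...) instead
LocalCluster cluster = new LocalCluster();
cluster.submitTopology("log-level-count", conf, topology);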
Trident functions (the "bolts")
class SetKey extends BaseFunction {
    @Override
    public void execute(TridentTuple tridentTuple, TridentCollector tridentCollector) {
        // the Kafka spout emits the raw record as a byte[] in the "bytes" field
        String message = new String((byte[]) tridentTuple.get(0));
        // determine the level of this log line; default to "debug" if no marker is found
        String level;
        if (message.contains("INFO"))
            level = "info";
        else if (message.contains("ERROR"))
            level = "error";
        else if (message.contains("WARN"))
            level = "warn";
        else
            level = "debug";
        tridentCollector.emit(new Values(level));
    }
}
class PutIntoHBase extends BaseFunction {
    @Override
    public void execute(TridentTuple tridentTuple, TridentCollector tridentCollector) {
        String level = tridentTuple.getStringByField("level");
        Long count = tridentTuple.getLongByField("count");
        Configuration config = HBaseConfiguration.create();
        config.set("hbase.zookeeper.quorum", "master");
        // note: opening a Connection per tuple is expensive; see the prepare()/cleanup() sketch after this class
        try (Connection con = ConnectionFactory.createConnection(config);
             // get the loglevel table
             Table hTable = con.getTable(TableName.valueOf("loglevel"))) {
            // row key = level; column family "result", qualifier "count"
            Put p = new Put(Bytes.toBytes(level));
            p.addColumn(Bytes.toBytes("result"),
                    Bytes.toBytes("count"), Bytes.toBytes(Long.toString(count)));
            hTable.put(p);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
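Since execute() runs for every aggregated tuple, re-creating the HBase Connection each time adds noticeable overhead. A possible refinement, sketched below under the assumption that one Connection can be shared for the lifetime of the task, is to open it in prepare() and close it in cleanup(); PutIntoHBaseCached is a hypothetical name, not part of the original code:

class PutIntoHBaseCached extends BaseFunction {
    // transient: Connection/Table are not serializable and are re-created on the worker in prepare()
    private transient Connection con;
    private transient Table hTable;

    @Override
    public void prepare(Map conf, TridentOperationContext context) {
        try {
            Configuration config = HBaseConfiguration.create();
            config.set("hbase.zookeeper.quorum", "master");
            con = ConnectionFactory.createConnection(config);
            hTable = con.getTable(TableName.valueOf("loglevel"));
        } catch (IOException e) {
            throw new RuntimeException("could not connect to HBase", e);
        }
    }

    @Override
    public void execute(TridentTuple tridentTuple, TridentCollector tridentCollector) {
        try {
            Put p = new Put(Bytes.toBytes(tridentTuple.getStringByField("level")));
            p.addColumn(Bytes.toBytes("result"), Bytes.toBytes("count"),
                    Bytes.toBytes(Long.toString(tridentTuple.getLongByField("count"))));
            hTable.put(p);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    @Override
    public void cleanup() {
        try {
            if (hTable != null) hTable.close();
            if (con != null) con.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}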