The relevant Maven dependencies are as follows:
<properties>
    <hadoop.version>2.7.2</hadoop.version> <!-- not referenced by the dependencies below -->
    <hbase.version>1.2.3</hbase.version>
    <kafka.version>0.10.0.0</kafka.version>
    <jstorm.version>2.2.1</jstorm.version>
    <elasticsearch.version>5.3.0</elasticsearch.version>
</properties>
<dependencies>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-common</artifactId>
        <version>${hbase.version}</version>
        <exclusions>
            <exclusion>
                <groupId>org.slf4j</groupId>
                <artifactId>slf4j-log4j12</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-server</artifactId>
        <version>${hbase.version}</version>
        <exclusions>
            <exclusion>
                <groupId>org.slf4j</groupId>
                <artifactId>slf4j-log4j12</artifactId>
            </exclusion>
            <exclusion>
                <groupId>io.netty</groupId>
                <artifactId>netty-all</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-client</artifactId>
        <version>${hbase.version}</version>
        <exclusions>
            <exclusion>
                <groupId>io.netty</groupId>
                <artifactId>netty-all</artifactId>
            </exclusion>
        </exclusions>
    </dependency>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-protocol</artifactId>
        <version>${hbase.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-annotations</artifactId>
        <version>${hbase.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hbase</groupId>
        <artifactId>hbase-hadoop2-compat</artifactId>
        <version>${hbase.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka_2.11</artifactId>
        <version>${kafka.version}</version>
    </dependency>
    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka-clients</artifactId>
        <version>${kafka.version}</version>
    </dependency>
    <dependency>
        <groupId>com.alibaba.jstorm</groupId>
        <artifactId>jstorm-core</artifactId>
        <version>${jstorm.version}</version>
    </dependency>
    <dependency>
        <groupId>org.elasticsearch</groupId>
        <artifactId>elasticsearch</artifactId>
        <version>${elasticsearch.version}</version>
    </dependency>
    <dependency>
        <groupId>org.elasticsearch.client</groupId>
        <artifactId>transport</artifactId>
        <version>${elasticsearch.version}</version>
    </dependency>
</dependencies>
Copy the jar packages required by the dependencies above into the extlib directory under the JStorm installation directory.
The Java example code is as follows:
import java.util.ArrayList;
import java.util.List;
import org.platform.modules.jstorm.bolt.elastic.ElasticBolt;
import org.platform.modules.jstorm.bolt.hbase.HBaseBolt;
import org.platform.modules.jstorm.spout.kafka.BrokerHosts;
import org.platform.modules.jstorm.spout.kafka.KafkaSpout;
import org.platform.modules.jstorm.spout.kafka.SpoutConfig;
import org.platform.modules.jstorm.spout.kafka.StringMultiScheme;
import org.platform.modules.jstorm.spout.kafka.ZkHosts;
import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.utils.Utils;
public class SimpleJStormTopology {
private static final String TOPOLOGY_NAME = "Kafka2HBaseElastic";
private static final String KAFKA_SPOUT = "KafkaSpout";
private static final String HBASE_BOLT = "HBaseBolt";
private static final String ELASTIC_BOLT = "ElasticBolt";
private static boolean isCluster = true;
public static void main(String[] args) {
String brokerZks = "192.168.0.11:2181,192.168.0.12:2181,192.168.0.13:2181";
String topic = "elastic5";
String zkRoot = "/kafka";
String id = "jstormdataprocess";
BrokerHosts brokerHosts = new ZkHosts(brokerZks, "/kafka/brokers");
SpoutConfig spoutConfig = new SpoutConfig(brokerHosts, topic, zkRoot, id);
spoutConfig.startOffsetTime = -1L; // -2 reads from the earliest Kafka offset, -1 from the latest offset, 0 resumes from the offset stored in ZooKeeper
spoutConfig.scheme = new StringMultiScheme();
List<String> zkServers = new ArrayList<String>();
zkServers.add("192.168.0.11");
zkServers.add("192.168.0.12");
zkServers.add("192.168.0.13");
spoutConfig.zkServers = zkServers;
spoutConfig.zkPort = 2181;
TopologyBuilder builder = new TopologyBuilder();
KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);
builder.setSpout(KAFKA_SPOUT, kafkaSpout, 1);
ElasticBolt elasticBolt = new ElasticBolt();
builder.setBolt(ELASTIC_BOLT, elasticBolt, 3).setNumTasks(6).localOrShuffleGrouping(KAFKA_SPOUT);
HBaseBolt hbaseBolt = new HBaseBolt();
builder.setBolt(HBASE_BOLT, hbaseBolt, 3).setNumTasks(6).localOrShuffleGrouping(KAFKA_SPOUT);
/**
BaseHBaseMapper mapper = new BaseHBaseMapper()
.withRowKeyField("_id").withColumnFamily("i");
List<String> fields = new ArrayList<String>();
fields.add("insertTime");
fields.add("updateTime");
fields.add("sourceFile");
mapper.withColumnFields(new Fields(fields));
BaseHBaseBolt hbaseBolt = new BaseHBaseBolt("logistics", mapper)
.withConfigKey("hbase.config").withBatchSize(1000);
builder.setBolt(HBASE_BOLT, hbaseBolt, 2).allGrouping(KAFKA_SPOUT);
*/
Config config = new Config();
/**
Map<String, Object> hbaseConfig = new HashMap<String, Object>();
hbaseConfig.put("hbase.master", "192.168.0.15:60000");
hbaseConfig.put("hbase.rootdir", "/hbase");
hbaseConfig.put("hbase.zookeeper.quorum", "192.168.0.15,192.168.0.16,192.168.0.17");
hbaseConfig.put("hbase.zookeeper.property.clientPort", "2181");
config.put("hbase.config", hbaseConfig);
**/
config.setDebug(true);
/**
config.put(Config.STORM_META_SERIALIZATION_DELEGATE, "org.apache.storm.serialization.SerializationDelegate");
*/
if (isCluster) {
try {
config.setNumWorkers(3);
StormSubmitter.submitTopologyWithProgressBar(TOPOLOGY_NAME, config, builder.createTopology());
} catch (Exception e) {
e.printStackTrace();
}
} else {
try {
LocalCluster cluster = new LocalCluster();
cluster.submitTopology(TOPOLOGY_NAME, config, builder.createTopology());
Utils.sleep(10000000);
cluster.killTopology(TOPOLOGY_NAME);
cluster.shutdown();
} catch (Exception e) {
e.printStackTrace();
}
}
}
}
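The KafkaSpout above is configured with a StringMultiScheme, which is why the downstream bolts read each message with tuple.getString(0). That class is not listed in this post; the following is a minimal sketch, assuming the forked spout keeps the classic backtype.storm.spout.MultiScheme contract (raw byte[] in, an iterable of output tuples out) and that the single output field is named "str" as in the stock storm-kafka StringScheme.

import java.io.UnsupportedEncodingException;
import java.util.Arrays;
import java.util.List;
import backtype.storm.spout.MultiScheme;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
// Hypothetical sketch: decodes each Kafka message as a UTF-8 string and
// emits it as a single tuple field, so bolts can call tuple.getString(0).
public class StringMultiScheme implements MultiScheme {
    private static final long serialVersionUID = 1L;
    public Iterable<List<Object>> deserialize(byte[] ser) {
        try {
            List<Object> values = new Values(new String(ser, "UTF-8"));
            return Arrays.asList(values);
        } catch (UnsupportedEncodingException e) {
            throw new RuntimeException(e);
        }
    }
    public Fields getOutputFields() {
        return new Fields("str");
    }
}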
import java.util.Map;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Tuple;
public class ElasticBolt extends BaseRichBolt {
private static final long serialVersionUID = 1L;
private int batchSize = 1000;
private ElasticHelper elasticHelper = null;
@SuppressWarnings("rawtypes")
public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
elasticHelper = new ElasticHelper(batchSize, collector);
}
public void execute(Tuple input) {
try {
elasticHelper.add(input);
} catch (Exception e) {
elasticHelper.fail(e);
}
}
public void declareOutputFields(OutputFieldsDeclarer declarer) {
}
}
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.platform.modules.elastic5.ESClient;
import org.platform.utils.json.GsonUtils;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.client.Client;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import backtype.storm.task.OutputCollector;
import backtype.storm.tuple.Tuple;
public class ElasticHelper {
private static Logger LOG = LoggerFactory.getLogger(ElasticHelper.class);
private List<Tuple> tuples = null;
private List<String> datas = null;
private int batchSize = 1000;
private OutputCollector collector = null;
public ElasticHelper(int batchSize, OutputCollector collector) {
this.tuples = new ArrayList<Tuple>();
this.datas = new ArrayList<String>();
if (batchSize > 0) this.batchSize = batchSize;
this.collector = collector;
}
public void add(Tuple tuple) {
tuples.add(tuple);
datas.add(tuple.getString(0));
if (tuples.size() == batchSize) {
bulkInsert(datas);
datas.clear();
ack();
}
}
public void ack() {
for (int i = 0, len = tuples.size(); i < len; i++) {
collector.ack(tuples.get(i));
}
tuples.clear();
}
public void fail(Exception e) {
collector.reportError(e);
for (int i = 0, len = tuples.size(); i < len; i++) {
collector.fail(tuples.get(i));
}
tuples.clear();
datas.clear();
}
public void bulkInsert(List<String> datas) {
if (null == datas || datas.size() == 0) return;
Client client = ESClient.getInstance().getClient();
BulkRequestBuilder bulkRequestBuilder = client.prepareBulk();
try {
    IndexRequestBuilder irb = null;
    for (int i = 0, len = datas.size(); i < len; i++) {
        // Build the index request from the JSON payload; the index/type/_id
        // fields mirror the ones consumed in HBaseHelper.bulkInsert().
        Map<String, Object> source = GsonUtils.fromJsonToMap(datas.get(i));
        String index = String.valueOf(source.remove("index"));
        String type = String.valueOf(source.remove("type"));
        String id = String.valueOf(source.remove("_id"));
        irb = client.prepareIndex(index, type, id).setSource(source);
        bulkRequestBuilder.add(irb);
    }
} catch (Exception e) {
    LOG.error(e.getMessage(), e);
}
BulkResponse bulkResponse = bulkRequestBuilder.execute().actionGet();
if (bulkResponse.hasFailures()) {
LOG.error(bulkResponse.buildFailureMessage());
}
System.out.println("elastic5 insert " + datas.size() + " records finish!");
}
}
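ElasticHelper obtains its Client from an ESClient singleton that is not listed in this post. Below is a minimal sketch of such a singleton, assuming the Elasticsearch 5.x TransportClient provided by the org.elasticsearch.client:transport dependency declared above; the cluster name and node address are placeholders.

import java.net.InetAddress;
import org.elasticsearch.client.Client;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.transport.client.PreBuiltTransportClient;
// Hypothetical sketch of the ESClient singleton used by ElasticHelper.
// It builds one Elasticsearch 5.x TransportClient and reuses it for every
// bulk request issued by the topology workers.
public class ESClient {
    private static final ESClient INSTANCE = new ESClient();
    private TransportClient client = null;
    private ESClient() {
        try {
            Settings settings = Settings.builder()
                    .put("cluster.name", "elasticsearch") // placeholder cluster name
                    .build();
            client = new PreBuiltTransportClient(settings)
                    // placeholder node address; 9300 is the default transport port
                    .addTransportAddress(new InetSocketTransportAddress(
                            InetAddress.getByName("192.168.0.15"), 9300));
        } catch (Exception e) {
            throw new RuntimeException("failed to initialize Elasticsearch client", e);
        }
    }
    public static ESClient getInstance() {
        return INSTANCE;
    }
    public Client getClient() {
        return client;
    }
}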
import java.util.Map;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Tuple;
public class HBaseBolt extends BaseRichBolt {
private static final long serialVersionUID = 1L;
private int batchSize = 1000;
private HBaseHelper hbaseHelper = null;
@SuppressWarnings("rawtypes")
public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
hbaseHelper = new HBaseHelper(batchSize, collector);
}
public void execute(Tuple input) {
try {
hbaseHelper.add(input);
} catch (Exception e) {
hbaseHelper.fail(e);
}
}
public void declareOutputFields(OutputFieldsDeclarer declarer) {
}
}
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.platform.utils.bigdata.HBaseUtils;
import org.platform.utils.json.GsonUtils;
import org.platform.utils.serde.SerializerUtils;
import backtype.storm.task.OutputCollector;
import backtype.storm.tuple.Tuple;
public class HBaseHelper {
private List<Tuple> tuples = null;
private List<String> datas = null;
private int batchSize = 1000;
private OutputCollector collector = null;
public HBaseHelper(int batchSize, OutputCollector collector) {
this.tuples = new ArrayList<Tuple>();
this.datas = new ArrayList<String>();
if (batchSize > 0) this.batchSize = batchSize;
this.collector = collector;
}
public void add(Tuple tuple) {
tuples.add(tuple);
datas.add(tuple.getString(0));
if (tuples.size() == batchSize) {
bulkInsert(datas);
datas.clear();
ack();
}
}
public void ack() {
for (int i = 0, len = tuples.size(); i < len; i++) {
collector.ack(tuples.get(i));
}
tuples.clear();
}
public void fail(Exception e) {
collector.reportError(e);
for (int i = 0, len = tuples.size(); i < len; i++) {
collector.fail(tuples.get(i));
}
tuples.clear();
datas.clear();
}
public void bulkInsert(List<String> datas) {
if (null == datas || datas.size() == 0) return;
Map<String, List<Put>> map = new HashMap<String, List<Put>>();
Map<String, Object> source = null;
for (int i = 0, len = datas.size(); i < len; i++) {
source = GsonUtils.fromJsonToMap(datas.get(i));
source.remove("index");
String tableName = String.valueOf(source.remove("type"));
List<Put> puts = map.get(tableName);
if (null == puts) {
puts = new ArrayList<Put>();
map.put(tableName, puts);
}
String rowKey = String.valueOf(source.remove("_id"));
Put put = new Put(Bytes.toBytes(rowKey));
for (Map.Entry<String, Object> entry : source.entrySet()) {
String column = entry.getKey();
String family = column.startsWith("c") ? "i" : "s";
put.addColumn(Bytes.toBytes(family), Bytes.toBytes(column), SerializerUtils.write(entry.getValue()));
}
puts.add(put);
}
for (Map.Entry<String, List<Put>> entry : map.entrySet()) {
try {
HBaseUtils.insertRecords(entry.getKey(), entry.getValue());
} catch (Exception e) {
e.printStackTrace();
}
}
System.out.println("hbase insert " + datas.size() + " records finish!");
}
}
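HBaseHelper delegates the actual writes to HBaseUtils.insertRecords, which is also not listed here. The following is a minimal sketch under the HBase 1.2 client API; it assumes the configuration is loaded from hbase-site.xml on the classpath, and the ZooKeeper quorum shown is the one from the commented-out config above, used here only as a placeholder override.

import java.io.IOException;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
// Hypothetical sketch of the HBaseUtils helper used by HBaseHelper.
// It keeps a single shared Connection (connections are heavyweight and
// thread-safe) and writes each batch of Puts through a short-lived Table.
public class HBaseUtils {
    private static Connection connection = null;
    private static synchronized Connection getConnection() throws IOException {
        if (connection == null) {
            // Reads hbase-site.xml from the classpath; the quorum below is a placeholder override.
            Configuration conf = HBaseConfiguration.create();
            conf.set("hbase.zookeeper.quorum", "192.168.0.15,192.168.0.16,192.168.0.17");
            conf.set("hbase.zookeeper.property.clientPort", "2181");
            connection = ConnectionFactory.createConnection(conf);
        }
        return connection;
    }
    public static void insertRecords(String tableName, List<Put> puts) throws IOException {
        Table table = getConnection().getTable(TableName.valueOf(tableName));
        try {
            table.put(puts);
        } finally {
            table.close();
        }
    }
}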
Code link: https://github.com/fighting-one-piece/data-handle/tree/master/dataprocessjstorm