JStorm Learning Notes: A Simple Example Based on Kafka, ElasticSearch, and HBase

The related dependencies are as follows:

<properties>
	<hadoop.version>2.7.2</hadoop.version><!-- property name inferred; only the value 2.7.2 appears in the original -->
	<hbase.version>1.2.3</hbase.version>
	<kafka.version>0.10.0.0</kafka.version>
	<jstorm.version>2.2.1</jstorm.version>
	<elasticsearch.version>5.3.0</elasticsearch.version>
</properties>

<dependency>
	<groupId>org.apache.hbase</groupId>
	<artifactId>hbase-common</artifactId>
	<version>${hbase.version}</version>
	<exclusions>
		<exclusion>
			<groupId>org.slf4j</groupId>
			<artifactId>slf4j-log4j12</artifactId>
		</exclusion>
	</exclusions>
</dependency>

<dependency>
	<groupId>org.apache.hbase</groupId>
	<artifactId>hbase-server</artifactId>
	<version>${hbase.version}</version>
	<exclusions>
		<exclusion>
			<groupId>org.slf4j</groupId>
			<artifactId>slf4j-log4j12</artifactId>
		</exclusion>
		<exclusion>
			<groupId>io.netty</groupId>
			<artifactId>netty-all</artifactId>
		</exclusion>
	</exclusions>
</dependency>

<dependency>
	<groupId>org.apache.hbase</groupId>
	<artifactId>hbase-client</artifactId>
	<version>${hbase.version}</version>
	<exclusions>
		<exclusion>
			<groupId>io.netty</groupId>
			<artifactId>netty-all</artifactId>
		</exclusion>
	</exclusions>
</dependency>

<dependency>
	<groupId>org.apache.hbase</groupId>
	<artifactId>hbase-protocol</artifactId>
	<version>${hbase.version}</version>
</dependency>

<dependency>
	<groupId>org.apache.hbase</groupId>
	<artifactId>hbase-annotations</artifactId>
	<version>${hbase.version}</version>
</dependency>

<dependency>
	<groupId>org.apache.hbase</groupId>
	<artifactId>hbase-hadoop2-compat</artifactId>
	<version>${hbase.version}</version>
</dependency>

<dependency>
	<groupId>org.apache.kafka</groupId>
	<artifactId>kafka_2.11</artifactId>
	<version>${kafka.version}</version>
</dependency>

<dependency>
	<groupId>org.apache.kafka</groupId>
	<artifactId>kafka-clients</artifactId>
	<version>${kafka.version}</version>
</dependency>

<dependency>
	<groupId>com.alibaba.jstorm</groupId>
	<artifactId>jstorm-core</artifactId>
	<version>${jstorm.version}</version>
</dependency>

<dependency>
	<groupId>org.elasticsearch</groupId>
	<artifactId>elasticsearch</artifactId>
	<version>${elasticsearch.version}</version>
</dependency>

<dependency>
	<groupId>org.elasticsearch.client</groupId>
	<artifactId>transport</artifactId>
	<version>${elasticsearch.version}</version>
</dependency>

Copy the jar packages required above into the extlib directory under the JStorm installation directory.

The Java example code is as follows:

import java.util.ArrayList;
import java.util.List;

import org.platform.modules.jstorm.bolt.elastic.ElasticBolt;
import org.platform.modules.jstorm.bolt.hbase.HBaseBolt;
import org.platform.modules.jstorm.spout.kafka.BrokerHosts;
import org.platform.modules.jstorm.spout.kafka.KafkaSpout;
import org.platform.modules.jstorm.spout.kafka.SpoutConfig;
import org.platform.modules.jstorm.spout.kafka.StringMultiScheme;
import org.platform.modules.jstorm.spout.kafka.ZkHosts;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.utils.Utils;

public class SimpleJStormTopology {
   
	private static final String TOPOLOGY_NAME = "Kafka2HBaseElastic";
	
	private static final String KAFKA_SPOUT = "KafkaSpout";
	
	private static final String HBASE_BOLT = "HBaseBolt";
	
	private static final String ELASTIC_BOLT = "ElasticBolt";
	
	private static boolean isCluster = true;
	
	public static void main(String[] args) {
		String brokerZks = "192.168.0.11:2181,192.168.0.12:2181,192.168.0.13:2181";
		String topic = "elastic5";
		String zkRoot = "/kafka";
		String id = "jstormdataprocess";
		
		BrokerHosts brokerHosts = new ZkHosts(brokerZks, "/kafka/brokers");
		SpoutConfig spoutConfig = new SpoutConfig(brokerHosts, topic, zkRoot, id);
		spoutConfig.startOffsetTime = -1L; // -2: start from the beginning of the Kafka log; -1: start from the latest offset; 0: start from the offset stored in ZooKeeper
		spoutConfig.scheme = new StringMultiScheme();
		List<String> zkServers = new ArrayList<String>();
		zkServers.add("192.168.0.11");
		zkServers.add("192.168.0.12");
		zkServers.add("192.168.0.13");
		spoutConfig.zkServers = zkServers;
		spoutConfig.zkPort = 2181;
		
		TopologyBuilder builder = new TopologyBuilder();
		
		KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);
		builder.setSpout(KAFKA_SPOUT, kafkaSpout, 1);
		
		ElasticBolt elasticBolt = new ElasticBolt();
		builder.setBolt(ELASTIC_BOLT, elasticBolt, 3).setNumTasks(6).localOrShuffleGrouping(KAFKA_SPOUT);
		
		HBaseBolt hbaseBolt = new HBaseBolt();
		builder.setBolt(HBASE_BOLT, hbaseBolt, 3).setNumTasks(6).localOrShuffleGrouping(KAFKA_SPOUT);
		
		/**
		BaseHBaseMapper mapper = new BaseHBaseMapper()
			.withRowKeyField("_id").withColumnFamily("i");
		List<String> fields = new ArrayList<String>();
		fields.add("insertTime");
		fields.add("updateTime");
		fields.add("sourceFile");
		mapper.withColumnFields(new Fields(fields));
		BaseHBaseBolt hbaseBolt = new BaseHBaseBolt("logistics", mapper)
			.withConfigKey("hbase.config").withBatchSize(1000);
		builder.setBolt(HBASE_BOLT, hbaseBolt, 2).allGrouping(KAFKA_SPOUT);
		*/
		
		Config config = new Config();
		/**
		Map<String, Object> hbaseConfig = new HashMap<String, Object>();
		hbaseConfig.put("hbase.master", "192.168.0.15:60000");
		hbaseConfig.put("hbase.rootdir", "/hbase");
		hbaseConfig.put("hbase.zookeeper.quorum", "192.168.0.15,192.168.0.16,192.168.0.17");
		hbaseConfig.put("hbase.zookeeper.property.clientPort", "2181");
		config.put("hbase.config", hbaseConfig);
		**/
		config.setDebug(true);
		/**
		config.put(Config.STORM_META_SERIALIZATION_DELEGATE, "org.apache.storm.serialization.SerializationDelegate");
		*/
		
		if (isCluster) {
			try {
				config.setNumWorkers(3);
				StormSubmitter.submitTopologyWithProgressBar(TOPOLOGY_NAME, config, builder.createTopology());
			} catch (Exception e) {
				e.printStackTrace();
			}
		} else {
			try {
				LocalCluster cluster = new LocalCluster();
				cluster.submitTopology(TOPOLOGY_NAME, config, builder.createTopology());
				Utils.sleep(10000000);
				cluster.killTopology(TOPOLOGY_NAME);
				cluster.shutdown();
			} catch (Exception e) {
				e.printStackTrace();
			}
		}
	}
}
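The StringMultiScheme assigned to spoutConfig.scheme lives in the project's own kafka spout package and is not listed in this post. Assuming it behaves like storm-kafka's StringScheme and emits each Kafka message as a single UTF-8 string field (which is what the bolts below rely on when they call tuple.getString(0)), a minimal sketch could look like this:

import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;

import backtype.storm.spout.MultiScheme;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;

// Hypothetical sketch: decode every Kafka message as a UTF-8 string
// and emit it as a one-field tuple named "str".
public class StringMultiScheme implements MultiScheme {

	private static final long serialVersionUID = 1L;

	public Iterable<List<Object>> deserialize(byte[] bytes) {
		List<Object> tuple = new Values(new String(bytes, StandardCharsets.UTF_8));
		return Arrays.asList(tuple);
	}

	public Fields getOutputFields() {
		return new Fields("str");
	}
}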

 

import java.util.Map;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Tuple;

public class ElasticBolt extends BaseRichBolt {

	private static final long serialVersionUID = 1L;
	
	private int batchSize = 1000;
	
	private ElasticHelper elasticHelper = null;
	
	@SuppressWarnings("rawtypes")
	public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
		elasticHelper = new ElasticHelper(batchSize, collector);
	}

	public void execute(Tuple input) {
		try {
			elasticHelper.add(input);
		} catch (Exception e) {
			elasticHelper.fail(e);
		}
	}

	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		
	}
	

}

 

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.platform.modules.elastic5.ESClient;
import org.platform.utils.json.GsonUtils;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.client.Client;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import backtype.storm.task.OutputCollector;
import backtype.storm.tuple.Tuple;

public class ElasticHelper {
	
	private static final Logger LOG = LoggerFactory.getLogger(ElasticHelper.class);

	private List<Tuple> tuples = null;
	
	private List<String> datas = null;
	
	private int batchSize = 1000;
	
	private OutputCollector collector = null;
	
	public ElasticHelper(int batchSize, OutputCollector collector) {
		this.tuples = new ArrayList<Tuple>();
		this.datas = new ArrayList<String>();
		if (batchSize > 0) this.batchSize = batchSize;
		this.collector = collector;
	}
	
	public void add(Tuple tuple) {
		tuples.add(tuple);
		datas.add(tuple.getString(0));
		if (tuples.size() == batchSize) {
			bulkInsert(datas);
			datas.clear();
			ack();
		}
	}
	
	public void ack() {
		for (int i = 0, len = tuples.size(); i < len; i++) {
			collector.ack(tuples.get(i));
		}
		tuples.clear();
	}
	
	public void fail(Exception e) {
		collector.reportError(e);
		for (int i = 0, len = tuples.size(); i < len; i++) {
			collector.fail(tuples.get(i));
		}
		tuples.clear();
		datas.clear();
	}
	
	public void bulkInsert(List<String> datas) {
		if (null == datas || datas.size() == 0) return;
		Client client = ESClient.getInstance().getClient();
		BulkRequestBuilder bulkRequestBuilder = client.prepareBulk();
		try {
			for (int i = 0, len = datas.size(); i < len; i++) {
				// Parse the JSON payload and pull out the routing fields
				// ("index", "type", "_id"), mirroring the layout used in HBaseHelper.
				Map<String, Object> source = GsonUtils.fromJsonToMap(datas.get(i));
				String index = String.valueOf(source.remove("index"));
				String type = String.valueOf(source.remove("type"));
				String id = String.valueOf(source.remove("_id"));
				IndexRequestBuilder irb = client.prepareIndex(index, type, id).setSource(source);
				bulkRequestBuilder.add(irb);
			}
			BulkResponse bulkResponse = bulkRequestBuilder.execute().actionGet();
			if (bulkResponse.hasFailures()) {
				LOG.error(bulkResponse.buildFailureMessage());
			}
		} catch (Exception e) {
			LOG.error(e.getMessage(), e);
		}
		LOG.info("elastic5 insert " + datas.size() + " records finish!");
	}
	
}
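ESClient (from org.platform.modules.elastic5) is a project-specific class that is not shown in this post. Assuming it is simply a singleton wrapper around an Elasticsearch 5.x TransportClient, a minimal sketch might look like the following; the cluster name and node address are placeholders and need to match the actual cluster:

import java.net.InetAddress;

import org.elasticsearch.client.Client;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.transport.InetSocketTransportAddress;
import org.elasticsearch.transport.client.PreBuiltTransportClient;

// Hypothetical sketch of the ESClient singleton used by ElasticHelper.
public class ESClient {

	private static final ESClient INSTANCE = new ESClient();

	private Client client = null;

	private ESClient() {
		try {
			Settings settings = Settings.builder()
					.put("cluster.name", "elasticsearch") // placeholder cluster name
					.build();
			client = new PreBuiltTransportClient(settings)
					.addTransportAddress(new InetSocketTransportAddress(
							InetAddress.getByName("127.0.0.1"), 9300)); // placeholder ES node
		} catch (Exception e) {
			throw new RuntimeException(e);
		}
	}

	public static ESClient getInstance() {
		return INSTANCE;
	}

	public Client getClient() {
		return client;
	}
}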

 

import java.util.Map;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Tuple;

public class HBaseBolt extends BaseRichBolt {

	private static final long serialVersionUID = 1L;
	
	private int batchSize = 1000;
	
	private HBaseHelper hbaseHelper = null;
	
	@SuppressWarnings("rawtypes")
	public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
		hbaseHelper = new HBaseHelper(batchSize, collector);
	}

	public void execute(Tuple input) {
		try {
			hbaseHelper.add(input);
		} catch (Exception e) {
			hbaseHelper.fail(e);
		}
	}

	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		
	}
	

}

 

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.platform.utils.bigdata.HBaseUtils;
import org.platform.utils.json.GsonUtils;
import org.platform.utils.serde.SerializerUtils;

import backtype.storm.task.OutputCollector;
import backtype.storm.tuple.Tuple;

public class HBaseHelper {
	
	private List<Tuple> tuples = null;
	
	private List<String> datas = null;
	
	private int batchSize = 1000;
	
	private OutputCollector collector = null;
	
	public HBaseHelper(int batchSize, OutputCollector collector) {
		this.tuples = new ArrayList<Tuple>();
		this.datas = new ArrayList<String>();
		if (batchSize > 0) this.batchSize = batchSize;
		this.collector = collector;
	}
	
	public void add(Tuple tuple) {
		tuples.add(tuple);
		datas.add(tuple.getString(0));
		if (tuples.size() == batchSize) {
			bulkInsert(datas);
			datas.clear();
			ack();
		}
	}
	
	public void ack() {
		for (int i = 0, len = tuples.size(); i < len; i++) {
			collector.ack(tuples.get(i));
		}
		tuples.clear();
	}
	
	public void fail(Exception e) {
		collector.reportError(e);
		for (int i = 0, len = tuples.size(); i < len; i++) {
			collector.fail(tuples.get(i));
		}
		tuples.clear();
		datas.clear();
	}
	
	public void bulkInsert(List<String> datas) {
		if (null == datas || datas.size() == 0) return;
		Map<String, List<Put>> map = new HashMap<String, List<Put>>();
		Map<String, Object> source = null;
		for (int i = 0, len = datas.size(); i < len; i++) {
			source = GsonUtils.fromJsonToMap(datas.get(i));
			source.remove("index");
			String tableName = String.valueOf(source.remove("type"));
			List<Put> puts = map.get(tableName);
			if (null == puts) {
				puts = new ArrayList<Put>();
				map.put(tableName, puts);
			}
			String rowKey = String.valueOf(source.remove("_id"));
			Put put = new Put(Bytes.toBytes(rowKey));
			for (Map.Entry<String, Object> entry : source.entrySet()) {
				String column = entry.getKey();
				String family = column.startsWith("c") ? "i" : "s";
				put.addColumn(Bytes.toBytes(family), Bytes.toBytes(column), SerializerUtils.write(entry.getValue()));
			}
			puts.add(put);
		}
		for (Map.Entry<String, List<Put>> entry : map.entrySet()) {
			try {
				HBaseUtils.insertRecords(entry.getKey(), entry.getValue());
			} catch (Exception e) {
				e.printStackTrace();
			}
		}
		System.out.println("hbase insert " + datas.size() + " records finish!");
	}
	
}
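HBaseUtils (from org.platform.utils.bigdata) is likewise project-specific and not shown. A minimal sketch of an insertRecords helper on top of the standard HBase 1.x client API might look like this; the ZooKeeper settings below reuse the values from the commented-out hbase.config block in the topology and should be adjusted to the actual cluster:

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;

// Hypothetical sketch of the HBaseUtils helper used by HBaseHelper.
public class HBaseUtils {

	private static Connection connection = null;

	private static synchronized Connection getConnection() throws IOException {
		if (null == connection) {
			Configuration conf = HBaseConfiguration.create();
			// Placeholder connection settings; adjust to the actual HBase cluster.
			conf.set("hbase.zookeeper.quorum", "192.168.0.15,192.168.0.16,192.168.0.17");
			conf.set("hbase.zookeeper.property.clientPort", "2181");
			connection = ConnectionFactory.createConnection(conf);
		}
		return connection;
	}

	public static void insertRecords(String tableName, List<Put> puts) throws IOException {
		Table table = getConnection().getTable(TableName.valueOf(tableName));
		try {
			table.put(puts);
		} finally {
			table.close();
		}
	}
}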

Source code: https://github.com/fighting-one-piece/data-handle/tree/master/dataprocessjstorm

 

 
