Here is the code:
/**
* Storm integrated with Kafka, Hive, JDBC, HBase and HDFS
* Created by sker on 17-11-13
* Kafka feeds Storm; the data is sent to JobBolt for Chinese word segmentation,
* and the results go to separate bolts that write to Hive, HBase, MySQL and HDFS.
*/
public class SegGoGo {
public static void main(String[] args) {
//create a TopologyBuilder instance
TopologyBuilder topologyBuilder = new TopologyBuilder();
LocalCluster localCluster = new LocalCluster();
/**
* The Kafka-to-Storm part
*/
//integrating Kafka with Storm requires a ZkHosts and a SpoutConfig
ZkHosts zkHosts = new ZkHosts("localhost:2181");
SpoutConfig spoutConfig = new SpoutConfig(zkHosts, "hbase", "/storm", "kafka");
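//Note (assumption): the default scheme emits the raw Kafka bytes; if JobBolt expects the
//message as a UTF-8 string (JobBolt is not shown here), a StringScheme can be declared:
spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());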
/**
* Storm-to-HDFS integration
*/
//writing to HDFS requires an HdfsBolt with the parameters set below
HdfsBolt hdfsBolt = new HdfsBolt()
.withFsUrl("hdfs://localhost:9000/")//set the HDFS URL
.withRecordFormat(new DelimitedRecordFormat().withFieldDelimiter(","))//set the field delimiter
.withSyncPolicy(new CountSyncPolicy(10))//sync policy: flush to HDFS every 10 tuples
.withFileNameFormat(new DefaultFileNameFormat().withPath("/test"))//file name format; the output path is set here
.withRotationPolicy(new FileSizeRotationPolicy(1.0f, FileSizeRotationPolicy.Units.KB));//rotation policy: roll to a new file every 1 KB
/**
* Storm-to-HBase integration
*/
//HBase settings are passed to the HBaseBolt through the topology config
Config config = new Config();
Map<String, Object> hbConf = new HashMap<String, Object>();
hbConf.put("hbase.rootdir","hdfs://localhost:9000/sbsbsbs/hbase/");
hbConf.put("hbase.zookeeper.quorum", "localhost:2181");
config.put("hbase.conf", hbConf);
SimpleHBaseMapper simpleHBaseMapper = new SimpleHBaseMapper()
.withColumnFamily("cf")
.withColumnFields(new Fields("word","count"))
.withRowKeyField("word");
HBaseBolt hBaseBolt = new HBaseBolt("demo",simpleHBaseMapper)
.withConfigKey("hbase.conf");
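//Note (assumption): the target table has to exist in HBase beforehand, e.g. from the HBase shell:
//create 'demo','cf'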
/**
* Storm-to-JDBC (MySQL) integration
*/
Map<String, Object> hikariConfigMap = Maps.newHashMap();
hikariConfigMap.put("dataSourceClassName","com.mysql.jdbc.jdbc2.optional.MysqlDataSource");
hikariConfigMap.put("dataSource.url", "jdbc:mysql://localhost/test?useUnicode=true&characterEncoding=UTF-8");
hikariConfigMap.put("dataSource.user","root");
hikariConfigMap.put("dataSource.password","1327");
ConnectionProvider connectionProvider = new HikariCPConnectionProvider(hikariConfigMap);
String tableName = "seg";
JdbcMapper simpleJdbcMapper = new SimpleJdbcMapper(tableName, connectionProvider);
JdbcInsertBolt insertBolt = new JdbcInsertBolt(connectionProvider, simpleJdbcMapper)
// .withTableName("seg") //not needed when an insert query is supplied
.withInsertQuery("insert into seg values (?,?)")
.withQueryTimeoutSecs(30);
JdbcInsertBolt selectBolt = new JdbcInsertBolt(connectionProvider, simpleJdbcMapper)
.withInsertQuery("select word,count(word) from seg group by word")
.withQueryTimeoutSecs(30);
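//Note (assumption): the MySQL table behind the two-column insert above is not shown;
//a minimal matching schema would be: create table seg (word varchar(255), count int);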
/**
* Build the topology
*/
//the Kafka-to-Storm spout, the first step in building the topology
topologyBuilder.setSpout("kafka", new KafkaSpout(spoutConfig));
//the data goes into JobBolt for Chinese word segmentation
topologyBuilder.setBolt("document",new JobBolt.GetDocument()).shuffleGrouping("kafka");
topologyBuilder.setBolt("wordCount",new JobBolt.StringToWordCount()).shuffleGrouping("document");
//insert the data into mysql
topologyBuilder.setBolt("jdbc_insert",insertBolt).shuffleGrouping("wordCount");
//query mysql (caveat: JdbcInsertBolt is built for inserts and does not emit tuples downstream,
//so nothing actually reaches this bolt; a per-tuple lookup would normally use JdbcLookupBolt)
topologyBuilder.setBolt("jdbc_select",selectBolt).shuffleGrouping("jdbc_insert");
//store the data in HDFS; subscribe to "wordCount" directly, since the JDBC bolts do not re-emit tuples
topologyBuilder.setBolt("hdfs",hdfsBolt).shuffleGrouping("wordCount");
//store the data in HBase
topologyBuilder.setBolt("hbase",hBaseBolt).shuffleGrouping("wordCount");
localCluster.submitTopology("SegGoGo",config,topologyBuilder.createTopology());
}
}
Storm integration with Hive
/**
* Integrating Storm with Hive is fairly involved, so it is not tested together with the
* word_segmentation package; Storm-HDFS integration is much simpler, and you can just use
* storm-hdfs and then LOAD the files into a Hive table.
*
* Storm-Hive integration requires changes to the Hive configuration: enable automatic
* partitioning, set the metastore URIs, configure JDBC, and enable hive.in.test (see the
* hive-site.xml in the same package). Make sure the Hive version in your environment matches
* the jar versions used in the code, and that the metastore and hiveserver2 are running.
*
* Hive DDL used for this test:
* create table demo (id int,name string,sex string) partitioned by (age int) clustered by (id) into 3 buckets stored as orc tblproperties ("orc.compress"="NONE",'transactional'='true');
*
* Storm-Hive integration really is finicky -- the slightest misstep makes it fail, and debugging
* is even worse. Try it yourself if you are interested; good luck!
*/
public class Storm2Hive {
static class Storm_Hive_Spout extends BaseRichSpout {
SpoutOutputCollector spoutOutputCollector;
String[] name = {"aa","bb","cc","dd","ee","ff","gg","hh"};
String[] sex = {"man","woman"};
int[] id = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
Random random = new Random();
public void open(Map map, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
this.spoutOutputCollector=spoutOutputCollector;
}
public void nextTuple() {
Utils.sleep(1000);
String s = name[random.nextInt(name.length)];
String sex1 = sex[random.nextInt(sex.length)];
int id1 = id[random.nextInt(id.length)];
spoutOutputCollector.emit(new Values(id1,s,sex1,"18"));
System.out.println(""+id1+":"+s+":"+sex1);
}
public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
outputFieldsDeclarer.declare(new Fields("id","name","sex","age"));
}
}
public static void main(String[] args) {
DelimitedRecordHiveMapper delimitedRecordHiveMapper = new DelimitedRecordHiveMapper();//maps the fields emitted by the spout
delimitedRecordHiveMapper.withColumnFields(new Fields("id","name","sex"))
.withPartitionFields(new Fields("age"));
HiveOptions hiveOptions = new HiveOptions("thrift://localhost:9083","default","demo",delimitedRecordHiveMapper);
hiveOptions.withTxnsPerBatch(10)
.withBatchSize(20)
.withIdleTimeout(10);
HiveBolt hiveBolt = new HiveBolt(hiveOptions);
TopologyBuilder topologyBuilder = new TopologyBuilder();
topologyBuilder.setSpout("spout",new Storm_Hive_Spout());
topologyBuilder.setBolt("bolt",hiveBolt).shuffleGrouping("spout");
LocalCluster localCluster = new LocalCluster();
localCluster.submitTopology("go",new Config(),topologyBuilder.createTopology());
The complete code is on GitHub: https://github.com/ZzzzZzreal/StormGoGo/tree/master/src/main/java
Storm integrated with Hive: the storm_hive package
Storm integrated with HBase, JDBC, Kafka and HDFS: the word_segmentation package