Storm集成HBase、JDBC、Kafka、HDFS、Hive

代码参上

/**
 * storm集成Kafka、Hive、JDBC、HBase、HDFS
 * Created by sker on 17-11-13
 * kafka集成storm,将数据发到JobBolt做中文分词逻辑;
 * 结果发到不同bolt,然后分别存入hive、hbase、mysql和hdfs
 */
public class SegGoGo {

    public static void main(String[] args) {

        //创建一个TopologyBuilder实例
        TopologyBuilder topologyBuilder = new TopologyBuilder();
        LocalCluster localCluster = new LocalCluster();
        Config conf = new Config();

        /**
         * 以下是kafka到storm的逻辑
         */

        //kafka与storm集成需要一个zkHost和一个SpoutConfig
        ZkHosts zkHosts = new ZkHosts("localhost:2181");
        SpoutConfig spoutConfig = new SpoutConfig(zkHosts, "hbase", "/storm", "kafka");

        /**
         * 以下代码要做的是storm与HDFS集成
         */

        //kafka与HDFS集成需要一个HDFSBolt,并进行相应参数的设定
        HdfsBolt hdfsBolt = new HdfsBolt()
                .withFsUrl("hdfs://localhost:9000/")//设置hdfs的url
                .withRecordFormat(new DelimitedRecordFormat().withFieldDelimiter(","))//设置文件分割符
                .withSyncPolicy(new CountSyncPolicy(10))//同步政策
                .withFileNameFormat(new DefaultFileNameFormat().withPath("/test"))//文件命名格式,参数中设置了文件路径
                .withRotationPolicy(new FileSizeRotationPolicy(1.0f, FileSizeRotationPolicy.Units.KB));//设置滚动生成文件的参数,此处为1k生成一个文件

        /**
         * 以下代码要做的是storm与hbase集成
         */

        //storm与hbase集成
        Config config = new Config();
        Map hbConf = new HashMap();
        hbConf.put("hbase.rootdir","hdfs://localhost:9000/sbsbsbs/hbase/");
        hbConf.put("hbase.zookeeper.quorum", "localhost:2181");
        config.put("hbase.conf", hbConf);

        SimpleHBaseMapper simpleHBaseMapper = new SimpleHBaseMapper()
                .withColumnFamily("cf")
                .withColumnFields(new Fields("word","count"))
                .withRowKeyField("word");

        HBaseBolt hBaseBolt = new HBaseBolt("demo",simpleHBaseMapper)
                .withConfigKey("hbase.conf");


        /**
         * 以下代码要做的是storm与JDBC集成
         */
        Map hikariConfigMap = Maps.newHashMap();
        hikariConfigMap.put("dataSourceClassName","com.mysql.jdbc.jdbc2.optional.MysqlDataSource");
        hikariConfigMap.put("dataSource.url", "jdbc:mysql://localhost/test?useunicode=true&characterencoding=utf-8");
        hikariConfigMap.put("dataSource.user","root");
        hikariConfigMap.put("dataSource.password","1327");

        ConnectionProvider connectionProvider = new HikariCPConnectionProvider(hikariConfigMap);

        String tableName = "seg";
        JdbcMapper simpleJdbcMapper = new SimpleJdbcMapper(tableName, connectionProvider);

        JdbcInsertBolt insertBolt = new JdbcInsertBolt(connectionProvider, simpleJdbcMapper)
//                .withTableName("seg")     //没卵用
                .withInsertQuery("insert into seg values (?,?)")
                .withQueryTimeoutSecs(30);

        JdbcInsertBolt selectBolt = new JdbcInsertBolt(connectionProvider, simpleJdbcMapper)
                .withInsertQuery("select word,count(word) from seg group by word")
                .withQueryTimeoutSecs(30);

        /**
         * 构建拓扑
         */
        //kafka到storm的spout,构建拓扑的第一步
        topologyBuilder.setSpout("kafka", new KafkaSpout(spoutConfig));
        //数据进入JobBolt做中文分词处理
        topologyBuilder.setBolt("document",new JobBolt.GetDocument()).shuffleGrouping("kafka");
        topologyBuilder.setBolt("wordCount",new JobBolt.StringToWordCount()).shuffleGrouping("document");
        //数据插入mysql
        topologyBuilder.setBolt("jdbc_insert",insertBolt).shuffleGrouping("wordCount");
        //查询mysql
        topologyBuilder.setBolt("jdbc_select",selectBolt).shuffleGrouping("jdbc_insert");
        //数据存入HDFS
        topologyBuilder.setBolt("hdfs",hdfsBolt).shuffleGrouping("jdbc_select");
        //数据存入HBase
        topologyBuilder.setBolt("hbase",hBaseBolt).shuffleGrouping("wordCount");


        localCluster.submitTopology("SegGoGo",config,topologyBuilder.createTopology());
storm与hive的集成

/**
 * storm和hive集成比较麻烦,不适合word_segmentation包里一起做联合测试,
 * 而且storm和HDFS集成很简单,可以直接storm-hdfs然后load到hive表
 *
 *storm跟hive集成,需要修改hive配置,包括开启自动分区、设置metadate的uris、设置jdbc以及开启hive.in.test(参考文件为同包下hive-site.xml);
 * 确保实际环境的hive版本和代码中的jar包版本一致;确保metadate和hiveserver2开启
 *
 * 本测试的hive建表语句
 * create table demo (id int,name string,sex string) partitioned by (age int) clustered by (id) into 3 buckets stored as orc tblproperties ("orc.compress"="NONE",'transactional'='true');
 *
 * storm-hive集成真的很烦,稍不注意就会失败,而且调错更烦,有兴趣的可以自己测试,希望你能成功,哈哈
 */
public class Storm2Hive {
    static class Storm_Hive_Spout extends BaseRichSpout {
        SpoutOutputCollector spoutOutputCollector;
        String[] name = {"aa","bb","cc","dd","ee","ff","gg","hh"};
        String[] sex = {"man","woman"};
        int[] id = {1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};

        Random random = new Random();

        public void open(Map map, TopologyContext topologyContext, SpoutOutputCollector spoutOutputCollector) {
            this.spoutOutputCollector=spoutOutputCollector;
        }

        public void nextTuple() {
            Utils.sleep(1000);

            String s = name[random.nextInt(name.length)];
            String sex1 = sex[random.nextInt(sex.length)];
            int id1 = id[random.nextInt(id.length)];
            spoutOutputCollector.emit(new Values(id1,s,sex1,"18"));
            System.out.println(""+id1+":"+s+":"+sex1);

        }

        public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
            outputFieldsDeclarer.declare(new Fields("id","name","sex","age"));
        }
    }

    public static void main(String[] args) {


        DelimitedRecordHiveMapper delimitedRecordHiveMapper = new DelimitedRecordHiveMapper();//映射字段,spout那边发来的
        delimitedRecordHiveMapper.withColumnFields(new Fields("id","name","sex"))
                .withPartitionFields(new Fields("age"));

        HiveOptions hiveOptions = new HiveOptions("thrift://localhost:9083","default","demo",delimitedRecordHiveMapper);
        hiveOptions.withTxnsPerBatch(10)
                .withBatchSize(20)
                .withIdleTimeout(10);

        HiveBolt hiveBolt = new HiveBolt(hiveOptions);

        TopologyBuilder topologyBuilder = new TopologyBuilder();
        topologyBuilder.setSpout("spout",new Storm_Hive_Spout());
        topologyBuilder.setBolt("bolt",hiveBolt).shuffleGrouping("spout");

        LocalCluster localCluster = new LocalCluster();
        localCluster.submitTopology("go",new Config(),topologyBuilder.createTopology());
完整代码参见github: https://github.com/ZzzzZzreal/StormGoGo/tree/master/src/main/java

storm集成Hive

storm_hive

Storm集成HBase、JDBC、Kafka、HDFS

word_segmentation

你可能感兴趣的:(storm)