Using Flink to consume messages from Kafka, write aggregated results to MySQL, and land the messages in Hive

Flink consumes messages from a Kerberos-secured Kafka cluster and computes metrics every minute, such as the number of records received and the transaction amount per minute. Each minute's figures build on the running totals of the previous minute. The aggregated values are written to MySQL for front-end display, while the raw messages are stored on the Hadoop platform and exposed through a Hive table.
The main logic is as follows:
1. Consume data from the specified Kafka topic
2. Write the aggregated statistics to MySQL
3. Write the received data to HDFS and expose it as a Hive external table, which is faster
4. A scheduled job outside the program adds partitions to the external table (a sketch of this follows the list)
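
The partition-loading script itself (resources/load_parition.sh, mentioned in the code below) is not included in the post. As a rough sketch of what that scheduled task has to do, the same step can be expressed with Hive JDBC, which is already a dependency in the pom; the connection URL, table name, partition column (pexchangedate) and directory here are placeholders, not the author's actual values:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;

public class AddPartitionJob {

    public static void main(String[] args) throws Exception {
        // Placeholder connection URL, table name and table directory on HDFS
        String hiveUrl = "jdbc:hive2://hive-server:10000/default";
        String table = "unionpay";
        String baseDir = "/user/hive/warehouse/unionpay/";

        String day = LocalDate.now().format(DateTimeFormatter.ofPattern("yyyyMMdd"));
        Class.forName("org.apache.hive.jdbc.HiveDriver");
        try (Connection conn = DriverManager.getConnection(hiveUrl);
             Statement stmt = conn.createStatement()) {
            // Register the yyyyMMdd directory written by the Flink job as a partition
            // of the external table, so Hive queries can see the new data
            stmt.execute("ALTER TABLE " + table + " ADD IF NOT EXISTS PARTITION (pexchangedate='" + day
                    + "') LOCATION '" + baseDir + day + "'");
        }
    }
}

In practice this would be run once a day from cron or a workflow scheduler, which is presumably what the shell script in resources does.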

 
/**
     * 1. Consume data from the specified Kafka topic
     * 2. Write the aggregated statistics to MySQL
     * 3. Write the received data to HDFS and expose it as a Hive external table, which is faster
     * Two sinks are added: one writes to MySQL, the other writes to HDFS
     * A scheduled job outside the program adds partitions to the table; the script is in resources/load_parition.sh
     * @param tableName name of the Hive table to write to
     */
    public void writePayByKey(String tableName) {
     
        try {

            //Statistics interval in minutes
            Integer intevalTime = commonProperties.getInterValTime();
            //Initialize the execution environment and configure checkpointing, etc.
            StreamExecutionEnvironment bsEnv = createEnv(checkpointUrl);
            StreamTableEnvironment bsTableEnv = StreamTableEnvironment.create(bsEnv);

            //Kafka uses Kerberos authentication, so the krb5 file and kafka.jaas are needed (see my other post for the format of kafka.jaas)
            Properties properties = getProperties();
            //If Kerberos authentication is enabled in the configuration file
            if (commonProperties.getKafkaKerberosFlag()) {

                //Location of the kafka.jaas file, taken from the configuration
                String kerberosConf = commonProperties.getKafkaKerberosConfig();
                //Location of krb5.conf, i.e. /etc/krb5.conf from the KDC server; it can live anywhere as long as the path is set in the configuration
                String krb5Conf = commonProperties.getKerberoseKrb5Conf();
                log.info("kerberosConf:" + kerberosConf);
                log.info("krb5Conf:" + krb5Conf);
                System.setProperty("java.security.auth.login.config", kerberosConf);
                System.setProperty("java.security.krb5.conf", krb5Conf);
                properties.put("security.protocol", "SASL_PLAINTEXT");
                properties.put("sasl.mechanism", "GSSAPI");
                properties.put("sasl.kerberos.service.name", "kafka");

            }
            //Deserialize the JSON messages from Kafka into POJOs
            FlinkKafkaConsumer<UnionPay> kafkaConsumer011 = new FlinkKafkaConsumer<>(topicName, new ConsumerDeserializationSchema<UnionPay>(UnionPay.class), properties);
            kafkaConsumer011.setStartFromLatest();
            DataStream<UnionPay> dataStreamSource = bsEnv.addSource(kafkaConsumer011);
            //Key the stream and aggregate it inside a tumbling window
            WindowedStream<UnionPay, String, TimeWindow> timeWindowWindowedStream = dataStreamSource.keyBy(new KeySelector<UnionPay, String>() {
     
                @Override
                public String getKey(UnionPay unionPay) throws Exception {
     
                   String mrchNo=MyUtils.getMrchNo(unionPay.getMrchno());
                    return mrchNo;
                }
            }).window(TumblingProcessingTimeWindows.of(Time.minutes(intevalTime)));
            SingleOutputStreamOperator<MonitorTranDetail> resultWin = timeWindowWindowedStream.aggregate(new RsesultAggregateFunc(), new ProcessWindowFunction<MonitorTranDetail, MonitorTranDetail, String, TimeWindow>() {
     
                @Override
                public void process(String s, Context context, Iterable<MonitorTranDetail> iterable, Collector<MonitorTranDetail> collector) {
     
                    try {
     
                        Long lend = context.window().getEnd();
                        String etltime = MyUtils.getDateFromformatter("yyyyMMddHHmmss", lend);
                        String datadate = MyUtils.getDateFromformatter("yyyyMMdd", lend);
                        //String txntime = MyUtils.getDateFromformatter("HHmmss", lend);
                        String txntime=etltime;
                        //Set the data time on each result record; the end time of the window is used here
                        for (MonitorTranDetail monitorTranDetail : iterable) {
     
                            monitorTranDetail.setEtlTIme(etltime);
                            monitorTranDetail.setDataDate(datadate);
                            monitorTranDetail.setTxnTime(txntime);
                            collector.collect(monitorTranDetail);
                        }
                    } catch (Exception ex) {
     
                        ex.printStackTrace();
                    }

                }
            });
            //Add the sink that writes the aggregated statistics to MySQL
            resultWin.addSink(new DbSinkFunction());
            //Write the received raw data to HDFS
            List<FieldSchema> fieldSchemas = xmlParser.parseField(tableName);
            fieldSchemas.add(new FieldSchema("pexchangeDate", "string", "yyyyMMdd"));
            SingleOutputStreamOperator<String> singleOutputUnionPay = timeWindowWindowedStream.process(new HdfsAllWindowFunction(fieldSchemas));
            //HDFS directory that holds the database tables
            String dataBasedfsdir = commonProperties.getDfsdatabaseDir();
            if (!dataBasedfsdir.endsWith("/")) {

                dataBasedfsdir = dataBasedfsdir + "/";
            }
            String hdfsDir = dataBasedfsdir + tableName.toLowerCase() + "/";
            BucketingSink<String> hdfsSink = new BucketingSink<>(hdfsDir);
            // Split output directories by yyyyMMdd, similar to Hive date partitions
            String strSplitformatter = getSplitChar();
            hdfsSink.setBucketer(new HdfsBucketer(strSplitformatter));
            // Batch size of 128 MB; once exceeded, the current file is closed and a new one is opened
            hdfsSink.setBatchSize(1024 * 1024 * 128L);
            Configuration conf = new Configuration();
            String url = commonProperties.getDfsdefaultName();
            conf.set("fs.default.name", url);
            hdfsSink.setFSConfig(conf);
            // Roll over to a new file every hour
            hdfsSink.setBatchRolloverInterval(60 * 60 * 1000L);
            // Close a bucket after one second without new data
            hdfsSink.setInactiveBucketThreshold(1000L);
            // Interval between two checks for inactive buckets
            hdfsSink.setInactiveBucketCheckInterval(1000L);
            // Prefix of pending files (the default is _)
            hdfsSink.setPendingPrefix("");
            // Suffix of pending files (the default is .pending)
            hdfsSink.setPendingSuffix("");
            // Prefix of in-progress files (the default is _)
            hdfsSink.setInProgressPrefix(".");
            //Add the HDFS sink
            singleOutputUnionPay.addSink(hdfsSink);
            HiveCatalog hive = new HiveCatalog(hiveCtalog, hiveDatabase, hiveConf, hiveVersion);
            bsTableEnv.registerCatalog(hiveCtalog, hive);
            bsTableEnv.useCatalog(hiveCtalog);
            bsTableEnv.getConfig().setSqlDialect(SqlDialect.HIVE);
            bsTableEnv.useDatabase(hiveDatabase);
         
            //Check whether the table already exists
            List<String> listObj = Arrays.asList(bsTableEnv.listTables());
            List<String> listExist = listObj.stream().filter(r -> r.equalsIgnoreCase(tableName)).collect(Collectors.toList());
            //Create the table if it does not exist
            if (listExist.size() == 0) {
     
                String sinkDdl = xmlParser.parseExternalTable(tableName);
                log.info(sinkDdl);
                bsTableEnv.executeSql(sinkDdl);
            }
            bsEnv.execute("Insert_统计数据写入mysql_Hive");
        } catch (Exception e) {
     
            log.error("writePayByKey failed", e);
        }
    }
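
The createEnv(checkpointUrl) helper that "initializes the execution environment and configures checkpointing" is not shown in the post. The sketch below is what such a method on the same class might look like for Flink 1.11, assuming a RocksDB state backend (flink-statebackend-rocksdb is in the pom) and checkpoints stored under the checkpointUrl path; the intervals are illustrative, not the author's actual values.

import org.apache.flink.contrib.streaming.state.RocksDBStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

    private StreamExecutionEnvironment createEnv(String checkpointUrl) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Checkpoint every minute with exactly-once semantics
        env.enableCheckpointing(60 * 1000L, CheckpointingMode.EXACTLY_ONCE);
        env.getCheckpointConfig().setMinPauseBetweenCheckpoints(30 * 1000L);
        env.getCheckpointConfig().setCheckpointTimeout(10 * 60 * 1000L);
        // Keep externalized checkpoints when the job is cancelled so it can be restored later
        env.getCheckpointConfig().enableExternalizedCheckpoints(
                CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
        // Store state in RocksDB with incremental checkpoints under the given path
        env.setStateBackend(new RocksDBStateBackend(checkpointUrl, true));
        return env;
    }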

Part 1: Converting the Kafka JSON messages into objects

ConsumerDeserializationSchema.java

import com.alibaba.fastjson.JSONObject;
import com.tcloudata.utils.MyUtils;
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.typeutils.TypeExtractor;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;


public class ConsumerDeserializationSchema<T> implements DeserializationSchema<T> {
     
    private Class<T> clazz;
    public ConsumerDeserializationSchema(Class<T> clazz) {
     
        this.clazz = clazz;
    }
    @Override
    public T deserialize(byte[] message) throws IOException {
     
        ByteBuffer buffer = ByteBuffer.wrap(message).order(ByteOrder.LITTLE_ENDIAN);
        String mess = MyUtils.byteBuffertoString(buffer);
        //Deserialize into the target POJO
        T objTarget = null;
        String className = clazz.getTypeName().toLowerCase();
        //The JSON for UnionPay is nested, so a dedicated converter is used
        if (className.indexOf("UnionPay".toLowerCase()) != -1) {

            objTarget = (T) MyUtils.convertUnionPay(mess, clazz);
        } else {

            objTarget = JSONObject.parseObject(mess, clazz);
        }
        return objTarget;
    }
    @Override
    public boolean isEndOfStream(T t) {
     
        return false;
    }

    @Override
    public TypeInformation<T> getProducedType() {
     
        return TypeExtractor.getForClass(clazz);
    }

}
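
The MyUtils.byteBuffertoString helper used above is not included in the post; it presumably just decodes the remaining bytes of the buffer as UTF-8 text. A hypothetical equivalent:

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

    public static String byteBuffertoString(ByteBuffer buffer) {

        // Copy the remaining bytes out of the buffer and decode them as a UTF-8 string
        byte[] bytes = new byte[buffer.remaining()];
        buffer.get(bytes);
        return new String(bytes, StandardCharsets.UTF_8);
    }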

Part 2: Aggregating each message

RsesultAggregateFunc.java

import com.tcloudata.model.MonitorTranDetail;
import com.tcloudata.model.UnionPay;
import com.tcloudata.utils.MyUtils;
import org.apache.flink.api.common.functions.AggregateFunction;

import java.math.BigDecimal;

public class RsesultAggregateFunc implements AggregateFunction<UnionPay, MonitorTranDetail, MonitorTranDetail> {
     
    @Override
    public MonitorTranDetail createAccumulator() {
     
        MonitorTranDetail resultInfo = new MonitorTranDetail();
        return resultInfo;
    }

    @Override
    public MonitorTranDetail add(UnionPay unionPay, MonitorTranDetail monitorTranDetail) {
     

        String mrchNo = MyUtils.getMrchNo(unionPay.getMrchno());
        Double amount = unionPay.getAmount();
        Integer txnCnt1 = 0;
        Integer txnCnt2 = 0;
        Integer txnCnt3 = 0;
        Integer txnCnt4 = 0;
        Integer txnCnt5 = 0;
        Integer txnCnt6 = 0;
        //Total number of transactions for the day
        Integer tottxnCnt = 1;
        //Number of transactions of 10000 or more
        if (amount >= 10000) {

            txnCnt1 = 1;
        }
        //Number of transactions between 3000 and 10000
        if (amount >= 3000 && amount < 10000) {

            txnCnt2 = 1;
        }
        //Number of transactions between 1000 and 3000
        if (amount >= 1000 && amount < 3000) {

            txnCnt3 = 1;
        }
        // Number of transactions between 500 and 1000
        if (amount >= 500 && amount < 1000) {

            txnCnt4 = 1;
        }
        // Number of transactions between 100 and 500
        if (amount >= 100 && amount < 500) {

            txnCnt5 = 1;
        }
        // Number of transactions below 100
        if (amount > 0 && amount < 100) {

            txnCnt6 = 1;
        }
        //Total transaction amount for the day
        BigDecimal tmpBigdecimal = new BigDecimal(amount);
        if (mrchNo.equals(monitorTranDetail.getZoneCd())) {
     
            txnCnt1 = monitorTranDetail.getTxnCnt1() + txnCnt1;
            txnCnt2 = monitorTranDetail.getTxnCnt2() + txnCnt2;
            txnCnt3 = monitorTranDetail.getTxnCnt3() + txnCnt3;
            txnCnt4 = monitorTranDetail.getTxnCnt4() + txnCnt4;
            txnCnt5 = monitorTranDetail.getTxnCnt5() + txnCnt5;
            txnCnt6 = monitorTranDetail.getTxnCnt6() + txnCnt6;
            tottxnCnt = monitorTranDetail.getTottxnCntd() + 1;
            BigDecimal d1 = new BigDecimal(amount);
            BigDecimal d2 = new BigDecimal(monitorTranDetail.getTottxnAmountd());
            tmpBigdecimal = d1.add(d2);
        } else {
     
            monitorTranDetail.setZoneCd(mrchNo);
        }
        monitorTranDetail.setTxnCnt1(txnCnt1);
        monitorTranDetail.setTxnCnt2(txnCnt2);
        monitorTranDetail.setTxnCnt3(txnCnt3);
        monitorTranDetail.setTxnCnt4(txnCnt4);
        monitorTranDetail.setTxnCnt5(txnCnt5);
        monitorTranDetail.setTxnCnt6(txnCnt6);
        monitorTranDetail.setTottxnCntd(tottxnCnt);
        monitorTranDetail.setTottxnAmountd(tmpBigdecimal.doubleValue());
        return monitorTranDetail;
    }

    @Override
    public MonitorTranDetail getResult(MonitorTranDetail monitorTranDetail) {
     
        return monitorTranDetail;
    }

    @Override
    public MonitorTranDetail merge(MonitorTranDetail acc1, MonitorTranDetail acc2) {
     
        acc2.setTxnCnt1(acc1.getTxnCnt1() + acc2.getTxnCnt1());
        acc2.setTxnCnt2(acc1.getTxnCnt2() + acc2.getTxnCnt2());
        acc2.setTxnCnt3(acc1.getTxnCnt3() + acc2.getTxnCnt3());
        acc2.setTxnCnt4(acc1.getTxnCnt4() + acc2.getTxnCnt4());
        acc2.setTxnCnt5(acc1.getTxnCnt5() + acc2.getTxnCnt5());
        acc2.setTxnCnt6(acc1.getTxnCnt6() + acc2.getTxnCnt6());
        acc2.setTottxnCntd(acc1.getTottxnCntd() + acc2.getTottxnCntd());
        BigDecimal d1 = new BigDecimal(acc1.getTottxnAmountd());
        BigDecimal d2 = new BigDecimal(acc2.getTottxnAmountd());
        BigDecimal tmpBigdecimal = d1.add(d2);
        acc2.setTottxnAmountd(tmpBigdecimal.doubleValue());
        return acc2;
    }
}
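
The DbSinkFunction added in the main job (resultWin.addSink(new DbSinkFunction())) is not shown in the post either. Below is a minimal sketch of the idea, assuming a MySQL table named monitor_tran_detail, hypothetical connection settings, and getDataDate/getTxnTime getters on MonitorTranDetail (their setters are used in the window function above); only a few of the columns are written, and an upsert is used so a re-emitted window overwrites the previous row for the same key.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;

import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

import com.tcloudata.model.MonitorTranDetail;

public class DbSinkFunction extends RichSinkFunction<MonitorTranDetail> {

    private transient Connection connection;

    @Override
    public void open(Configuration parameters) throws Exception {

        // Hypothetical connection settings; the real job presumably reads them from its configuration
        connection = DriverManager.getConnection(
                "jdbc:mysql://mysql-host:3306/monitor?useSSL=false", "user", "password");
    }

    @Override
    public void invoke(MonitorTranDetail value, Context context) throws Exception {

        // Upsert keyed by (data_date, txn_time, zone_cd) so a re-emitted window overwrites the previous row
        String sql = "INSERT INTO monitor_tran_detail "
                + "(data_date, txn_time, zone_cd, tottxn_cnt, tottxn_amount) "
                + "VALUES (?, ?, ?, ?, ?) "
                + "ON DUPLICATE KEY UPDATE tottxn_cnt = VALUES(tottxn_cnt), tottxn_amount = VALUES(tottxn_amount)";
        try (PreparedStatement ps = connection.prepareStatement(sql)) {

            ps.setString(1, value.getDataDate());
            ps.setString(2, value.getTxnTime());
            ps.setString(3, value.getZoneCd());
            ps.setInt(4, value.getTottxnCntd());
            ps.setDouble(5, value.getTottxnAmountd());
            ps.executeUpdate();
        }
    }

    @Override
    public void close() throws Exception {

        if (connection != null) {
            connection.close();
        }
    }
}

A production version would more likely pull its connection settings from commonProperties and pool connections with Druid, which is also among the pom dependencies.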

Part 3: The pom file



<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.tcloudata</groupId>
    <artifactId>flinkhr</artifactId>
    <version>1.0</version>
    <name>flinkhr</name>

    <properties>
        <flink.version>1.11.2</flink.version>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <scala.binary.version>2.11</scala.binary.version>
        <hive-jdbc.version>1.2.1</hive-jdbc.version>
        <hadoop-common.version>2.6.5</hadoop-common.version>
        <kafka.version>2.4.1</kafka.version>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-java-bridge_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-blink_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>${kafka.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka_2.11</artifactId>
            <version>2.4.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-json</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.7</version>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-statebackend-rocksdb_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.54</version>
        </dependency>
        <dependency>
            <groupId>org.yaml</groupId>
            <artifactId>snakeyaml</artifactId>
            <version>1.27</version>
        </dependency>
        <dependency>
            <groupId>com.oracle</groupId>
            <artifactId>ojdbc6</artifactId>
            <version>11.2.0.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-hive_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-filesystem_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.bahir</groupId>
            <artifactId>flink-connector-redis_${scala.binary.version}</artifactId>
            <version>1.0</version>
        </dependency>
        <dependency>
            <groupId>dom4j</groupId>
            <artifactId>dom4j</artifactId>
            <version>1.6.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-shaded-hadoop-2-uber</artifactId>
            <version>2.6.5-8.0</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.48</version>
        </dependency>
        <dependency>
            <groupId>org.apache.thrift</groupId>
            <artifactId>libfb303</artifactId>
            <version>0.9.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-metastore</artifactId>
            <version>1.2.1</version>
            <exclusions>
                <exclusion>
                    <groupId>org.apache.hadoop</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>commons-cli</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-exec</artifactId>
            <version>1.2.1</version>
            <exclusions>
                <exclusion>
                    <groupId>commons-cli</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>com.google</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.calcite</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.6</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>druid</artifactId>
            <version>1.1.17</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-jdbc</artifactId>
            <version>${hive-jdbc.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.eclipse.jetty.aggregate</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>redis.clients</groupId>
            <artifactId>jedis</artifactId>
            <version>2.8.0</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.0.0</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <artifactSet>
                                <excludes>
                                    <exclude>org.apache.flink:force-shading</exclude>
                                    <exclude>com.google.code.findbugs:jsr305</exclude>
                                    <exclude>org.slf4j:*</exclude>
                                    <exclude>log4j:*</exclude>
                                </excludes>
                            </artifactSet>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers>
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>com.tcloudata.App</mainClass>
                                </transformer>
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
