Use Flink to consume messages from a Kafka cluster secured with Kerberos and compute metrics every minute, such as the number of records and the total amount received in that minute. Each minute's figures must also roll in the cumulative totals from the previous minute. The statistics are written to MySQL for front-end display, while the raw messages are stored on the Hadoop platform, with a table created in Hive on top of them.
The main logic is as follows:
1. Receive data from the specified Kafka topic.
2. Write the aggregated statistics to MySQL.
3. Write the received data to HDFS and expose it in Hive as an external table, which is faster than writing through Hive itself.
4. A scheduled task outside the program adds partitions to the external table (a hedged sketch of that step follows right after this list).
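The partition-loading script itself (resources/load_parition.sh) is not reproduced in this post. As a rough illustration of what that scheduled task has to do, the sketch below adds the current day's partition through Hive JDBC; the table name (union_pay), the partition column (pexchangedate) and the JDBC URL are assumptions and have to be adapted to your own environment (on a Kerberized cluster the URL would also need the Hive principal, e.g. ";principal=hive/_HOST@REALM"):

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;

// Illustrative sketch only: register today's HDFS directory as a partition of the
// external table so the newly written files become queryable in Hive.
public class AddPartitionJob {
    public static void main(String[] args) throws Exception {
        Class.forName("org.apache.hive.jdbc.HiveDriver");
        String day = LocalDate.now().format(DateTimeFormatter.ofPattern("yyyyMMdd"));
        // union_pay and pexchangedate are placeholders for the real table / partition column
        String ddl = "ALTER TABLE union_pay ADD IF NOT EXISTS PARTITION (pexchangedate='" + day + "')";
        try (Connection conn = DriverManager.getConnection("jdbc:hive2://hive-server:10000/default", "hive", "");
             Statement stmt = conn.createStatement()) {
            stmt.execute(ddl);
        }
    }
}

In the actual project this is driven by a shell script scheduled with cron, but the ALTER TABLE ... ADD PARTITION statement is the same. The main job itself follows.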
/**
* 1. Receive data from the specified Kafka topic
* 2. Write the aggregated statistics to MySQL
* 3. Write the received data to HDFS and expose it in Hive as an external table, which is faster
* Two sinks are added: one writes to MySQL, the other writes to HDFS
* A scheduled task outside the program adds partitions to the table; the script is in resources/load_parition.sh
* @param tableName name of the Hive table to write to
*/
public void writePayByKey(String tableName) {
try {
//Interval between statistics windows, in minutes
Integer intevalTime = commonProperties.getInterValTime();
//Initialize the execution environment and configure checkpointing, etc.
StreamExecutionEnvironment bsEnv = createEnv(checkpointUrl);
StreamTableEnvironment bsTableEnv = StreamTableEnvironment.create(bsEnv);
//Kafka is secured with Kerberos, so the krb5 file and kafka.jaas are required (see my other post for the kafka.jaas format)
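//Illustrative kafka.jaas content (keytab path and principal are placeholders, adjust
//them to your environment; see the referenced post for the full format):
//  KafkaClient {
//    com.sun.security.auth.module.Krb5LoginModule required
//    useKeyTab=true
//    storeKey=true
//    keyTab="/path/to/kafka_client.keytab"
//    principal="flinkuser@EXAMPLE.COM"
//    serviceName="kafka";
//  };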
Properties properties = getProperties();
//If Kerberos authentication is enabled in the configuration file
if (commonProperties.getKafkaKerberosFlag()) {
//Location of the kafka.jaas file, read from the configuration file
String kerberosConf = commonProperties.getKafkaKerberosConfig();
//Location of krb5.conf, i.e. the /etc/krb5.conf from the KDC server; it can be placed elsewhere as long as the path is set in the configuration file
String krb5Conf=commonProperties.getKerberoseKrb5Conf();
log.info("kerberosConf:"+kerberosConf);
log.info("krb5Conf:"+krb5Conf);
System.setProperty("java.security.auth.login.config", kerberosConf);
System.setProperty("java.security.krb5.conf", krb5Conf);
properties.put("security.protocol", "SASL_PLAINTEXT");
properties.put("sasl.mechanism", "GSSAPI");
properties.put("sasl.kerberos.service.name", "kafka");
}
//Deserialize the JSON data from Kafka into POJOs
FlinkKafkaConsumer<UnionPay> kafkaConsumer011 = new FlinkKafkaConsumer<>(topicName, new ConsumerDeserializationSchema<>(UnionPay.class), properties);
kafkaConsumer011.setStartFromLatest();
DataStream<UnionPay> dataStreamSource = bsEnv.addSource(kafkaConsumer011);
//Key the stream by merchant number and aggregate within tumbling windows
WindowedStream<UnionPay, String, TimeWindow> timeWindowWindowedStream = dataStreamSource.keyBy(new KeySelector<UnionPay, String>() {
@Override
public String getKey(UnionPay unionPay) throws Exception {
String mrchNo=MyUtils.getMrchNo(unionPay.getMrchno());
return mrchNo;
}
}).window(TumblingProcessingTimeWindows.of(Time.minutes(intevalTime)));
SingleOutputStreamOperator<MonitorTranDetail> resultWin = timeWindowWindowedStream.aggregate(new RsesultAggregateFunc(), new ProcessWindowFunction<MonitorTranDetail, MonitorTranDetail, String, TimeWindow>() {
@Override
public void process(String s, Context context, Iterable<MonitorTranDetail> iterable, Collector<MonitorTranDetail> collector) {
try {
Long lend = context.window().getEnd();
String etltime = MyUtils.getDateFromformatter("yyyyMMddHHmmss", lend);
String datadate = MyUtils.getDateFromformatter("yyyyMMdd", lend);
//String txntime = MyUtils.getDateFromformatter("HHmmss", lend);
String txntime=etltime;
//Set the data timestamps on each statistics object, using the end time of the window
for (MonitorTranDetail monitorTranDetail : iterable) {
monitorTranDetail.setEtlTIme(etltime);
monitorTranDetail.setDataDate(datadate);
monitorTranDetail.setTxnTime(txntime);
collector.collect(monitorTranDetail);
}
} catch (Exception ex) {
ex.printStackTrace();
}
}
});
//Add the sink that writes the statistics to MySQL
resultWin.addSink(new DbSinkFunction());
//Write the received data to HDFS
List<FieldSchema> fieldSchemas = xmlParser.parseField(tableName);
fieldSchemas.add(new FieldSchema("pexchangeDate", "string", "yyyyMMdd"));
SingleOutputStreamOperator<String> singleOutputUnionPay = timeWindowWindowedStream.process(new HdfsAllWindowFunction(fieldSchemas));
//HDFS directory where the database tables are stored
String dataBasedfsdir = commonProperties.getDfsdatabaseDir();
if (!dataBasedfsdir.endsWith("/")) {
dataBasedfsdir = dataBasedfsdir + "/";
}
String hdfsDir = dataBasedfsdir + tableName.toLowerCase() + "/";
BucketingSink<String> hdfsSink = new BucketingSink<>(hdfsDir);
// Split output directories by yyyyMMdd, similar to Hive date partitions
String strSplitformatter = getSplitChar();
hdfsSink.setBucketer(new HdfsBucketer(strSplitformatter));
// Batch size of 128 MB: once a file exceeds 128 MB it is closed and the next one is opened
hdfsSink.setBatchSize(1024 * 1024 * 128L);
Configuration conf = new Configuration();
String url = commonProperties.getDfsdefaultName();
conf.set("fs.default.name", url);
hdfsSink.setFSConfig(conf);
// Roll over to a new file every hour
hdfsSink.setBatchRolloverInterval(60 * 60 * 1000L);
hdfsSink.setInactiveBucketThreshold(1000L);
//Interval at which to check for inactive buckets
hdfsSink.setInactiveBucketCheckInterval(1000L);
// Prefix for pending files (default is "_")
hdfsSink.setPendingPrefix("");
// Suffix for pending files (default is ".pending")
hdfsSink.setPendingSuffix("");
//Prefix for in-progress files (default is "_")
hdfsSink.setInProgressPrefix(".");
//Add the HDFS sink
singleOutputUnionPay.addSink(hdfsSink);
HiveCatalog hive = new HiveCatalog(hiveCtalog, hiveDatabase, hiveConf, hiveVersion);
bsTableEnv.registerCatalog(hiveCtalog, hive);
bsTableEnv.useCatalog(hiveCtalog);
bsTableEnv.getConfig().setSqlDialect(SqlDialect.HIVE);
bsTableEnv.useDatabase(hiveDatabase);
//Check whether the table already exists
List<String> listObj = Arrays.asList(bsTableEnv.listTables());
List<String> listExist = listObj.stream().filter(r -> r.equalsIgnoreCase(tableName)).collect(Collectors.toList());
//Create the table if it does not exist
if (listExist.size() == 0) {
String sinkDdl = xmlParser.parseExternalTable(tableName);
log.info(sinkDdl);
bsTableEnv.executeSql(sinkDdl);
}
bsEnv.execute("Insert_statistics_into_MySQL_and_Hive");
} catch (Exception e) {
log.error("writePayByKey:{}", e);
}
}
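DbSinkFunction, which writes each window's MonitorTranDetail to MySQL, is not listed above. A minimal sketch of such a sink is given below; the table name (monitor_tran_detail), the column names, the JDBC URL and the getter names (assumed to mirror the setters used above) are all assumptions. The real implementation also has to fold in the previous minute's cumulative totals (for example with INSERT ... ON DUPLICATE KEY UPDATE, or by reading the previous row first), which this sketch omits:

import com.tcloudata.model.MonitorTranDetail;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

// Hedged sketch of a JDBC sink for the per-window statistics.
public class DbSinkFunction extends RichSinkFunction<MonitorTranDetail> {
    private transient Connection connection;
    private transient PreparedStatement statement;

    @Override
    public void open(Configuration parameters) throws Exception {
        connection = DriverManager.getConnection(
                "jdbc:mysql://mysql-host:3306/monitor?useSSL=false", "user", "password");
        statement = connection.prepareStatement(
                "INSERT INTO monitor_tran_detail (zone_cd, data_date, txn_time, tottxn_cnt, tottxn_amount) VALUES (?, ?, ?, ?, ?)");
    }

    @Override
    public void invoke(MonitorTranDetail value, Context context) throws Exception {
        statement.setString(1, value.getZoneCd());
        statement.setString(2, value.getDataDate());
        statement.setString(3, value.getTxnTime());
        statement.setInt(4, value.getTottxnCntd());
        statement.setDouble(5, value.getTottxnAmountd());
        statement.executeUpdate();
    }

    @Override
    public void close() throws Exception {
        if (statement != null) statement.close();
        if (connection != null) connection.close();
    }
}

Since the project also pulls in Druid, the real sink most likely takes its connections from a pool rather than opening one per subtask as done here.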
ConsumerDeserializationSchema.java
import com.alibaba.fastjson.JSONObject;
import com.tcloudata.utils.MyUtils;
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.typeutils.TypeExtractor;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
public class ConsumerDeserializationSchema<T> implements DeserializationSchema<T> {
private Class<T> clazz;
public ConsumerDeserializationSchema(Class<T> clazz) {
this.clazz = clazz;
}
@Override
public T deserialize(byte[] message) throws IOException {
ByteBuffer buffer = ByteBuffer.wrap(message).order(ByteOrder.LITTLE_ENDIAN);
String mess = MyUtils.byteBuffertoString(buffer);
//Wrap into a POJO
T objTarget = null;
String className = clazz.getTypeName().toLowerCase();
//The JSON for this type is nested, so it needs special handling
if (className.indexOf("UnionPay".toLowerCase()) != -1) {
objTarget = (T) MyUtils.convertUnionPay(mess, clazz);
} else {
objTarget = JSONObject.parseObject(mess, clazz);
}
return objTarget;
}
@Override
public boolean isEndOfStream(T t) {
return false;
}
@Override
public TypeInformation<T> getProducedType() {
return TypeExtractor.getForClass(clazz);
}
}
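HdfsAllWindowFunction, which turns each window's raw UnionPay records into the text lines that the BucketingSink writes to HDFS, is not listed either. The sketch below is only a guess at its shape: it assumes FieldSchema is Hive's org.apache.hadoop.hive.metastore.api.FieldSchema, flattens each record into a '\u0001'-delimited line in the field order parsed from the table definition, and fills the partition column (which is not a POJO field) from the window end time. The real class may do any of this differently:

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.tcloudata.model.UnionPay;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.StringJoiner;
import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;
import org.apache.hadoop.hive.metastore.api.FieldSchema;

// Hedged sketch: one delimited output line per incoming record, columns ordered by
// the FieldSchema list; the delimiter and the handling of the partition column are assumptions.
public class HdfsAllWindowFunction extends ProcessWindowFunction<UnionPay, String, String, TimeWindow> {
    private final List<FieldSchema> fieldSchemas;

    public HdfsAllWindowFunction(List<FieldSchema> fieldSchemas) {
        this.fieldSchemas = fieldSchemas;
    }

    @Override
    public void process(String key, Context context, Iterable<UnionPay> elements, Collector<String> out) {
        String day = new SimpleDateFormat("yyyyMMdd").format(new Date(context.window().getEnd()));
        for (UnionPay record : elements) {
            JSONObject json = (JSONObject) JSON.toJSON(record);
            StringJoiner line = new StringJoiner("\u0001");
            for (FieldSchema field : fieldSchemas) {
                Object value = json.get(field.getName());
                // columns missing on the POJO (e.g. the partition column) fall back to the window date
                line.add(value == null ? day : value.toString());
            }
            out.collect(line.toString());
        }
    }
}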
RsesultAggregateFunc.java
import com.tcloudata.model.MonitorTranDetail;
import com.tcloudata.model.UnionPay;
import com.tcloudata.utils.MyUtils;
import org.apache.flink.api.common.functions.AggregateFunction;
import java.math.BigDecimal;
public class RsesultAggregateFunc implements AggregateFunction<UnionPay, MonitorTranDetail, MonitorTranDetail> {
@Override
public MonitorTranDetail createAccumulator() {
MonitorTranDetail resultInfo = new MonitorTranDetail();
return resultInfo;
}
@Override
public MonitorTranDetail add(UnionPay unionPay, MonitorTranDetail monitorTranDetail) {
String mrchNo = MyUtils.getMrchNo(unionPay.getMrchno());
Double amount = unionPay.getAmount();
Integer txnCnt1 = 0;
Integer txnCnt2 = 0;
Integer txnCnt3 = 0;
Integer txnCnt4 = 0;
Integer txnCnt5 = 0;
Integer txnCnt6 = 0;
//Total number of transactions for the day
Integer tottxnCnt = 1;
//Number of transactions with amount 10000 or above
if (amount >= 10000) {
txnCnt1 = 1;
}
//Number of transactions with amount between 3000 and 10000
if (amount >= 3000 && amount < 10000) {
txnCnt2 = 1;
}
//Number of transactions with amount between 1000 and 3000
if (amount >= 1000 && amount < 3000) {
txnCnt3 = 1;
}
// Number of transactions with amount between 500 and 1000
if (amount >= 500 && amount < 1000) {
txnCnt4 = 1;
}
// Number of transactions with amount between 100 and 500
if (amount >= 100 && amount < 500) {
txnCnt5 = 1;
}
// Number of transactions with amount below 100
if (amount > 0 && amount < 100) {
txnCnt6 = 1;
}
//Total transaction amount for the day
BigDecimal tmpBigdecimal = new BigDecimal(amount);
if (mrchNo.equals(monitorTranDetail.getZoneCd())) {
txnCnt1 = monitorTranDetail.getTxnCnt1() + txnCnt1;
txnCnt2 = monitorTranDetail.getTxnCnt2() + txnCnt2;
txnCnt3 = monitorTranDetail.getTxnCnt3() + txnCnt3;
txnCnt4 = monitorTranDetail.getTxnCnt4() + txnCnt4;
txnCnt5 = monitorTranDetail.getTxnCnt5() + txnCnt5;
txnCnt6 = monitorTranDetail.getTxnCnt6() + txnCnt6;
tottxnCnt = monitorTranDetail.getTottxnCntd() + 1;
BigDecimal d1 = new BigDecimal(amount);
BigDecimal d2 = new BigDecimal(monitorTranDetail.getTottxnAmountd());
tmpBigdecimal = d1.add(d2);
} else {
monitorTranDetail.setZoneCd(mrchNo);
}
monitorTranDetail.setTxnCnt1(txnCnt1);
monitorTranDetail.setTxnCnt2(txnCnt2);
monitorTranDetail.setTxnCnt3(txnCnt3);
monitorTranDetail.setTxnCnt4(txnCnt4);
monitorTranDetail.setTxnCnt5(txnCnt5);
monitorTranDetail.setTxnCnt6(txnCnt6);
monitorTranDetail.setTottxnCntd(tottxnCnt);
monitorTranDetail.setTottxnAmountd(tmpBigdecimal.doubleValue());
return monitorTranDetail;
}
@Override
public MonitorTranDetail getResult(MonitorTranDetail monitorTranDetail) {
return monitorTranDetail;
}
@Override
public MonitorTranDetail merge(MonitorTranDetail acc1, MonitorTranDetail acc2) {
acc2.setTxnCnt1(acc1.getTxnCnt1() + acc2.getTxnCnt1());
acc2.setTxnCnt2(acc1.getTxnCnt2() + acc2.getTxnCnt2());
acc2.setTxnCnt3(acc1.getTxnCnt3() + acc2.getTxnCnt3());
acc2.setTxnCnt4(acc1.getTxnCnt4() + acc2.getTxnCnt4());
acc2.setTxnCnt5(acc1.getTxnCnt5() + acc2.getTxnCnt5());
acc2.setTxnCnt6(acc1.getTxnCnt6() + acc2.getTxnCnt6());
acc2.setTottxnCntd(acc1.getTottxnCntd() + acc2.getTottxnCntd());
BigDecimal d1 = new BigDecimal(acc1.getTottxnAmountd());
BigDecimal d2 = new BigDecimal(acc2.getTottxnAmountd());
BigDecimal tmpBigdecimal = d1.add(d2);
acc2.setTottxnAmountd(tmpBigdecimal.doubleValue());
return acc2;
}
}
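HdfsBucketer, which makes the BucketingSink split its output into yyyyMMdd sub-directories that line up with the Hive date partitions, is the last helper that is not listed. A minimal sketch, assuming the bucket path is derived from processing time using the format string passed in from getSplitChar():

import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.flink.streaming.connectors.fs.Clock;
import org.apache.flink.streaming.connectors.fs.bucketing.Bucketer;
import org.apache.hadoop.fs.Path;

// Hedged sketch: buckets every record into basePath/<yyyyMMdd>/. Whether the real
// HdfsBucketer also prefixes the directory with the partition column name
// (e.g. pexchangedate=20210101) is not shown in the original code.
public class HdfsBucketer implements Bucketer<String> {
    private final String formatString;

    public HdfsBucketer(String formatString) {
        this.formatString = formatString;
    }

    @Override
    public Path getBucketPath(Clock clock, Path basePath, String element) {
        String bucket = new SimpleDateFormat(formatString).format(new Date(clock.currentTimeMillis()));
        return new Path(basePath, bucket);
    }
}

Finally, the project's pom.xml: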
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.tcloudata</groupId>
    <artifactId>flinkhr</artifactId>
    <version>1.0</version>
    <name>flinkhr</name>
    <properties>
        <flink.version>1.11.2</flink.version>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <scala.binary.version>2.11</scala.binary.version>
        <hive-jdbc.version>1.2.1</hive-jdbc.version>
        <hadoop-common.version>2.6.5</hadoop-common.version>
        <kafka.version>2.4.1</kafka.version>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-java-bridge_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-blink_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>${kafka.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka_2.11</artifactId>
            <version>2.4.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-json</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.7</version>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-statebackend-rocksdb_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.54</version>
        </dependency>
        <dependency>
            <groupId>org.yaml</groupId>
            <artifactId>snakeyaml</artifactId>
            <version>1.27</version>
        </dependency>
        <dependency>
            <groupId>com.oracle</groupId>
            <artifactId>ojdbc6</artifactId>
            <version>11.2.0.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-hive_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-filesystem_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.bahir</groupId>
            <artifactId>flink-connector-redis_${scala.binary.version}</artifactId>
            <version>1.0</version>
        </dependency>
        <dependency>
            <groupId>dom4j</groupId>
            <artifactId>dom4j</artifactId>
            <version>1.6.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-shaded-hadoop-2-uber</artifactId>
            <version>2.6.5-8.0</version>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.48</version>
        </dependency>
        <dependency>
            <groupId>org.apache.thrift</groupId>
            <artifactId>libfb303</artifactId>
            <version>0.9.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-metastore</artifactId>
            <version>1.2.1</version>
            <exclusions>
                <exclusion>
                    <groupId>org.apache.hadoop</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>commons-cli</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-exec</artifactId>
            <version>1.2.1</version>
            <exclusions>
                <exclusion>
                    <groupId>commons-cli</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>com.google</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.calcite</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.6</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>druid</artifactId>
            <version>1.1.17</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hive</groupId>
            <artifactId>hive-jdbc</artifactId>
            <version>${hive-jdbc.version}</version>
            <exclusions>
                <exclusion>
                    <groupId>org.eclipse.jetty.aggregate</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.slf4j</groupId>
                    <artifactId>slf4j-log4j12</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>redis.clients</groupId>
            <artifactId>jedis</artifactId>
            <version>2.8.0</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.0.0</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <artifactSet>
                                <excludes>
                                    <exclude>org.apache.flink:force-shading</exclude>
                                    <exclude>com.google.code.findbugs:jsr305</exclude>
                                    <exclude>org.slf4j:*</exclude>
                                    <exclude>log4j:*</exclude>
                                </excludes>
                            </artifactSet>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers>
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>com.tcloudata.App</mainClass>
                                </transformer>
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>