这里的 maven 依赖比较冗余,推荐大家都加上,后面陆续优化。
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>org.example</groupId>
    <artifactId>flink-kafka2mq</artifactId>
    <version>1.0-SNAPSHOT</version>
    <properties>
        <hbase.version>2.3.3</hbase.version>
        <hadoop.version>3.1.1</hadoop.version>
        <spark.version>3.0.2</spark.version>
        <scala.version>2.12.10</scala.version>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <flink.version>1.14.6</flink.version>
        <scala.binary.version>2.12</scala.binary.version>
        <target.java.version>1.8</target.java.version>
        <maven.compiler.source>${target.java.version}</maven.compiler.source>
        <maven.compiler.target>${target.java.version}</maven.compiler.target>
        <log4j.version>2.17.2</log4j.version>
        <hadoop.version>3.1.2</hadoop.version>
        <hive.version>3.1.2</hive.version>
    </properties>
    <dependencies>
        <dependency>
            <groupId>org.apache.rocketmq</groupId>
            <artifactId>rocketmq-client</artifactId>
            <version>4.8.0</version>
        </dependency>
        <dependency>
            <groupId>io.netty</groupId>
            <artifactId>netty-all</artifactId>
            <version>4.1.68.Final</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-jdbc_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.jyaml</groupId>
            <artifactId>jyaml</artifactId>
            <version>1.3</version>
        </dependency>
        <dependency>
            <groupId>gaei.cn.x5l</groupId>
            <artifactId>tsp-gb-decode</artifactId>
            <version>1.0.0</version>
            <exclusions>
                <exclusion>
                    <groupId>org.apache.logging.log4j</groupId>
                    <artifactId>log4j-core</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.logging.log4j</groupId>
                    <artifactId>log4j-api</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.logging.log4j</groupId>
                    <artifactId>log4j-slf4j-impl</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.44</version>
            <scope>runtime</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-shaded-hadoop-3</artifactId>
            <version>3.1.1.7.2.8.0-224-9.0</version>
            <exclusions>
                <exclusion>
                    <artifactId>slf4j-log4j12</artifactId>
                    <groupId>org.slf4j</groupId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.logging.log4j</groupId>
                    <artifactId>log4j-core</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.logging.log4j</groupId>
                    <artifactId>log4j-api</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>org.apache.logging.log4j</groupId>
                    <artifactId>log4j-slf4j-impl</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>log4j</groupId>
                    <artifactId>log4j</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-sql-connector-kafka_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-scala_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-sql-connector-kafka_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-state-processor-api_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>commons-lang</groupId>
            <artifactId>commons-lang</artifactId>
            <version>2.5</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-runtime-web_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-slf4j-impl</artifactId>
            <version>${log4j.version}</version>
            <scope>runtime</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-api</artifactId>
            <version>${log4j.version}</version>
            <scope>runtime</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-core</artifactId>
            <version>${log4j.version}</version>
            <scope>runtime</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>3.3.1</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-auth</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-statebackend-rocksdb_${scala.binary.version}</artifactId>
            <version>${flink.version}</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.1.23</version>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.16.18</version>
            <scope>provided</scope>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.0.0</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <createDependencyReducedPom>false</createDependencyReducedPom>
                            <artifactSet>
                                <excludes>
                                    <exclude>org.apache.flink:force-shading</exclude>
                                    <exclude>com.google.code.findbugs:jsr305</exclude>
                                    <exclude>org.slf4j:*</exclude>
                                    <exclude>org.apache.logging.log4j:*</exclude>
                                    <exclude>org.apache.flink:flink-runtime-web_2.11</exclude>
                                </excludes>
                            </artifactSet>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers>
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>com.owp.flink.kafka.KafkaSourceDemo</mainClass>
                                </transformer>
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <configuration>
                    <finalName>RocketMQProducerDemo</finalName>
                    <appendAssemblyId>false</appendAssemblyId>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                    <archive>
                        <manifest>
                            <mainClass>kafka2mq.api.mq.RocketMQProducerDemo</mainClass>
                        </manifest>
                    </archive>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>assembly</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
        <pluginManagement>
            <plugins>
                <plugin>
                    <groupId>org.eclipse.m2e</groupId>
                    <artifactId>lifecycle-mapping</artifactId>
                    <version>1.0.0</version>
                    <configuration>
                        <lifecycleMappingMetadata>
                            <pluginExecutions>
                                <pluginExecution>
                                    <pluginExecutionFilter>
                                        <groupId>org.apache.maven.plugins</groupId>
                                        <artifactId>maven-shade-plugin</artifactId>
                                        <versionRange>[3.0.0,)</versionRange>
                                        <goals>
                                            <goal>shade</goal>
                                        </goals>
                                    </pluginExecutionFilter>
                                    <action>
                                        <ignore/>
                                    </action>
                                </pluginExecution>
                                <pluginExecution>
                                    <pluginExecutionFilter>
                                        <groupId>org.apache.maven.plugins</groupId>
                                        <artifactId>maven-compiler-plugin</artifactId>
                                        <versionRange>[3.1,)</versionRange>
                                        <goals>
                                            <goal>testCompile</goal>
                                            <goal>compile</goal>
                                        </goals>
                                    </pluginExecutionFilter>
                                    <action>
                                        <ignore/>
                                    </action>
                                </pluginExecution>
                            </pluginExecutions>
                        </lifecycleMappingMetadata>
                    </configuration>
                </plugin>
            </plugins>
        </pluginManagement>
    </build>
    <repositories>
        <repository>
            <id>cdh.releases.repo</id>
            <url>https://repository.cloudera.com/artifactory/libs-release-local/</url>
            <name>Releases Repository</name>
        </repository>
    </repositories>
</project>
注意:
1、此程序中所有的相关配置都是通过 Mysql 读取的(生产环境中没有直接写死的,都是通过配置文件动态配置),大家实际测试过程中可以将相关配置信息写死。
2、此程序中 Kafka 涉及到了 Kerberos 认证操作,大家的操作环境中没有的话可以去掉。
import cdp.kafka2mq.test.conf.ConfigTools;
import cdp.kafka2mq.test.function.MqSinkFunction;
import cdp.kafka2mq.test.utils.CommonUtils;
import org.apache.flink.kafka.shaded.org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.rocketmq.client.producer.DefaultMQProducer;
import java.util.*;
public class Test {
    // public static Logger logger = Logger.getLogger(Test.class);

    /**
     * Job entry point: loads the job configuration from MySQL, consumes Kafka
     * records and forwards each one to RocketMQ through a custom sink.
     *
     * <p>args[0] selects which configuration row to load
     * (see ConfigTools.initMySqlConf).
     */
    public static void main(String[] args) throws Exception {
        ConfigTools.initMySqlConf(args[0], Test.class);
        Map<String, Object> conf = ConfigTools.mapConf;

        // Producer / consumer sections of the externalized configuration.
        Map<String, Object> producerConf = (Map<String, Object>) conf.get("mq-producer");
        Map<String, Object> consumerConf = (Map<String, Object>) conf.get("kafka-consumer");

        String targetTopic = String.valueOf(producerConf.get("defaultTopic"));
        System.out.println("mq-topic:" + targetTopic);

        // Chaining disabled so source and sink appear as separate operators in the UI.
        StreamExecutionEnvironment env =
                StreamExecutionEnvironment.getExecutionEnvironment().disableOperatorChaining();

        // Custom Kafka source (handles the optional Kerberos settings).
        FlinkKafkaConsumer<ConsumerRecord<String, String>> source =
                CommonUtils.getKafkaConsumer(consumerConf);
        DataStream<ConsumerRecord<String, String>> records = env.addSource(source);

        // Custom RocketMQ sink, single parallel instance.
        records.addSink(new MqSinkFunction(targetTopic, conf)).setParallelism(1);

        env.execute();
    }
}
读取 Mysql 中的配置
package cdp.kafka2mq.test.conf;
import com.alibaba.fastjson.JSON;
import cdp.kafka2mq.test.utils.DBConn;
import lombok.extern.slf4j.Slf4j;
import org.ho.yaml.Yaml;
import java.io.InputStream;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
@Slf4j
public class ConfigTools {
    /** Parsed job configuration; null until one of the init* methods succeeds. */
    public static Map<String, Object> mapConf;

    /**
     * Loads configuration from the classpath YAML file /flink_backup_&lt;option&gt;.yml.
     *
     * @param option environment suffix (e.g. "local")
     */
    public static void initConf(String option) {
        String confFile = "/flink_backup_" + option + ".yml";
        // try-with-resources: the original leaked the InputStream.
        try (InputStream dumpFile = ConfigTools.class.getResourceAsStream(confFile)) {
            mapConf = Yaml.loadType(dumpFile, HashMap.class);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Loads configuration from MySQL: reads DB credentials from /appconfig.yml,
     * then fetches the JSON config row keyed by (class name, option).
     * Leaves mapConf untouched when no row is found.
     *
     * @param option config_name value in base_app_config
     * @param clazz  job class whose fully-qualified name is the app_name key
     */
    public static void initMySqlConf(String option, Class clazz) {
        String className = clazz.getName();
        String confFile = "/appconfig.yml";
        try (InputStream dumpFile = ConfigTools.class.getResourceAsStream(confFile)) {
            Map<String, String> mysqlConf = Yaml.loadType(dumpFile, HashMap.class);
            String username = mysqlConf.get("mysql.username");
            String password = mysqlConf.get("mysql.password");
            String url = mysqlConf.get("mysql.url");
            Connection conn = DBConn.conn(url, username, password);
            Map<String, Object> config = getConfig(conn, className, option);
            if (config == null || config.isEmpty()) {
                log.error("获取配置文件失败");
                return;
            }
            mapConf = config;
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Fetches the JSON config blob for (className, option) and parses it into a map.
     * Uses bind parameters instead of String.format so the query is immune to SQL
     * injection and actually benefits from the PreparedStatement.
     * Closes both the statement and the supplied connection before returning.
     *
     * @return parsed config map, or null when fastjson cannot parse the blob
     */
    private static Map<String, Object> getConfig(Connection conn, String className, String option) throws SQLException {
        PreparedStatement preparedStatement = null;
        try {
            String sql = "select config_context from base_app_config where app_name = ? and config_name = ?";
            preparedStatement = conn.prepareStatement(sql);
            preparedStatement.setString(1, className);
            preparedStatement.setString(2, option);
            ResultSet rs = preparedStatement.executeQuery();
            String config_context = "";
            while (rs.next()) {
                // If several rows match, the last one wins (same as the original behavior).
                config_context = rs.getString("config_context");
            }
            System.out.println("配置信息config_context:" + config_context);
            Map<String, Object> mysqlConfMap = JSON.parseObject(config_context, Map.class);
            return mysqlConfMap;
        } finally {
            if (preparedStatement != null) {
                preparedStatement.close();
            }
            if (conn != null) {
                conn.close();
            }
        }
    }

    /** Smoke test: load the local YAML config and dump it as JSON. */
    public static void main(String[] args) {
        initConf("local");
        String s = JSON.toJSONString(mapConf);
        System.out.println(s);
    }
}
Mysql 连接工具类
package cdp.kafka2mq.test.utils;
import java.sql.*;
public class DBConn {
    // JDBC driver class for MySQL Connector/J 5.x.
    private static final String driver = "com.mysql.jdbc.Driver";
    // NOTE(review): this field is never assigned — conn() returns a local
    // connection instead — so close() below is effectively a no-op. Callers
    // close their connections themselves (see ConfigTools.getConfig).
    // Kept to preserve the public close() contract; the unused ps/rs/cs
    // statics from the original were removed.
    private static Connection conn = null;

    /**
     * Opens a MySQL connection.
     *
     * @param url      JDBC URL
     * @param username DB user
     * @param password DB password
     * @return a live connection, or null when the driver is missing or the connect fails
     */
    public static Connection conn(String url, String username, String password) {
        Connection conn = null;
        try {
            Class.forName(driver); // load the JDBC driver
            try {
                conn = DriverManager.getConnection(url, username, password);
            } catch (SQLException e) {
                e.printStackTrace();
            }
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
        return conn;
    }

    /**
     * Closes the static connection if one was ever assigned
     * (currently never — see the field note above).
     */
    public static void close() {
        if (conn != null) {
            try {
                conn.close();
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }
    }
}
Kafka 消费工具类
package cdp.kafka2mq.test.utils;
import cdp.kafka2mq.test.conf.ConfigTools;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.common.time.Time;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.contrib.streaming.state.RocksDBStateBackend;
import org.apache.flink.kafka.shaded.org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.streaming.connectors.kafka.KafkaDeserializationSchema;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.concurrent.TimeUnit;
@Slf4j
public class CommonUtils {
    /**
     * Builds a FlinkKafkaConsumer that subscribes to the configured topics and
     * re-emits each record as a ConsumerRecord&lt;String, String&gt; with UTF-8
     * decoded key/value.
     *
     * Expected kafkaConf layout (from the MySQL-backed config):
     *   topics : comma-separated topic list
     *   prop   : raw Kafka consumer properties, plus optional Kerberos keys
     *            (isKerberized, krb5Conf, security_protocol, useTicketCache,
     *             serviceName, keytab, principal)
     */
    public static FlinkKafkaConsumer<ConsumerRecord<String, String>> getKafkaConsumer(Map<String, Object> kafkaConf) throws IOException {
        String[] topics = ((String) kafkaConf.get("topics")).split(",");
        // Arrays.toString: logging the array reference would print its identity hash.
        log.info("监听的topic: {}", Arrays.toString(topics));
        Properties properties = new Properties();
        Map<String, String> kafkaProp = (Map<String, String>) kafkaConf.get("prop");
        for (Map.Entry<String, String> entry : kafkaProp.entrySet()) {
            properties.setProperty(entry.getKey(), entry.getValue());
        }
        // Optional Kerberos (SASL/GSSAPI) setup, enabled when isKerberized == "1".
        if (!StringUtils.isBlank((String) kafkaProp.get("isKerberized")) && "1".equals(kafkaProp.get("isKerberized"))) {
            System.setProperty("java.security.krb5.conf", kafkaProp.get("krb5Conf"));
            properties.put("security.protocol", kafkaProp.get("security_protocol"));
            properties.put("sasl.jaas.config", "com.sun.security.auth.module.Krb5LoginModule required "
                    + "useTicketCache=" + kafkaProp.get("useTicketCache") + " "
                    + "serviceName=\"" + kafkaProp.get("serviceName") + "\" "
                    + "useKeyTab=true "
                    + "keyTab=\"" + kafkaProp.get("keytab").toString() + "\" "
                    + "principal=\"" + kafkaProp.get("principal").toString() + "\";");
        }
        // Fix: the original set the producer-side names "key.serializer" /
        // "value.serializer" on a *consumer*; the consumer property names are
        // *.deserializer (the values were already deserializer classes).
        properties.put("key.deserializer", "org.apache.flink.kafka.shaded.org.apache.kafka.common.serialization.ByteArrayDeserializer");
        properties.put("value.deserializer", "org.apache.flink.kafka.shaded.org.apache.kafka.common.serialization.ByteArrayDeserializer");
        FlinkKafkaConsumer<ConsumerRecord<String, String>> consumerRecordFlinkKafkaConsumer = new FlinkKafkaConsumer<ConsumerRecord<String, String>>(Arrays.asList(topics), new KafkaDeserializationSchema<ConsumerRecord<String, String>>() {
            @Override
            public TypeInformation<ConsumerRecord<String, String>> getProducedType() {
                return TypeInformation.of(new TypeHint<ConsumerRecord<String, String>>() {
                });
            }

            @Override
            public boolean isEndOfStream(ConsumerRecord<String, String> stringStringConsumerRecord) {
                return false; // unbounded stream
            }

            @Override
            public ConsumerRecord<String, String> deserialize(ConsumerRecord<byte[], byte[]> record) throws Exception {
                // Decode key/value as UTF-8; map a null key to "" and a null value
                // to "{}" so the downstream JSON parse always has input.
                return new ConsumerRecord<String, String>(
                        record.topic(),
                        record.partition(),
                        record.offset(),
                        record.timestamp(),
                        record.timestampType(),
                        record.checksum(),
                        record.serializedKeySize(),
                        record.serializedValueSize(),
                        new String(record.key() == null ? "".getBytes(StandardCharsets.UTF_8) : record.key(), StandardCharsets.UTF_8),
                        new String(record.value() == null ? "{}".getBytes(StandardCharsets.UTF_8) : record.value(), StandardCharsets.UTF_8));
            }
        }, properties);
        return consumerRecordFlinkKafkaConsumer;
    }
}
自定义 sink-function 实现数据写入 RocketMQ
package cdp.kafka2mq.test.function;
import cdp.kafka2mq.test.conf.ConfigTools;
import com.alibaba.fastjson.JSON;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.kafka.shaded.org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.rocketmq.client.producer.DefaultMQProducer;
import org.apache.rocketmq.client.producer.SendResult;
import org.apache.rocketmq.common.message.Message;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;
public class MqSinkFunction extends RichSinkFunction<ConsumerRecord<String, String>> {
    // Created per subtask in open(); transient because DefaultMQProducer is not
    // part of the serialized function state Flink ships to the cluster.
    // Fix: the original kept a single *static* producer that every subtask in
    // the same JVM would reconfigure and re-start concurrently.
    private transient DefaultMQProducer producer;
    private String topic;
    private Map<String, Object> confMap;

    /**
     * @param topic   RocketMQ topic to write to
     * @param confMap full job configuration; the "mq-producer" section is read in open()
     */
    public MqSinkFunction(String topic, Map<String, Object> confMap) {
        this.topic = topic;
        this.confMap = confMap;
    }

    /**
     * Builds and starts the RocketMQ producer from the "mq-producer.prop"
     * section of the configuration (group, server.address, retries).
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        // super.open(parameters);
        System.out.println("confMap: " + confMap);
        Map<String, Object> mqProducer = (Map<String, Object>) confMap.get("mq-producer");
        Map<String, Object> mqProp = (Map<String, Object>) mqProducer.get("prop");
        String groupId = String.valueOf(mqProp.get("group"));
        String srvAddr = String.valueOf(mqProp.get("server.address"));
        int retries = Integer.parseInt(String.valueOf(mqProp.get("retries")));
        System.out.println("mq生产者组:" + groupId);
        System.out.println("mq地址:" + srvAddr);
        System.out.println("retries:" + retries);
        producer = new DefaultMQProducer();
        producer.setProducerGroup(groupId);
        producer.setNamesrvAddr(srvAddr);
        producer.setRetryTimesWhenSendFailed(retries);
        producer.setUseTLS(true);
        producer.start();
    }

    /** Releases the producer when the task shuts down. */
    @Override
    public void close() throws Exception {
        if (producer != null) {
            producer.shutdown();
        }
    }

    /**
     * Parses the Kafka record value as JSON and forwards it to RocketMQ,
     * tagging the message with the record's "id" field. Records with an
     * unparseable body or a blank id are silently skipped.
     */
    @Override
    public void invoke(ConsumerRecord<String, String> record, Context context) throws Exception {
        String message = record.value();
        // System.out.println(message);
        HashMap<String, Object> infoMap = JSON.parseObject(message, HashMap.class);
        if (infoMap == null) {
            // Fix: JSON.parseObject returns null for blank input; the original NPE'd here.
            return;
        }
        // NOTE(review): a missing "id" yields the string "null" (not blank) and is
        // therefore NOT skipped — preserved from the original; confirm if intended.
        String id = String.valueOf(infoMap.get("id"));
        if (StringUtils.isBlank(id)) {
            return;
        }
        Message msg = new Message();
        msg.setTopic(topic);
        msg.setTags(id);
        msg.setBody(message.getBytes(StandardCharsets.UTF_8));
        msg.setTransactionId(id);
        // System.out.println("msg:" + msg);
        System.out.println("send前");
        SendResult send = producer.send(msg);
        System.out.printf("%s%n", send);
        System.out.println("send后");
    }
}
其他配置文件
mysql.url: "jdbc:mysql://ip:3306/dbName?useSSL=false"
mysql.username: "username"
mysql.password: "password"
mysql.driver: "com.mysql.jdbc.Driver"
log4j.rootLogger=info, stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
<configuration monitorInterval="5">
    <Properties>
        <property name="LOG_PATTERN" value="%date{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n" />
        <property name="LOG_LEVEL" value="ERROR" />
    </Properties>
    <appenders>
        <console name="console" target="SYSTEM_OUT">
            <PatternLayout pattern="${LOG_PATTERN}"/>
            <ThresholdFilter level="${LOG_LEVEL}" onMatch="ACCEPT" onMismatch="DENY"/>
        </console>
        <File name="log" fileName="tmp/log/job.log" append="false">
            <PatternLayout pattern="%d{HH:mm:ss.SSS} %-5level %class{36} %L %M - %msg%xEx%n"/>
        </File>
    </appenders>
    <loggers>
        <root level="${LOG_LEVEL}">
            <appender-ref ref="console"/>
            <appender-ref ref="log"/>
        </root>
    </loggers>
</configuration>