FlinkCDC快速搭建实现数据监控

引入依赖

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.sand</groupId>
    <artifactId>flinkcdc</artifactId>
    <version>1.0-SNAPSHOT</version>
    <packaging>jar</packaging>

    <name>Flink Quickstart Job</name>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <flink.version>1.17.1</flink.version>

        <target.java.version>1.8</target.java.version>
        <scala.binary.version>2.12</scala.binary.version>
        <maven.compiler.source>${target.java.version}</maven.compiler.source>
        <maven.compiler.target>${target.java.version}</maven.compiler.target>
        <log4j.version>2.17.1</log4j.version>
    </properties>

    <repositories>
        <repository>
            <id>apache.snapshots</id>
            <name>Apache Development Snapshot Repository</name>
            <url>https://repository.apache.org/content/repositories/snapshots/</url>
            <releases>
                <enabled>false</enabled>
            </releases>
            <snapshots>
                <enabled>true</enabled>
            </snapshots>
        </repository>
    </repositories>

    <dependencies>


        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>

            <artifactId>flink-streaming-java</artifactId>
            <version>${flink.version}</version>
        </dependency>






        <dependency>
            <groupId>org.apache.flink</groupId>

            <artifactId>flink-clients</artifactId>
            <version>${flink.version}</version>
        </dependency>

        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.49</version>
        </dependency>



        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-core</artifactId>
            <version>${flink.version}</version>
        </dependency>


        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner_2.12</artifactId>
            <version>${flink.version}</version>
        </dependency>


        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-elasticsearch7</artifactId>
            <version>3.0.1-1.17</version>
        </dependency>

        <dependency>
            <groupId>com.ververica</groupId>
            <artifactId>flink-connector-mysql-cdc</artifactId>

            <version>2.4.0</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-json</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.75</version>
        </dependency>

        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-jdbc</artifactId>
            <version>3.1.1-1.17</version>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>2.0.32</version>
        </dependency>

        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.30</version>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.16</version>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-core</artifactId>
            <version>2.17.2</version>
        </dependency>

        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>8.0.30</version>
        </dependency>

        <dependency>
            <groupId>cn.hutool</groupId>
            <artifactId>hutool-all</artifactId>
            <version>5.7.10</version>
        </dependency>
    </dependencies>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.1</version>
                <configuration>
                    <source>${target.java.version}</source>
                    <target>${target.java.version}</target>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.1.1</version>
                <executions>

                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <createDependencyReducedPom>false</createDependencyReducedPom>
                            <artifactSet>
                                <excludes>
                                    <exclude>org.apache.flink:flink-shaded-force-shading</exclude>
                                    <exclude>com.google.code.findbugs:jsr305</exclude>
                                    <exclude>org.slf4j:*</exclude>
                                    <exclude>org.apache.logging.log4j:*</exclude>
                                </excludes>
                            </artifactSet>
                            <filters>
                                <filter>

                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <transformers>
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
                                <transformer
                                        implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>com.sand.DataStreamJob</mainClass>
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>

        <pluginManagement>
            <plugins>


                <plugin>
                    <groupId>org.eclipse.m2e</groupId>
                    <artifactId>lifecycle-mapping</artifactId>
                    <version>1.0.0</version>
                    <configuration>
                        <lifecycleMappingMetadata>
                            <pluginExecutions>
                                <pluginExecution>
                                    <pluginExecutionFilter>
                                        <groupId>org.apache.maven.plugins</groupId>
                                        <artifactId>maven-shade-plugin</artifactId>
                                        <versionRange>[3.1.1,)</versionRange>
                                        <goals>
                                            <goal>shade</goal>
                                        </goals>
                                    </pluginExecutionFilter>
                                    <action>
                                        <ignore/>
                                    </action>
                                </pluginExecution>
                                <pluginExecution>
                                    <pluginExecutionFilter>
                                        <groupId>org.apache.maven.plugins</groupId>
                                        <artifactId>maven-compiler-plugin</artifactId>
                                        <versionRange>[3.1,)</versionRange>
                                        <goals>
                                            <goal>testCompile</goal>
                                            <goal>compile</goal>
                                        </goals>
                                    </pluginExecutionFilter>
                                    <action>
                                        <ignore/>
                                    </action>
                                </pluginExecution>
                            </pluginExecutions>
                        </lifecycleMappingMetadata>
                    </configuration>
                </plugin>


            </plugins>
        </pluginManagement>
    </build>
</project>

数据库配置类

package com.sand;

import org.apache.commons.collections.CollectionUtils;

import java.util.Arrays;
import java.util.List;
import java.util.StringJoiner;

/**
 * Static connection settings for the MySQL source read by {@code DataStreamJob}.
 *
 * <p>Every value is a hard-coded constant exposed through a static getter; the class
 * holds no instance state.
 *
 * @author zdd
 */
public class CDCKit {

    /** Database (schema) name. */
    private static final String DATABASE = "byyy_iowtb_wms_test";

    /** Unqualified table names; {@link #getTableList()} prefixes each with the database. */
    private static final List<String> TABLE_LIST = Arrays.asList(
            "inv_tt_stock_info",
            "base_tm_sku",
            "base_tm_third_sku_certificate",
            "base_tm_sku_gsp"
    );

    /** Host (IP address) of the MySQL server. */
    private static final String HOSTNAME = "192.168.111.107";

    /** TCP port of the MySQL server. */
    private static final int PORT = 3306;

    /** Login user name. */
    private static final String USERNAME = "test_cdc";

    /**
     * Login password.
     *
     * <p>NOTE(review): credentials are committed in source; consider loading them from
     * the environment or an external configuration file instead.
     */
    private static final String PASSWORD = "Test_cdc@123";

    /**
     * Debug entry point: prints the JVM temporary directory ({@code java.io.tmpdir}).
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String tempDir = System.getProperty("java.io.tmpdir");
        System.out.println("tempDir = " + tempDir);
    }

    /**
     * @return the database name
     */
    public static String getDatabase() {
        return DATABASE;
    }

    /**
     * Builds the comma-separated list of fully qualified table names
     * ({@code database.table}).
     *
     * @return the joined list, or {@code null} when no tables are configured
     */
    public static String getTableList() {
        // Plain JDK check: avoids relying on commons-collections for a trivial test.
        if (TABLE_LIST == null || TABLE_LIST.isEmpty()) {
            return null;
        }
        StringJoiner qualifiedNames = new StringJoiner(",");
        for (String tableName : TABLE_LIST) {
            qualifiedNames.add(getDatabase() + "." + tableName);
        }
        return qualifiedNames.toString();
    }

    /**
     * @return the MySQL host
     */
    public static String getHostname() {
        return HOSTNAME;
    }

    /**
     * @return the MySQL port
     */
    public static int getPort() {
        return PORT;
    }

    /**
     * @return the login user name
     */
    public static String getUsername() {
        return USERNAME;
    }

    /**
     * @return the login password
     */
    public static String getPassword() {
        return PASSWORD;
    }

}

监控类



package com.sand;


import cn.hutool.core.io.FileUtil;
import com.ververica.cdc.connectors.mysql.source.MySqlSource;
import com.ververica.cdc.connectors.mysql.table.StartupOptions;
import com.ververica.cdc.debezium.JsonDebeziumDeserializationSchema;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.jobgraph.SavepointConfigOptions;
import org.apache.flink.runtime.state.filesystem.FsStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import java.io.File;
import java.util.Objects;
import java.util.Properties;


/**
 * Local Flink job that reads MySQL change events through the Flink CDC
 * {@link MySqlSource} and hands every record to {@link MysqlSink}.
 *
 * <p>Checkpoints are stored under {@code <java.io.tmpdir>/ck}. At start-up the most
 * recently modified {@code chk-*} directory found there is passed to Flink as the
 * restore path, and the directories belonging to other runs are deleted.
 */
public class DataStreamJob {

    /** Name of the checkpoint root directory created inside {@code java.io.tmpdir}. */
    private static final String CHECKPOINT_DIR_NAME = "ck";

    /**
     * Configures and runs the job in a local environment with the web UI enabled.
     *
     * @param args ignored
     * @throws Exception if the job fails
     */
    public static void main(String[] args) throws Exception {
        String latestCheckpoint = getLatestCheckpoint();
        System.out.println("latestCheckpoint = " + latestCheckpoint);

        Configuration configuration = new Configuration();
        if (StringUtils.isNotBlank(latestCheckpoint)) {
            // File.toURI() yields a well-formed file: URI on every platform, unlike
            // prefixing a native path with "file:///".
            configuration.setString("execution.savepoint.path",
                    new File(latestCheckpoint).toURI().toString());
        }
        StreamExecutionEnvironment env =
                StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);

        env.setParallelism(1);
        // 2.1 Enable checkpointing, one checkpoint every 60 seconds.
        env.enableCheckpointing(1000L * 60);
        // 2.2 Checkpoint consistency mode.
        env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
        // 2.3 Keep the last checkpoint when the job is cancelled.
        env.getCheckpointConfig().enableExternalizedCheckpoints(
                CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
        // 2.4 Restart strategy: fixed delay, 3 attempts, 2000 ms apart.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 2000L));
        // 2.5 State backend writing to the checkpoint directory.
        env.setStateBackend(new FsStateBackend(checkpointDir().toURI()));

        // Debezium options forwarded to the CDC source.
        Properties properties = new Properties();
        properties.setProperty("snapshot.locking.mode", "none");
        properties.setProperty("decimal.handling.mode", "string");

        MySqlSource<String> sourceFunction = MySqlSource.<String>builder()
                .hostname(CDCKit.getHostname())
                .port(CDCKit.getPort())
                .databaseList(CDCKit.getDatabase())
                .tableList(CDCKit.getTableList())
                .username(CDCKit.getUsername())
                .password(CDCKit.getPassword())
                .scanNewlyAddedTableEnabled(true)
                .deserializer(new JsonDebeziumDeserializationSchema())
                .startupOptions(StartupOptions.initial())
                .debeziumProperties(properties)
                .build();

        // 4. Read from MySQL with the CDC source and send every record to the sink.
        env.fromSource(sourceFunction, WatermarkStrategy.noWatermarks(), "mysql-source")
                .addSink(new MysqlSink());
        // 6. Run the job.
        env.execute();
    }

    /**
     * Returns the checkpoint root directory, {@code <java.io.tmpdir>/ck}.
     *
     * <p>Built with {@link File#File(String, String)} so the result is correct whether or
     * not {@code java.io.tmpdir} ends with a path separator.
     */
    private static File checkpointDir() {
        return new File(System.getProperty("java.io.tmpdir"), CHECKPOINT_DIR_NAME);
    }

    /**
     * Finds the most recently modified {@code chk-*} directory two levels below the
     * checkpoint root and deletes every first-level directory other than its parent.
     *
     * <p>NOTE(review): the newest directory is chosen by modification time only; whether
     * it holds a completed checkpoint is not verified here.
     *
     * @return absolute path of the newest {@code chk-*} directory, or {@code null} when
     *     the checkpoint root does not exist or contains no such directory
     */
    private static String getLatestCheckpoint() {
        File[] entries = checkpointDir().listFiles();
        if (entries == null) {
            return null;
        }

        File latest = null;
        long latestModified = 0;
        for (File runDir : entries) {
            if (!runDir.isDirectory()) {
                continue;
            }
            File[] children = runDir.listFiles();
            if (children == null) {
                continue;
            }
            for (File child : children) {
                if (!child.isDirectory() || !child.getName().startsWith("chk-")) {
                    continue;
                }
                if (child.lastModified() > latestModified) {
                    latestModified = child.lastModified();
                    latest = child;
                }
            }
        }
        if (latest == null) {
            return null;
        }

        // Remove the directories of all other runs. getParentFile() replaces the former
        // substring on "\\", which only worked with Windows path separators.
        File keep = latest.getParentFile();
        for (File runDir : entries) {
            if (runDir.isDirectory()
                    && !Objects.equals(runDir.getAbsolutePath(), keep.getAbsolutePath())) {
                FileUtil.del(runDir);
            }
        }
        return latest.getAbsolutePath();
    }
}

数据处理类

package com.sand;

/**
 * Sink that writes each record it receives to standard output.
 *
 * @author zdd
 */
public class MysqlSink implements org.apache.flink.streaming.api.functions.sink.SinkFunction<String> {

    /**
     * Prints the record on its own line, prefixed with {@code "value = "}.
     *
     * @param value   the record to print
     * @param context sink context; not used
     * @throws Exception declared by the interface; not thrown by this implementation
     */
    @Override
    public void invoke(String value, org.apache.flink.streaming.api.functions.sink.SinkFunction.Context context) throws Exception {
        final String line = "value = ".concat(String.valueOf(value));
        System.out.println(line);
    }
}

你可能感兴趣的:(flink,java)