Flink 读取 Kafka 数据并写入 MySQL

创建flink环境:

// Obtain the streaming execution environment and run the job with a parallelism of 1.
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(1);

// Table environment layered on the stream environment: Blink planner, streaming mode.
EnvironmentSettings settings = EnvironmentSettings
        .newInstance()
        .useBlinkPlanner()
        .inStreamingMode()
        .build();
StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, settings);

创建连接 Kafka 的源表:

// DDL for the Kafka-backed source table: CSV records from topic 'pa', one column per field.
// Rows that fail CSV parsing are skipped ('csv.ignore-parse-errors' = 'true').
String kafkaDdl = String.join("\n",
        "CREATE TABLE kafka_job (",
        "  tab STRING,",
        "  job_name STRING,",
        "  job_salary int,",
        "  job_address STRING,",
        "  job_exp STRING,",
        "  education STRING,",
        "  company_name STRING,",
        "  demand STRING,",
        "  welfare STRING",
        ")",
        "WITH (",
        "  'connector' = 'kafka',",
        "  'topic' = 'pa',",
        "  'properties.bootstrap.servers' = 'hadoop111:9092',",
        "  'format' = 'csv',",
        "  'csv.field-delimiter' = ',',",
        "  'csv.ignore-parse-errors' = 'true'",
        ")");

// Register the table in the catalog, then obtain a Table handle for it.
tableEnv.executeSql(kafkaDdl);
Table t_user = tableEnv.from("kafka_job");

创建写入 MySQL 的结果表:

// DDL for the JDBC-backed table through which the aggregated result is written to MySQL
// (database 'flaskdb', table 't1_avg').
// Fix: the URL parameter was misspelled "zeroDaeTimeBehavior"; the MySQL Connector/J property is
// "zeroDateTimeBehavior", so the intended convertToNull setting was never applied.
// NOTE(review): the 'lookup.cache.*' options are lookup-source options of the JDBC connector and
// presumably have no effect when this table is only written to - confirm and drop if unneeded.
// NOTE(review): the primary key is unix_t while the insert query groups by tab - confirm that
// keying the upsert on the timestamp (rather than on tab) is intended.
String sql2 =
        "CREATE TABLE t1_avg (\n" +
                " tab VARCHAR(255),\n" +
                " avg_salary DECIMAL(10, 2),\n" +
                " unix_t BIGINT ," +
                "PRIMARY KEY (unix_t) NOT ENFORCED \n" +
                ")\n" +
                " WITH\n" +
                "(\n" +
                "'connector' = 'jdbc',\n" +
                "'url' = 'jdbc:mysql://localhost:3306/flaskdb?serverTimezone=Asia/Shanghai&zeroDateTimeBehavior=convertToNull&useSSL=false',\n" +
                "'driver' = 'com.mysql.jdbc.Driver',\n" +
                "'username' = 'root',\n" +
                "'password' = 'root',\n" +
                "'table-name' = 't1_avg',\n" +
                "'lookup.cache.max-rows' = '1000',\n" +
                "'lookup.cache.ttl' = '60000'\n" +
                ")";
// Register the sink table in the catalog.
tableEnv.executeSql(sql2);

编写插入语句:

// Continuous query: average salary per 'tab', stamped with the current Unix time, written to t1_avg.
// Fix: job_salary is declared as INT and Flink's AVG over an integer column yields an integer,
// so the average was truncated and ROUND(..., 2) had nothing to round. The column is now cast to
// DOUBLE before averaging, and the rounded result is cast to the sink column type DECIMAL(10, 2).
String insert = "insert into t1_avg\n" +
        "SELECT *\n" +
        "FROM (\n" +
        "  SELECT tab, CAST(ROUND(AVG(CAST(job_salary AS DOUBLE)), 2) AS DECIMAL(10, 2)) AS avg_salary, UNIX_TIMESTAMP() AS unix_t\n" +
        "  FROM kafka_job\n" +
        "  GROUP BY tab\n" +
        ") AS subquery";
// Submit the INSERT statement to the table environment.
tableEnv.executeSql(insert);

最终代码:

package flink;

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * Flink streaming job that reads CSV job postings from the Kafka topic {@code pa}, prints the raw
 * rows, and writes the average salary per {@code tab} into the MySQL table {@code t1_avg}.
 */
public class t1 {
    /** Class-wide logger; created once instead of on every {@code main} invocation. */
    private static final Logger log = LoggerFactory.getLogger(t1.class);

    /** DDL for the Kafka-backed source table; rows that fail CSV parsing are skipped. */
    private static final String KAFKA_SOURCE_DDL =
            "CREATE TABLE kafka_job (\n" +
                    "  tab STRING,\n" +
                    "  job_name STRING,\n" +
                    "  job_salary int,\n" +
                    "  job_address STRING,\n" +
                    "  job_exp STRING,\n" +
                    "  education STRING,\n" +
                    "  company_name STRING,\n" +
                    "  demand STRING,\n" +
                    "  welfare STRING\n" +
                    ")\n" +
                    "WITH (\n" +
                    "  'connector' = 'kafka',\n" +
                    "  'topic' = 'pa',\n" +
                    "  'properties.bootstrap.servers' = 'hadoop111:9092',\n" +
                    "  'format' = 'csv',\n" +
                    "  'csv.field-delimiter' = ',',\n" +
                    "  'csv.ignore-parse-errors' = 'true'\n" +
                    ")";

    /**
     * DDL for the JDBC-backed sink table in MySQL.
     *
     * <p>The URL parameter is spelled {@code zeroDateTimeBehavior}; the earlier spelling
     * {@code zeroDaeTimeBehavior} is not a Connector/J property, so the setting never applied.
     *
     * <p>NOTE(review): the {@code lookup.cache.*} options are lookup-source options and presumably
     * have no effect for a table that is only written to - confirm. The primary key is
     * {@code unix_t} although the insert query groups by {@code tab} - confirm this is intended.
     */
    private static final String MYSQL_SINK_DDL =
            "CREATE TABLE t1_avg (\n" +
                    " tab VARCHAR(255),\n" +
                    " avg_salary DECIMAL(10, 2),\n" +
                    " unix_t BIGINT ," +
                    "PRIMARY KEY (unix_t) NOT ENFORCED \n" +
                    ")\n" +
                    " WITH\n" +
                    "(\n" +
                    "'connector' = 'jdbc',\n" +
                    "'url' = 'jdbc:mysql://localhost:3306/flaskdb?serverTimezone=Asia/Shanghai&zeroDateTimeBehavior=convertToNull&useSSL=false',\n" +
                    "'driver' = 'com.mysql.jdbc.Driver',\n" +
                    "'username' = 'root',\n" +
                    "'password' = 'root',\n" +
                    "'table-name' = 't1_avg',\n" +
                    "'lookup.cache.max-rows' = '1000',\n" +
                    "'lookup.cache.ttl' = '60000'\n" +
                    ")";

    /**
     * Continuous query writing the average salary per {@code tab} to the sink.
     *
     * <p>{@code job_salary} is INT and Flink's AVG over an integer column yields an integer, so
     * the column is cast to DOUBLE before averaging; the rounded value is then cast to the sink
     * column type DECIMAL(10, 2).
     */
    private static final String INSERT_AVG_SALARY =
            "insert into t1_avg\n" +
                    "SELECT *\n" +
                    "FROM (\n" +
                    "  SELECT tab, CAST(ROUND(AVG(CAST(job_salary AS DOUBLE)), 2) AS DECIMAL(10, 2)) AS avg_salary, UNIX_TIMESTAMP() AS unix_t\n" +
                    "  FROM kafka_job\n" +
                    "  GROUP BY tab\n" +
                    ") AS subquery";

    /**
     * Builds the table environment, registers the source and sink tables, submits the insert
     * statement, and runs the DataStream pipeline that prints the raw Kafka rows.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        EnvironmentSettings settings = EnvironmentSettings.newInstance()
                .useBlinkPlanner()
                .inStreamingMode()
                .build();
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, settings);

        // Register the Kafka source and obtain a Table handle for it.
        tableEnv.executeSql(KAFKA_SOURCE_DDL);
        Table t_user = tableEnv.from("kafka_job");

        // Debug output: print every raw row read from Kafka, plus the table schema.
        DataStream<Row> rowDataStream = tableEnv.toAppendStream(t_user, Row.class);
        rowDataStream.print();

        t_user.printSchema();

        // Register the MySQL sink and submit the aggregation that feeds it.
        tableEnv.executeSql(MYSQL_SINK_DDL);
        tableEnv.executeSql(INSERT_AVG_SALARY);

        try {
            // Runs the DataStream part of the program (the print() sink defined above).
            env.execute("flink_running");
        } catch (Exception e) {
            // Log at ERROR with the full exception so the cause and stack trace are not lost;
            // the message is still echoed to stdout as before.
            log.error("抛出异常!", e);
            System.out.println(e.getMessage());
        }
    }
}

你可能感兴趣的:(flink,kafka,mysql)