Flink读写Kafka

本文基于 Flink 1.8.0 版本,通过完整样例介绍如何使用 Flink 读写 Kafka 数据。

完整样例代码

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.formats.csv.CsvRowDeserializationSchema;
import org.apache.flink.formats.csv.CsvRowSerializationSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.Kafka010TableSink;
import org.apache.flink.streaming.connectors.kafka.Kafka010TableSource;
import org.apache.flink.streaming.connectors.kafka.KafkaTableSinkBase;
import org.apache.flink.streaming.connectors.kafka.KafkaTableSourceBase;
import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkFixedPartitioner;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.types.Row;

import java.util.Optional;
import java.util.Properties;

/**
 * Example job that reads CSV records from one Kafka topic and writes a filtered
 * copy to another, using the Table API Kafka 0.10 table source and table sink.
 *
 * <p>Pipeline: topic {@code foo} (comma-delimited CSV with the string fields
 * {@code imsi}, {@code lac}, {@code cell}) is registered as the table
 * {@code KafkaCsvTable}; rows matching {@code lac != '5'} are selected and
 * serialized as {@code '|'}-delimited CSV into topic {@code bar}.
 */
public class FlinkKafkaDemo {

    /**
     * Builds and submits the streaming job.
     *
     * @param args command-line arguments (not used)
     * @throws Exception if the job cannot be built or its execution fails
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        final StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // Schema shared by the source table, the CSV parser and the CSV writer.
        final TableSchema tableSchema = new TableSchema(
                new String[]{"imsi", "lac", "cell"},
                new TypeInformation<?>[]{Types.STRING, Types.STRING, Types.STRING});
        // Parameterized with Row (instead of a raw TypeInformation) so the compiler
        // can check the schema/stream wiring below without unchecked warnings.
        final TypeInformation<Row> typeInfo = tableSchema.toRowType();

        final CsvRowDeserializationSchema.Builder deserSchemaBuilder =
                new CsvRowDeserializationSchema.Builder(typeInfo)
                        .setFieldDelimiter(',');

        // The same Kafka client properties are used for both the source and the sink.
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "hostA:6667");

        KafkaTableSourceBase kafkaTableSource = new Kafka010TableSource(
                tableSchema,
                "foo",
                properties,
                deserSchemaBuilder.build());

        tableEnv.registerTableSource("KafkaCsvTable", kafkaTableSource);

        Table kafkaCsvTable = tableEnv.scan("KafkaCsvTable");
        Table result = kafkaCsvTable.where("lac != '5'").select("imsi,lac,cell");

        // Convert the filtered table to an append-only stream of rows for the sink.
        DataStream<Row> ds = tableEnv.toAppendStream(result, typeInfo);

        // Output format: '|' field delimiter, '\0' quote character, "\r" line delimiter.
        final CsvRowSerializationSchema.Builder serSchemaBuilder =
                new CsvRowSerializationSchema.Builder(typeInfo)
                        .setFieldDelimiter('|')
                        .setQuoteCharacter('\0')
                        .setLineDelimiter("\r");

        KafkaTableSinkBase sink = new Kafka010TableSink(
                result.getSchema(),
                "bar",
                properties,
                Optional.of(new FlinkFixedPartitioner<>()),
                serSchemaBuilder.build());

        sink.emitDataStream(ds);

        env.execute("Flink kafka demo");
    }
}

另一种方式是直接使用 FlinkKafkaConsumer010 和 FlinkKafkaProducer010,完整样例代码如下:

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.formats.csv.CsvRowDeserializationSchema;
import org.apache.flink.formats.csv.CsvRowSerializationSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.*;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.types.Row;
import java.util.Properties;

/**
 * Example job that reads CSV records from one Kafka topic and writes a filtered
 * copy to another, using the DataStream Kafka 0.10 consumer and producer directly.
 *
 * <p>Pipeline: topic {@code foo} (comma-delimited CSV with the string fields
 * {@code imsi}, {@code lac}, {@code cell}) is consumed starting from the latest
 * offsets and registered as the table {@code KafkaCsvTable}; rows matching
 * {@code lac != '5'} are selected and serialized as comma-delimited CSV into
 * topic {@code bar}.
 */
public class FlinkKafkaDemoT {

    /**
     * Builds and submits the streaming job.
     *
     * @param args command-line arguments (not used)
     * @throws Exception if the job cannot be built or its execution fails
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        final StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // Schema shared by the CSV parser and the CSV writer.
        final TableSchema tableSchema = new TableSchema(
                new String[]{"imsi", "lac", "cell"},
                new TypeInformation<?>[]{Types.STRING, Types.STRING, Types.STRING});
        // Parameterized with Row (instead of a raw TypeInformation) so the compiler
        // can check the consumer/stream/producer wiring without unchecked warnings.
        final TypeInformation<Row> typeInfo = tableSchema.toRowType();

        final CsvRowDeserializationSchema.Builder deserSchemaBuilder =
                new CsvRowDeserializationSchema.Builder(typeInfo)
                        .setFieldDelimiter(',');

        // Single definition of the broker address, used by both consumer and producer.
        final String bootstrapServers = "hostA:6667";

        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", bootstrapServers);

        FlinkKafkaConsumer010<Row> myConsumer = new FlinkKafkaConsumer010<>(
                "foo",
                deserSchemaBuilder.build(),
                properties);

        myConsumer.setStartFromLatest();

        // Expose the consumed rows to the Table API under the name "KafkaCsvTable".
        DataStream<Row> stream = env.addSource(myConsumer);
        tableEnv.registerDataStream("KafkaCsvTable", stream);
        Table kafkaCsvTable = tableEnv.scan("KafkaCsvTable");
        Table result = kafkaCsvTable.where("lac != '5'").select("imsi,lac,cell");

        // Output format: ',' field delimiter and "\r" line delimiter.
        final CsvRowSerializationSchema.Builder serSchemaBuilder =
                new CsvRowSerializationSchema.Builder(typeInfo)
                        .setFieldDelimiter(',')
                        .setLineDelimiter("\r");

        // Convert the filtered table to an append-only stream of rows for the producer.
        DataStream<Row> ds = tableEnv.toAppendStream(result, typeInfo);
        FlinkKafkaProducer010<Row> myProducer = new FlinkKafkaProducer010<>(
                bootstrapServers,
                "bar",
                serSchemaBuilder.build());

        myProducer.setWriteTimestampToKafka(true);

        ds.addSink(myProducer);

        env.execute("Flink kafka demo");
    }
}

注意:上面代码中引用的下面两个类来自 flink-csv 依赖,需要在 pom.xml 中配置阿里云的 Maven 仓库才能下载到该依赖。

import org.apache.flink.formats.csv.CsvRowDeserializationSchema;
import org.apache.flink.formats.csv.CsvRowSerializationSchema;

pom.xml文件如下



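<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.woople.tutorial.flink</groupId>
    <artifactId>flink-examples</artifactId>
    <version>1.0-SNAPSHOT</version>
    <repositories>
        <repository>
            <id>ali</id>
            <name>ali</name>
            <url>http://maven.aliyun.com/nexus/content/groups/public</url>
            <releases>
                <enabled>true</enabled>
            </releases>
            <snapshots>
                <enabled>false</enabled>
            </snapshots>
        </repository>
    </repositories>
    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.11</artifactId>
            <version>1.8.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-scala_2.11</artifactId>
            <version>1.8.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka-0.10_2.11</artifactId>
            <version>1.8.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-csv</artifactId>
            <version>1.8.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner_2.11</artifactId>
            <version>1.8.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>1.8.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-scala_2.11</artifactId>
            <version>1.8.0</version>
        </dependency>
    </dependencies>
    <build>
        <defaultGoal>package</defaultGoal>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-resources-plugin</artifactId>
                <configuration>
                    <encoding>UTF-8</encoding>
                </configuration>
                <executions>
                    <execution>
                        <goals>
                            <goal>copy-resources</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>3.2.2</version>
                <executions>
                    <execution>
                        <id>eclipse-add-source</id>
                        <goals>
                            <goal>add-source</goal>
                        </goals>
                    </execution>
                    <execution>
                        <id>scala-compile-first</id>
                        <phase>process-resources</phase>
                        <goals>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                    <execution>
                        <id>scala-test-compile-first</id>
                        <phase>process-test-resources</phase>
                        <goals>
                            <goal>testCompile</goal>
                        </goals>
                    </execution>
                    <execution>
                        <id>attach-scaladocs</id>
                        <phase>verify</phase>
                        <goals>
                            <goal>doc-jar</goal>
                        </goals>
                    </execution>
                </executions>
                <configuration>
                    <scalaVersion>2.11.8</scalaVersion>
                    <recompileMode>incremental</recompileMode>
                    <useZincServer>true</useZincServer>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.5.1</version>
                <executions>
                    <execution>
                        <phase>compile</phase>
                        <goals>
                            <goal>compile</goal>
                        </goals>
                    </execution>
                </executions>
                <configuration>
                    <source>8</source>
                    <target>8</target>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.4.1</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                    </execution>
                </executions>
                <configuration>
                    <!-- NOTE: the original element name was lost; minimizeJar is assumed here. -->
                    <minimizeJar>false</minimizeJar>
                    <filters>
                        <filter>
                            <artifact>*:*</artifact>
                            <excludes>
                                <exclude>META-INF/*.SF</exclude>
                                <exclude>META-INF/*.DSA</exclude>
                                <exclude>META-INF/*.RSA</exclude>
                            </excludes>
                        </filter>
                    </filters>
                    <finalName>${project.artifactId}-${project.version}-bundle</finalName>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>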

你可能感兴趣的:(Flink读写Kafka)