This post shows how to read and write Kafka data with Flink, using Flink 1.8.0.
The complete sample code:
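The demo reads CSV records from a topic named foo and writes filtered results to a topic named bar. If the cluster does not auto-create topics, they can be created up front with Kafka's CLI. This is a sketch assuming Kafka 0.10 tools and a ZooKeeper at hostA:2181; both paths are assumptions about the test environment:
bin/kafka-topics.sh --create --zookeeper hostA:2181 --replication-factor 1 --partitions 1 --topic foo
bin/kafka-topics.sh --create --zookeeper hostA:2181 --replication-factor 1 --partitions 1 --topic bar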
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.formats.csv.CsvRowDeserializationSchema;
import org.apache.flink.formats.csv.CsvRowSerializationSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.Kafka010TableSink;
import org.apache.flink.streaming.connectors.kafka.Kafka010TableSource;
import org.apache.flink.streaming.connectors.kafka.KafkaTableSinkBase;
import org.apache.flink.streaming.connectors.kafka.KafkaTableSourceBase;
import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkFixedPartitioner;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.types.Row;

import java.util.Optional;
import java.util.Properties;

public class FlinkKafkaDemo {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        final StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        // Schema of the CSV records: three string columns.
        final TableSchema tableSchema = new TableSchema(
                new String[]{"imsi", "lac", "cell"},
                new TypeInformation[]{Types.STRING, Types.STRING, Types.STRING});
        final TypeInformation<Row> typeInfo = tableSchema.toRowType();

        // Deserialize comma-separated values from Kafka into Rows.
        final CsvRowDeserializationSchema.Builder deserSchemaBuilder =
                new CsvRowDeserializationSchema.Builder(typeInfo).setFieldDelimiter(',');

        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "hostA:6667");

        // Table source reading topic "foo".
        KafkaTableSourceBase kafkaTableSource = new Kafka010TableSource(
                tableSchema,
                "foo",
                properties,
                deserSchemaBuilder.build());
        tableEnv.registerTableSource("KafkaCsvTable", kafkaTableSource);
        Table kafkaCsvTable = tableEnv.scan("KafkaCsvTable");

        // Drop rows whose lac is '5'; keep all three columns.
        Table result = kafkaCsvTable.where("lac != '5'").select("imsi,lac,cell");
        DataStream<Row> ds = tableEnv.toAppendStream(result, typeInfo);

        // Serialize Rows back to CSV with '|' as the field delimiter,
        // '\r' as the line delimiter, and quoting disabled ('\0').
        final CsvRowSerializationSchema.Builder serSchemaBuilder =
                new CsvRowSerializationSchema.Builder(typeInfo)
                        .setFieldDelimiter('|')
                        .setQuoteCharacter('\0')
                        .setLineDelimiter("\r");

        // Table sink writing topic "bar".
        KafkaTableSinkBase sink = new Kafka010TableSink(
                result.getSchema(),
                "bar",
                properties,
                Optional.of(new FlinkFixedPartitioner<>()),
                serSchemaBuilder.build());
        sink.emitDataStream(ds);

        env.execute("Flink kafka demo");
    }
}
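To make the behavior concrete: with illustrative input records like the ones below in topic foo (the values are made up), the row whose lac is '5' is filtered out, and the surviving row is re-encoded with '|' as the field delimiter before landing in topic bar.
Input in topic foo:
460001234567890,5,1001
460029876543210,6,1002
Output in topic bar:
460029876543210|6|1002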
An alternative approach is as follows:
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.formats.csv.CsvRowDeserializationSchema;
import org.apache.flink.formats.csv.CsvRowSerializationSchema;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer010;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.api.java.StreamTableEnvironment;
import org.apache.flink.types.Row;

import java.util.Properties;

public class FlinkKafkaDemoT {
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        final StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env);

        final TableSchema tableSchema = new TableSchema(
                new String[]{"imsi", "lac", "cell"},
                new TypeInformation[]{Types.STRING, Types.STRING, Types.STRING});
        final TypeInformation<Row> typeInfo = tableSchema.toRowType();

        final CsvRowDeserializationSchema.Builder deserSchemaBuilder =
                new CsvRowDeserializationSchema.Builder(typeInfo).setFieldDelimiter(',');

        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "hostA:6667");

        // Plain DataStream consumer for topic "foo".
        FlinkKafkaConsumer010<Row> myConsumer = new FlinkKafkaConsumer010<>(
                "foo",
                deserSchemaBuilder.build(),
                properties);
        myConsumer.setStartFromLatest();

        // Register the stream as a table, then filter and project it.
        DataStream<Row> stream = env.addSource(myConsumer);
        tableEnv.registerDataStream("KafkaCsvTable", stream);
        Table kafkaCsvTable = tableEnv.scan("KafkaCsvTable");
        Table result = kafkaCsvTable.where("lac != '5'").select("imsi,lac,cell");

        final CsvRowSerializationSchema.Builder serSchemaBuilder =
                new CsvRowSerializationSchema.Builder(typeInfo)
                        .setFieldDelimiter(',')
                        .setLineDelimiter("\r");
        DataStream<Row> ds = tableEnv.toAppendStream(result, typeInfo);

        // Plain DataStream producer for topic "bar".
        FlinkKafkaProducer010<Row> myProducer = new FlinkKafkaProducer010<>(
                "hostA:6667",
                "bar",
                serSchemaBuilder.build());
        myProducer.setWriteTimestampToKafka(true);
        ds.addSink(myProducer);

        env.execute("Flink kafka demo");
    }
}
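The CSV schemas can also be exercised without a Kafka cluster. The following is a minimal, hypothetical check (not from the original post) that serializes one Row with the same serializer configuration the producer above uses:
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.formats.csv.CsvRowSerializationSchema;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.types.Row;

public class CsvSchemaCheck {
    public static void main(String[] args) {
        final TableSchema tableSchema = new TableSchema(
                new String[]{"imsi", "lac", "cell"},
                new TypeInformation[]{Types.STRING, Types.STRING, Types.STRING});
        final TypeInformation<Row> typeInfo = tableSchema.toRowType();

        // Same serializer configuration as FlinkKafkaDemoT above.
        final CsvRowSerializationSchema ser = new CsvRowSerializationSchema.Builder(typeInfo)
                .setFieldDelimiter(',')
                .setLineDelimiter("\r")
                .build();

        final Row row = new Row(3);
        row.setField(0, "460029876543210"); // imsi (made-up value)
        row.setField(1, "6");               // lac
        row.setField(2, "1002");            // cell

        // Prints the bytes the Kafka producer would send for this row
        // (exact quoting follows the schema defaults).
        System.out.println(new String(ser.serialize(row)));
    }
}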
Note that in the code above, resolving the following two classes requires configuring Alibaba's Maven repository:
import org.apache.flink.formats.csv.CsvRowDeserializationSchema;
import org.apache.flink.formats.csv.CsvRowSerializationSchema;
The pom.xml file is as follows:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.woople.tutorial.flink</groupId>
    <artifactId>flink-examples</artifactId>
    <version>1.0-SNAPSHOT</version>

    <repositories>
        <repository>
            <id>ali</id>
            <name>ali</name>
            <url>http://maven.aliyun.com/nexus/content/groups/public</url>
            <releases><enabled>true</enabled></releases>
            <snapshots><enabled>false</enabled></snapshots>
        </repository>
    </repositories>

    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.11</artifactId>
            <version>1.8.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-scala_2.11</artifactId>
            <version>1.8.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka-0.10_2.11</artifactId>
            <version>1.8.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-csv</artifactId>
            <version>1.8.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner_2.11</artifactId>
            <version>1.8.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>1.8.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-scala_2.11</artifactId>
            <version>1.8.0</version>
        </dependency>
    </dependencies>

    <build>
        <defaultGoal>package</defaultGoal>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-resources-plugin</artifactId>
                <configuration>
                    <encoding>UTF-8</encoding>
                </configuration>
                <executions>
                    <execution>
                        <goals><goal>copy-resources</goal></goals>
                    </execution>
                </executions>
            </plugin>
            <plugin>
                <groupId>net.alchim31.maven</groupId>
                <artifactId>scala-maven-plugin</artifactId>
                <version>3.2.2</version>
                <executions>
                    <execution>
                        <id>eclipse-add-source</id>
                        <goals><goal>add-source</goal></goals>
                    </execution>
                    <execution>
                        <id>scala-compile-first</id>
                        <phase>process-resources</phase>
                        <goals><goal>compile</goal></goals>
                    </execution>
                    <execution>
                        <id>scala-test-compile-first</id>
                        <phase>process-test-resources</phase>
                        <goals><goal>testCompile</goal></goals>
                    </execution>
                    <execution>
                        <id>attach-scaladocs</id>
                        <phase>verify</phase>
                        <goals><goal>doc-jar</goal></goals>
                    </execution>
                </executions>
                <configuration>
                    <scalaVersion>2.11.8</scalaVersion>
                    <recompileMode>incremental</recompileMode>
                    <useZincServer>true</useZincServer>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.5.1</version>
                <executions>
                    <execution>
                        <phase>compile</phase>
                        <goals><goal>compile</goal></goals>
                    </execution>
                </executions>
                <configuration>
                    <source>8</source>
                    <target>8</target>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>2.4.1</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals><goal>shade</goal></goals>
                        <configuration>
                            <createDependencyReducedPom>false</createDependencyReducedPom>
                            <filters>
                                <filter>
                                    <artifact>*:*</artifact>
                                    <excludes>
                                        <exclude>META-INF/*.SF</exclude>
                                        <exclude>META-INF/*.DSA</exclude>
                                        <exclude>META-INF/*.RSA</exclude>
                                    </excludes>
                                </filter>
                            </filters>
                            <finalName>${project.artifactId}-${project.version}-bundle</finalName>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
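Assuming the Aliyun repository resolves the flink-csv artifact, mvn clean package builds flink-examples-1.0-SNAPSHOT-bundle.jar under target/ (per the shade plugin's finalName above), and the job can then be submitted with the Flink CLI:
mvn clean package
flink run -c com.woople.tutorial.flink.FlinkKafkaDemo target/flink-examples-1.0-SNAPSHOT-bundle.jar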