Windows: IDEA
Linux: Kafka, ZooKeeper
2.1. The pom.xml file
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.test</groupId>
    <artifactId>flinkTestConsumeKafka</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <flink.version>1.7.2</flink.version>
        <slf4j.version>1.7.7</slf4j.version>
        <log4j.version>1.2.17</log4j.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka-0.11_2.11</artifactId>
            <version>${flink.version}</version>
            <scope>compile</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-filesystem_2.11</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-core</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.7.3</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.51</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>${slf4j.version}</version>
            <scope>runtime</scope>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>${log4j.version}</version>
            <scope>runtime</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>1.1.1</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.3</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <configuration>
                    <archive>
                        <manifest>
                            <mainClass>com.allen.capturewebdata.Main</mainClass>
                        </manifest>
                    </archive>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
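Because the assembly plugin above is not bound to a lifecycle phase, building the fat jar needs the goal invoked explicitly (standard Maven usage; the jar is written under target/):
mvn clean package assembly:single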
2.2. Editing the Java classes
2.2.1. The KafkaDemo class
package flink.kafkaFlink;

import java.util.Properties;

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010;

public class KafkaDemo {

    public static void main(String[] args) throws Exception {
        // Set up the streaming execution environment.
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Checkpointing is disabled by default. To enable it, call enableCheckpointing(n)
        // on the StreamExecutionEnvironment, where n is the checkpoint interval in
        // milliseconds; here a checkpoint is triggered every 500 ms.
        env.enableCheckpointing(500);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "singlecluster:9092"); // Kafka broker IPs or hostnames, comma-separated
        properties.setProperty("zookeeper.connect", "singlecluster:2181"); // ZooKeeper IPs or hostnames, comma-separated
        properties.setProperty("group.id", "test-consumer-group");         // group.id of the Flink consumer
        System.out.println("11111111111");

        // "apache-flink-test" is the topic created in Kafka.
        FlinkKafkaConsumer010<String> myConsumer =
                new FlinkKafkaConsumer010<>("apache-flink-test", new SimpleStringSchema(), properties);
        myConsumer.assignTimestampsAndWatermarks(new CustomWatermarkEmitter());

        // Take the records sent by the Kafka producer; no transformation is applied in this example.
        DataStream<String> keyedStream = env.addSource(myConsumer);
        System.out.println("2222222222222");

        // Print the records received from the producer directly to the console.
        keyedStream.print();

        // Execute the program.
        System.out.println("3333333333333");
        env.execute("Flink Streaming Java API Skeleton");
    }
}
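As an aside, enableCheckpointing also has an overload that states the delivery guarantee explicitly; a minimal sketch (EXACTLY_ONCE is already Flink's default mode, so this only makes the intent visible):

import org.apache.flink.streaming.api.CheckpointingMode;
// ...
env.enableCheckpointing(500, CheckpointingMode.EXACTLY_ONCE); // checkpoint every 500 ms with exactly-once state guarantees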
2.2.2. The CustomWatermarkEmitter class
package flink.kafkaFlink;

import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks;
import org.apache.flink.streaming.api.watermark.Watermark;

public class CustomWatermarkEmitter implements AssignerWithPunctuatedWatermarks<String> {

    private static final long serialVersionUID = 1L;

    // Each record is expected to be comma-separated, with an epoch-millisecond
    // timestamp as its first field.
    @Override
    public long extractTimestamp(String element, long previousElementTimestamp) {
        if (null != element && element.contains(",")) {
            String[] parts = element.split(",");
            return Long.parseLong(parts[0]);
        }
        return 0;
    }

    @Override
    public Watermark checkAndGetNextWatermark(String lastElement, long extractedTimestamp) {
        if (null != lastElement && lastElement.contains(",")) {
            String[] parts = lastElement.split(",");
            return new Watermark(Long.parseLong(parts[0]));
        }
        return null;
    }
}
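To make the record format the emitter expects concrete, here is a small, hypothetical driver (not part of the original post) that feeds it two sample strings:

package flink.kafkaFlink;

public class CustomWatermarkEmitterCheck {
    public static void main(String[] args) {
        CustomWatermarkEmitter emitter = new CustomWatermarkEmitter();
        // A well-formed record: the first comma-separated field is an epoch-millisecond timestamp.
        System.out.println(emitter.extractTimestamp("1554950400000,hello", -1L)); // prints 1554950400000
        // A record without a comma falls back to timestamp 0 and yields no watermark.
        System.out.println(emitter.extractTimestamp("hello", -1L));               // prints 0
        System.out.println(emitter.checkAndGetNextWatermark("hello", 0L));        // prints null
    }
}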
3. Starting the services on the Linux host
3.1. Start ZooKeeper and Kafka (here the ZooKeeper bundled with the Kafka distribution is used)
cd /home/bigdata/kafka
3.1.1. Start ZooKeeper (in the background)
nohup bin/zookeeper-server-start.sh config/zookeeper.properties &
Check whether ZooKeeper is running:
jps
6740 QuorumPeerMain
Stop ZooKeeper:
bin/zookeeper-server-stop.sh
3.1.2. Start Kafka (in the background)
nohup bin/kafka-server-start.sh config/server.properties &
or
nohup bin/kafka-server-start.sh config/server.properties >/dev/null 2>&1 &
Check whether Kafka is running:
jps
7587 Kafka
Stop Kafka:
bin/kafka-server-stop.sh
3.2. Create a Kafka topic
/home/bigdata/kafka/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic apache-flink-test
3.3. Start a Kafka console producer
/home/bigdata/kafka/bin/kafka-console-producer.sh --broker-list localhost:9092 --topic apache-flink-test
3.4. Common Kafka commands
(1) Create a topic:
/home/bigdata/kafka/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic apache-flink-test
(2) List topics:
/home/bigdata/kafka/bin/kafka-topics.sh --list --zookeeper localhost:2181
(3) Console producer:
/home/bigdata/kafka/bin/kafka-console-producer.sh --broker-list localhost:9092 --topic apache-flink-test
(4) Console consumer:
/home/bigdata/kafka/bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic apache-flink-test --from-beginning
After consuming from a Kafka topic, the consumer process should be shut down again:
(1) While the console consumer command is running, stop it with Ctrl + C.
(2) Otherwise, look up the consumer's process ID with jps -m and kill the corresponding process:
jps -m
kill -9 <pid>
(5) Delete a topic:
/home/bigdata/kafka/bin/kafka-topics.sh --delete --zookeeper <zookeeper server:port> --topic <topic name>
(delete.topic.enable=true must be set in server.properties)
4. Running the KafkaDemo program
4.1. Type messages into the Kafka producer console on the Linux server
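CustomWatermarkEmitter parses the first comma-separated field of every message as an epoch-millisecond event timestamp, so the lines typed into the producer console should follow that shape, for example (hypothetical sample input):
1554950400000,hello
1554950401000,world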
4.2. Check the IDEA console
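If everything is wired up correctly, the console shows the numbered markers printed while the job is being built, followed by the consumed records (illustrative output; the "N>" prefix added by print() is the index of the printing subtask and varies with parallelism):
11111111111
2222222222222
3333333333333
2> 1554950400000,hello
2> 1554950401000,world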
Reference:
Flink + Kafka: receiving streaming data and printing it to the console (吹静静, 博客园): https://www.cnblogs.com/chuijingjing/p/10535081.html