Download the source from GitHub and build it:
git clone git@github.com:apache/flink.git
git checkout release-1.6.3
Build command:
mvn clean install -DskipTests -Dmaven.javadoc.skip=true -Dcheckstyle.skip=true
Build error 1:
Could not find artifact com.mapr.hadoop:maprfs:jar:5.2.1-mapr in nexus-osc (http://maven.aliyun.com/nexus/content/repositories/central)
Solution: install the maprfs jar into the local Maven repository manually, as follows:
1. Download
Download the jar from https://repository.mapr.com/nexus/content/groups/mapr-public/com/mapr/hadoop/maprfs/5.2.1-mapr/maprfs-5.2.1-mapr.jar to /Users/liyuhuan/downloads
2. Install
mvn install:install-file -DgroupId=com.mapr.hadoop -DartifactId=maprfs -Dversion=5.2.1-mapr -Dpackaging=jar -Dfile=/Users/liyuhuan/downloads/maprfs-5.2.1-mapr.jar
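After the install, the artifact should show up in the local repository; a quick check (assuming the default ~/.m2 location):
ls ~/.m2/repository/com/mapr/hadoop/maprfs/5.2.1-mapr/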
Build error 2
[ERROR] Failed to execute goal org.apache.maven.plugins:maven-compiler-plugin:3.1:compile (default-compile) on project flink-mapr-fs: Compilation failure: Compilation failure:
[ERROR] /Users/liyuhuan/code/flink/flink-filesystems/flink-mapr-fs/src/main/java/org/apache/flink/runtime/fs/maprfs/MapRFileSystem.java:[70,44] package org.apache.hadoop.fs does not exist
[ERROR] /Users/liyuhuan/code/flink/flink-filesystems/flink-mapr-fs/src/main/java/org/apache/flink/runtime/fs/maprfs/MapRFileSystem.java:[73,45] cannot find symbol
[ERROR]   symbol:   class Configuration
[ERROR]   location: package org.apache.hadoop.conf
[ERROR] /Users/liyuhuan/code/flink/flink-filesystems/flink-mapr-fs/src/main/java/org/apache/flink/runtime/fs/maprfs/MapRFileSystem.java:[73,93] cannot find symbol
Solution: add the following dependency to the pom.xml of flink-mapr-fs:
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>${hadoop.version}</version>
</dependency>
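To verify the fix without a full rebuild, you can rebuild just this module from the source root (a sketch; -pl limits the build to the listed module, whose upstream Flink modules were already installed into the local repository by the earlier run):
mvn clean install -pl flink-filesystems/flink-mapr-fs -DskipTests -Dmaven.javadoc.skip=true -Dcheckstyle.skip=true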
Build error 3
[ERROR] Failed to execute goal on project flink-avro-confluent-registry: Could not resolve dependencies for project org.apache.flink:flink-avro-confluent-registry:jar:1.6.3: Failure to find io.confluent:kafka-schema-registry-client:jar:3.3.1 in http://maven.aliyun.com/nexus/content/groups/public was cached in the local repository, resolution will not be reattempted until the update interval of nexus-aliyun has elapsed or updates are forced -> [Help 1]
Solution: install the kafka-schema-registry-client jar into the local Maven repository manually, as follows:
1. Download
Download http://packages.confluent.io/maven/io/confluent/kafka-schema-registry-client/3.3.1/kafka-schema-registry-client-3.3.1.jar to /Users/liyuhuan/downloads
2. Install
mvn install:install-file -DgroupId=io.confluent -DartifactId=kafka-schema-registry-client -Dversion=3.3.1 -Dpackaging=jar -Dfile=/Users/liyuhuan/downloads/kafka-schema-registry-client-3.3.1.jar
Create a Remote debug configuration in IDEA for each process:
JobManager configuration
-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=5005
TaskManager configuration
-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=5006
Add the following to flink-conf.yaml (with suspend=y, the JobManager and TaskManager JVMs will wait at startup until a debugger attaches on ports 5005 and 5006):
env.java.opts.jobmanager: -agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=5005
env.java.opts.taskmanager: -agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=5006
Client configuration
Add the following to the bin/flink script in the build output:
JVM_REMOTE_DEBUG_ARGS='-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=5007'
Then modify the last line to include $JVM_REMOTE_DEBUG_ARGS (with suspend=y, every subsequent bin/flink invocation will wait for a debugger to attach on port 5007):
exec $JAVA_RUN $JVM_ARGS $JVM_REMOTE_DEBUG_ARGS "${log_setting[@]}" -classpath "`manglePathList "$CC_CLASSPATH:$INTERNAL_HADOOP_CLASSPATHS"`" org.apache.flink.client.cli.CliFrontend "$@"
Start the cluster
bin/start-cluster.sh
Set a breakpoint in org.apache.flink.runtime.entrypoint.StandaloneSessionClusterEntrypoint, the JobManager's main class, to start debugging.
The TaskManager's main class is org.apache.flink.runtime.taskexecutor.TaskManagerRunner.
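Before attaching the IDEA configurations, you can check that the debug agents are actually listening on the expected ports (a quick sanity check; the ports follow the configuration above):
lsof -iTCP:5005 -sTCP:LISTEN
lsof -iTCP:5006 -sTCP:LISTEN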
To start, let's write a simple Flink word-count job that consumes from Kafka:
package com.lyh.flink.kafka;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.functions.ReduceFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010;
import org.apache.flink.util.Collector;
import java.util.Properties;
public class FlinkKafkaWordCount {

    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(5000);
        env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);

        // Kafka source: topic "test" on a local broker, consumer group "flink-group"
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "localhost:9092");
        properties.setProperty("group.id", "flink-group");
        FlinkKafkaConsumer010<String> consumer =
                new FlinkKafkaConsumer010<>("test", new SimpleStringSchema(), properties);

        DataStream<WordWithCount> counts = env
                .addSource(consumer)
                .flatMap(new FlatMapFunction<String, WordWithCount>() {
                    @Override
                    public void flatMap(String value, Collector<WordWithCount> collector) throws Exception {
                        // split each record on tabs and emit (word, 1) pairs
                        for (String word : value.split("\t")) {
                            collector.collect(new WordWithCount(word, 1L));
                        }
                    }
                })
                .keyBy("word")
                .timeWindow(Time.seconds(5))
                .reduce(new ReduceFunction<WordWithCount>() {
                    @Override
                    public WordWithCount reduce(WordWithCount a, WordWithCount b) throws Exception {
                        return new WordWithCount(a.word, a.count + b.count);
                    }
                });

        counts.print().setParallelism(1);
        env.execute("flink-kafka-wordcount");
    }
    // POJO for keyBy("word"): public no-arg constructor plus getters/setters
    public static class WordWithCount {
        private String word;
        private long count;

        public WordWithCount() {
        }

        public WordWithCount(String word, long count) {
            this.word = word;
            this.count = count;
        }

        public String getWord() {
            return word;
        }

        public void setWord(String word) {
            this.word = word;
        }

        public long getCount() {
            return count;
        }

        public void setCount(long count) {
            this.count = count;
        }

        @Override
        public String toString() {
            return word + " : " + count;
        }
    }
}
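To give the job something to count, push a few tab-separated lines into the topic with Kafka's console producer (assuming a local broker on port 9092 and the topic name test used above; run from the Kafka installation directory):
bin/kafka-console-producer.sh --broker-list localhost:9092 --topic test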
Package the program above with mvn package. Note that the Kafka connector (flink-connector-kafka-0.10) is not part of the Flink distribution, so it has to be bundled into the job jar (for example with the Maven shade plugin) or dropped into Flink's lib directory.
Start the Flink cluster built in step one:
bin/start-cluster.sh
Submit the example:
bin/flink run -c com.lyh.flink.kafka.FlinkKafkaWordCount /Users/liyuhuan/code/spark_study/target/spark_study-1.0-SNAPSHOT.jar
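The output of print() ends up in the TaskManager's .out file under log/ in the Flink directory; you can follow it while the job runs (the exact file name depends on the Flink version, user and host, so adjust the pattern as needed):
tail -f log/flink-*-taskexecutor-*.out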