package org.feng.datasource;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.java.io.TextInputFormat;
import org.apache.flink.core.fs.Path;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.FileProcessingMode;
/**
 * Demonstrates the file-based sources of the Flink DataStream API.
 *
 * <p>The path given as the first program argument is read four times, once with each
 * {@code readTextFile}/{@code readFile} overload used below, and every resulting stream
 * is printed.
 *
 * @author Feng
 */
public class FileDataSource {

    /**
     * Builds the four file sources, prints them and runs the job.
     *
     * @param args {@code args[0]} is the path of the file to read
     * @throws IllegalArgumentException if no (or an empty) file path is supplied
     * @throws Exception if the Flink job fails
     */
    public static void main(String[] args) throws Exception {
        // Fail fast with a usage hint instead of an ArrayIndexOutOfBoundsException.
        if (args == null || args.length < 1 || args[0].isEmpty()) {
            throw new IllegalArgumentException("Usage: FileDataSource <file-path>");
        }
        final String filePath = args[0];

        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Read the file as text lines.
        DataStream<String> plainText = env.readTextFile(filePath);

        // Read the file as text lines using an explicit character encoding.
        DataStream<String> utf8Text = env.readTextFile(filePath, "UTF-8");

        // Read the file through an explicitly supplied input format.
        DataStream<String> formatted = env.readFile(new TextInputFormat(new Path(filePath)), filePath);

        /*
         * Fully parameterised variant:
         *   inputFormat     - input format of the stream
         *   filePath        - path of the file
         *   watchType       - how the path is processed:
         *                       FileProcessingMode.PROCESS_ONCE          read the file once
         *                       FileProcessingMode.PROCESS_CONTINUOUSLY  re-read after the file changes
         *   interval        - interval between periodic scans of the path
         *                     (milliseconds according to the Flink Javadoc)
         *   typeInformation - type of the elements in the stream
         */
        DataStream<String> processedOnce = env.readFile(
                new TextInputFormat(new Path(filePath)),
                filePath,
                FileProcessingMode.PROCESS_ONCE,
                1,
                BasicTypeInfo.STRING_TYPE_INFO);

        plainText.print();
        utf8Text.print();
        formatted.print();
        processedOnce.print();

        env.execute();
    }
}
package org.feng.datasource;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import java.io.Serializable;
import java.util.Arrays;
import java.util.Iterator;
/**
 * Demonstrates the in-memory sources of the Flink DataStream API:
 * <ul>
 *   <li>a generated number sequence,</li>
 *   <li>a source built from a collection,</li>
 *   <li>a source built from individual elements,</li>
 *   <li>a source built from a custom iterator.</li>
 * </ul>
 *
 * @author Feng
 */
public class GenerateDataSource {

    /**
     * Builds the four demo sources, prints each of them and runs the job.
     *
     * @param args unused
     * @throws Exception if the Flink job fails
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Sequence from 0 to 10 (the original author notes the output is unordered),
        // mapped to strings with an explicit result type, printed with parallelism 1.
        env.generateSequence(0, 10)
                .map(num -> "feng" + num)
                .returns(Types.STRING)
                .print()
                .setParallelism(1);

        // Source created from a collection.
        env.fromCollection(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8))
                .returns(Types.INT)
                .print()
                .setParallelism(1);

        // Source created from individual elements.
        env.fromElements("hello", "world", "Flink")
                .returns(Types.STRING)
                .print()
                .setParallelism(1);

        // Source created from a custom iterator producing strings.
        env.fromCollection(new StringIterator(), BasicTypeInfo.STRING_TYPE_INFO)
                .returns(Types.STRING)
                .print()
                .setParallelism(1);

        env.execute();
    }

    /**
     * Iterator that yields the 100 strings {@code "fengsoshuai1"} to {@code "fengsoshuai100"}.
     */
    private static class StringIterator implements Iterator<String>, Serializable {
        private static final long serialVersionUID = 2746816923013094383L;

        /** Total number of strings this iterator produces. */
        private static final int LIMIT = 100;

        /** Number of strings handed out so far. */
        private int index = 0;

        @Override
        public boolean hasNext() {
            return index < LIMIT;
        }

        /**
         * Returns the next string.
         *
         * @throws java.util.NoSuchElementException if all {@value #LIMIT} strings were already returned
         */
        @Override
        public String next() {
            // Honour the Iterator contract instead of silently running past the limit.
            if (!hasNext()) {
                throw new java.util.NoSuchElementException(
                        "StringIterator is exhausted after " + LIMIT + " elements");
            }
            index++;
            return "fengsoshuai" + index;
        }
    }
}
如果不知道如何运行上面的示例程序,请参考我的 Flink 入门程序博文。
package org.feng.datasource;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
/**
 * Demonstrates the socket text source of the Flink DataStream API: connects to a
 * host/port, splits the incoming text on a delimiter and prints every record.
 *
 * <p>Without program arguments the source connects to {@code localhost:12345}.
 * An optional first argument overrides the host name and an optional second
 * argument overrides the port.
 *
 * @author Feng
 */
public class SocketDataSource {

    /** Host used when no host name is passed on the command line. */
    private static final String DEFAULT_HOSTNAME = "localhost";

    /** Port used when no port is passed on the command line. */
    private static final int DEFAULT_PORT = 12345;

    /**
     * Builds the socket source, prints it and runs the job.
     *
     * @param args optional: {@code args[0]} host name, {@code args[1]} port
     * @throws IllegalArgumentException if the supplied port is not a number in 1..65535
     * @throws Exception if the Flink job fails
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Host to connect to.
        final String hostname = (args != null && args.length > 0) ? args[0] : DEFAULT_HOSTNAME;
        // Port to connect to.
        final int port = (args != null && args.length > 1) ? parsePort(args[1]) : DEFAULT_PORT;
        // Delimiter that separates the records in the incoming text.
        final String delimiter = "\n";
        // Retry setting: a negative value retries forever, 0 disables retrying.
        // NOTE(review): Flink's Javadoc describes this as a retry interval in seconds
        // rather than a plain count - confirm against the Flink version in use.
        final int maxRetry = 3;

        env.socketTextStream(hostname, port, delimiter, maxRetry)
                .returns(Types.STRING)
                .print()
                .setParallelism(1);

        env.execute();
    }

    /**
     * Parses a TCP port from its textual form.
     *
     * @param raw the text to parse
     * @return the port number
     * @throws IllegalArgumentException if {@code raw} is not an integer in 1..65535
     */
    private static int parsePort(String raw) {
        final int parsed;
        try {
            parsed = Integer.parseInt(raw.trim());
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException("Port must be an integer but was: " + raw, e);
        }
        if (parsed < 1 || parsed > 65535) {
            throw new IllegalArgumentException("Port must be in 1..65535 but was: " + parsed);
        }
        return parsed;
    }
}
package org.feng.datasource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.ParallelSourceFunction;
import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import java.io.Serializable;
/**
 * Demonstrates {@code addSource} with user-defined sources, using the three
 * source function flavours {@code SourceFunction}, {@code ParallelSourceFunction}
 * and {@code RichParallelSourceFunction}.
 *
 * <p>Each source emits its own prefix followed by the numbers 1 to 1000 and stops
 * early when it is cancelled.
 *
 * @author Feng
 */
public class DefineDataSource implements Serializable {
    private static final long serialVersionUID = 6883121931605774645L;

    /** Number of records each source emits before finishing on its own. */
    private static final int MAX_RECORDS = 1000;

    /**
     * Registers the three demo sources, prints each of them and runs the job.
     *
     * @param args unused
     * @throws Exception if the Flink job fails
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Plain source function (the original author notes: not parallel).
        env.addSource(new SourceFunction<String>() {
            private static final long serialVersionUID = -2595016291338802241L;
            private final String temp = "fengsoshaui-SourceFunction-";
            // Cleared by cancel(); volatile so the emitting loop sees the change.
            private volatile boolean running = true;

            @Override
            public void run(SourceContext<String> sourceContext) {
                emitSequence(sourceContext, () -> running, temp);
            }

            @Override
            public void cancel() {
                running = false;
            }
        }).print().setParallelism(1);

        // Sub-interface of SourceFunction (the original author notes: supports parallelism).
        env.addSource(new ParallelSourceFunction<String>() {
            private static final long serialVersionUID = -8628017068895567640L;
            private final String temp = "fengsoshaui-ParallelSourceFunction-";
            // Cleared by cancel(); volatile so the emitting loop sees the change.
            private volatile boolean running = true;

            @Override
            public void run(SourceContext<String> sourceContext) throws Exception {
                emitSequence(sourceContext, () -> running, temp);
            }

            @Override
            public void cancel() {
                running = false;
            }
        }).print().setParallelism(1);

        // Rich variant of the parallel source function (the original author notes: supports parallelism).
        env.addSource(new RichParallelSourceFunction<String>() {
            private static final long serialVersionUID = -3591733182357219507L;
            private final String temp = "fengsoshaui-RichParallelSourceFunction-";
            // Cleared by cancel(); volatile so the emitting loop sees the change.
            private volatile boolean running = true;

            @Override
            public void run(SourceContext<String> sourceContext) throws Exception {
                emitSequence(sourceContext, () -> running, temp);
            }

            @Override
            public void cancel() {
                running = false;
            }
        }).print().setParallelism(1);

        env.execute();
    }

    /**
     * Emits {@code prefix + 1} up to {@code prefix + MAX_RECORDS}, stopping early as
     * soon as {@code running} reports {@code false}.
     *
     * <p>The flag is passed as a supplier and re-evaluated before every record. Passing
     * it as a plain {@code boolean} would freeze its value at call time, so a later
     * {@code cancel()} could never stop the loop.
     *
     * @param sourceContext context used to emit the records
     * @param running       supplies the current value of the caller's running flag
     * @param prefix        text placed in front of every emitted number
     */
    private static void emitSequence(SourceFunction.SourceContext<String> sourceContext,
                                     java.util.function.BooleanSupplier running,
                                     String prefix) {
        int count = 0;
        while (running.getAsBoolean() && count++ < MAX_RECORDS) {
            sourceContext.collect(prefix + count);
        }
    }
}