自定义OperatorState

使用自定义OperatorState保存每个subTask的文件读取偏移量(offset),从而在任务失败恢复后从上次的位置继续读取,实现恰好一次(exactly-once)读取文件内容

public class FileSourceWithState extends RichParallelSourceFunction> implements CheckpointedFunction {

    private String path;
    public FileSourceWithState(String path){
        this.path=path;
    }

    private transient ListState offsetListState;
    private boolean isRunning=true;
    private Long offset=0L;
    @Override
    public void run(SourceContext> sourceContext) throws Exception {
        Iterator iterator = offsetListState.get().iterator();
        while (iterator.hasNext()){
            offset=iterator.next();
        }
        //获取当前subTask
        int subtask = getRuntimeContext().getIndexOfThisSubtask();
        String fileName=path + "/" + subtask + ".txt";
        RandomAccessFile randomAccessFile = new RandomAccessFile(fileName, "r");
        //从指定的offset读取文件内容
        randomAccessFile.seek(offset);
        final Object checkpointLock = sourceContext.getCheckpointLock();
        while (isRunning){
            String line = randomAccessFile.readLine();
            if(line!=null){
                String message = new String(line.getBytes("ISO-8859-1"), "utf-8");
                synchronized (checkpointLock){//与snapshotState方法共享offset,会有线程安全问题,所以要加锁
                    offset = randomAccessFile.getFilePointer();
                    sourceContext.collect(new Tuple2(fileName,message));
                }
            }else {
                Thread.sleep(1000);
            }
        }
    }

    @Override
    public void cancel() {
        isRunning=false;
    }

    /**
     * 定期将state保存到statebackend中
     * @param functionSnapshotContext
     * @throws Exception
     */
    @Override
    public void snapshotState(FunctionSnapshotContext functionSnapshotContext) throws Exception {
        //清除历史数据
        offsetListState.clear();
        //更新最新值
        offsetListState.add(offset);
    }

    /**
     * 只执行一次
     * @param functionInitializationContext
     * @throws Exception
     */
    @Override
    public void initializeState(FunctionInitializationContext functionInitializationContext) throws Exception {

        //初始化或者获取历史状态
        //getRuntimeContext()获取的state是keyState,如何获取opratorState?
        ListStateDescriptor offsetListStateDescriptor=new ListStateDescriptor("offset-state", TypeInformation.of(new TypeHint() {
        }));
        offsetListState = functionInitializationContext.getOperatorStateStore().getListState(offsetListStateDescriptor);
    }
}
/**
 * Runs {@link FileSourceWithState} with parallelism 2 and prints every emitted record.
 *
 * <p>Checkpointing is enabled with a 1000 ms interval, checkpoints are written
 * through an {@code FsStateBackend} and retained when the job is cancelled, and
 * the job uses a fixed-delay restart strategy (2 attempts, 2000 ms delay).
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the job fails to execute
 */
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(2);

    // Checkpoint configuration: interval, restart behaviour, storage location, retention.
    env.enableCheckpointing(1000);
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(2, 2000));
    env.setStateBackend(new FsStateBackend("file:///D:\\temp\\flink\\checkpoint"));
    env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

    // The source emits (fileName, line) tuples, one input file per subtask.
    DataStreamSource<Tuple2<String, String>> streamSource = env.addSource(new FileSourceWithState("D:\\temp\\flink\\file"));
    streamSource.print();
    env.execute(OperatorStateApplication.class.getSimpleName());
}

你可能感兴趣的:(研磨Flink,flink)