Running a Flink WordCount job that reads from and writes to HDFS

package com.test;

import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.util.Collector;

public class ReadHDFS {
    public static void main(String[] args) {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Read the input file from HDFS as a stream of lines
        DataStream<String> text = env.readTextFile("hdfs://10.9.13.171:54310/testdir/abc.txt");

        // Tokenize each line and emit a (word, 1) pair per word
        DataStream<Tuple2<String, Integer>> wordCounts = text.flatMap(new FlatMapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
                String[] words = value.toLowerCase().split("\\s+");
                for (String word : words) {
                    if (!word.isEmpty()) {
                        out.collect(new Tuple2<>(word, 1));
                    }
                }
            }
        });

        wordCounts.print();
        // writeAsText is deprecated in recent Flink releases in favor of the
        // file sink, but it still works in 1.14
        wordCounts.writeAsText("hdfs://10.9.13.171:54310/testdir/wordcountoutput");

        try {
            env.execute("WordCount Job");
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
}
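
Note that, despite the variable name, the pipeline above never aggregates: it emits one (word, 1) pair per token. A minimal sketch of the missing keyed sum, assuming the same Flink 1.14 DataStream API (the summed variable is illustrative and would replace the writeAsText call above):

        // Group by the word (tuple field 0) and keep a running sum of field 1.
        // Each incoming pair updates and re-emits the current total for its word.
        DataStream<Tuple2<String, Integer>> summed = wordCounts
                .keyBy(pair -> pair.f0)
                .sum(1);

        summed.writeAsText("hdfs://10.9.13.171:54310/testdir/wordcountoutput");

Because the DataStream API treats the input as a stream, this writes a running total per word (one line per update) rather than a single final count per word.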

The pom.xml file



<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>flink-test</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>8</maven.compiler.source>
        <maven.compiler.target>8</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <flink.version>1.14.5</flink.version>
        <hadoop.version>3.1.2</hadoop.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-core</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.12</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.12</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- Build a fat jar that bundles all dependencies -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>3.6.0</version>
                <configuration>
                    <archive>
                        <manifest>
                            <mainClass>com.test.ReadHDFS</mainClass>
                        </manifest>
                    </archive>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>

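With this pom in place, the fat jar is built with the standard Maven invocation (the artifact lands in target/):

mvn clean package
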
Command to submit the packaged jar to the YARN cluster

[root@node171 lib]# flink run -m yarn-cluster  flink-test-1.0-SNAPSHOT-jar-with-dependencies.jar 
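
If submission fails because Hadoop classes cannot be found, note that Flink's YARN integration expects the Hadoop classpath to be exported on the client machine before running flink run (assuming the hadoop CLI is on the PATH of the submitting node):

export HADOOP_CLASSPATH=$(hadoop classpath)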

The job reports an error on submission: [screenshot: error output from the original post]
