HDFS 文件读取 (根据文件路径读取hdfs下的文件内容)

/**
 * Reads the text content of the file(s) matched by an HDFS path.
 *
 * <p>{@code pathName} is resolved with {@code FileSystem.globStatus}, so it may be a plain
 * path or a glob pattern. For every match:
 * <ul>
 *   <li>a regular file is read directly;</li>
 *   <li>a directory has its immediate child files read (non-recursive).</li>
 * </ul>
 * All lines are appended to a single list in the order they are encountered.
 *
 * <p>Failures are logged and not rethrown; whatever was read before the failure is returned.
 *
 * @param hdfsPath URI used to access HDFS
 * @param pathName full path (or glob pattern) of the file or directory to read
 * @return the lines that were read, decoded as UTF-8; never {@code null}, empty when nothing
 *         matched or nothing could be read
 */
public static List<String> fileReadFromHdfs(String hdfsPath, String pathName) {
    List<String> lines = new ArrayList<>();
    try {
        Configuration conf = new Configuration();
        URI uri = URI.create(hdfsPath);
        Path path = new Path(pathName);
        // FileSystem.get may hand back a cached instance shared with other callers,
        // so it is deliberately not closed here.
        FileSystem hdfs = FileSystem.get(uri, conf, "hdfs");
        log.info("----------------------------------");
        log.info("hdfs uri:" + uri);
        log.info("path: " + path);
        log.info("----------------------------------");

        FileStatus[] matches = hdfs.globStatus(path);
        if (matches == null) {
            // globStatus yields null (not an empty array) for a non-glob path that does not exist.
            log.info("no file matched path: " + path);
            return lines;
        }

        for (FileStatus match : matches) {
            if (match.isDirectory()) {
                RemoteIterator<LocatedFileStatus> children = hdfs.listFiles(match.getPath(), false);
                while (children.hasNext()) {
                    Path childPath = children.next().getPath();
                    log.info("---------文件夹下的子文件路径:" + childPath);
                    readLines(hdfs, childPath, lines);
                }
            } else {
                // The matched path is itself a file: read it instead of only reporting it.
                log.info("file path: " + match.getPath());
                readLines(hdfs, match.getPath(), lines);
            }
        }
    } catch (IOException e) {
        log.error("failed to read from hdfs, uri=" + hdfsPath + ", path=" + pathName, e);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers up the stack can still observe it.
        Thread.currentThread().interrupt();
        log.error("interrupted while reading from hdfs, uri=" + hdfsPath + ", path=" + pathName, e);
    }
    return lines;
}

/**
 * Appends every line of {@code file} to {@code sink}; the stream is always closed,
 * including when reading fails part-way.
 */
private static void readLines(FileSystem fs, Path file, List<String> sink) throws IOException {
    try (BufferedReader reader = new BufferedReader(
            new InputStreamReader(fs.open(file), java.nio.charset.StandardCharsets.UTF_8))) {
        String line;
        while ((line = reader.readLine()) != null) {
            sink.add(line);
        }
    }
}

你可能感兴趣的:(hdfs)