Querying the HDFS Filesystem from Java

1. File Metadata: FileStatus
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;

import java.io.IOException;
import java.net.URI;

public class ShowFileStatus {

    public static void main(String[] args) {
        try {
            URI uri = URI.create("hdfs://192.168.1.100:9000/home/ossuser/1.txt");

            Configuration conf = new Configuration();
            FileSystem fs = FileSystem.get(uri, conf);

            Path path = new Path(uri);
            // Filesystem-wide statistics: capacity, used, and remaining space, in bytes
            FsStatus fsStatus = fs.getStatus(path);

            System.out.println("fsStatus.getCapacity() = " + fsStatus.getCapacity());
            System.out.println("fsStatus.getUsed() = " + fsStatus.getUsed());
            System.out.println("fsStatus.getRemaining() = " + fsStatus.getRemaining());

            System.out.println("----------------------------------");

            // Per-file metadata: owner, timestamps, block size, length, replication, permission
            FileStatus fileStatus = fs.getFileStatus(path);

            System.out.println("fileStatus.getOwner() = " + fileStatus.getOwner());
            System.out.println("fileStatus.getGroup() = " + fileStatus.getGroup());
            System.out.println("fileStatus.getAccessTime() = " + fileStatus.getAccessTime());
            System.out.println("fileStatus.getBlockSize() = " + fileStatus.getBlockSize());
            System.out.println("fileStatus.getLen() = " + fileStatus.getLen());
            System.out.println("fileStatus.getModificationTime() = " + fileStatus.getModificationTime());
            System.out.println("fileStatus.getReplication() = " + fileStatus.getReplication());

            FsPermission fsPermission = fileStatus.getPermission();
            System.out.println("fileStatus.getPermission() = " + fsPermission);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

The output is as follows:

fsStatus.getCapacity() = 38046990336
fsStatus.getUsed() = 187760640
fsStatus.getRemaining() = 14045487104
----------------------------------
fileStatus.getOwner() = ossuser
fileStatus.getGroup() = supergroup
fileStatus.getAccessTime() = 1555746057611
fileStatus.getBlockSize() = 134217728
fileStatus.getLen() = 1935838
fileStatus.getModificationTime() = 1555746058582
fileStatus.getReplication() = 3
fileStatus.getPermission() = rw-r--r--

FileStatus encapsulates the metadata of both directories and files, including the owner, block size, file length, modification time, replication factor, and permissions.
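
FileStatus also reports the path type via isFile() and isDirectory(), and getFileStatus() throws FileNotFoundException for a nonexistent path. A minimal sketch of both checks, reusing the cluster address and path from the example above:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;

public class CheckPathType {

    public static void main(String[] args) throws IOException {
        URI uri = URI.create("hdfs://192.168.1.100:9000/home/ossuser/1.txt");
        FileSystem fs = FileSystem.get(uri, new Configuration());
        Path path = new Path(uri);

        try {
            FileStatus status = fs.getFileStatus(path);
            // Branch on the path type before reading type-specific metadata
            if (status.isFile()) {
                System.out.println(path + " is a file of " + status.getLen() + " bytes");
            } else if (status.isDirectory()) {
                System.out.println(path + " is a directory");
            }
        } catch (FileNotFoundException e) {
            // getFileStatus throws FileNotFoundException when the path does not exist
            System.out.println(path + " does not exist");
        }
    }
}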

2. Listing Files
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

import java.io.IOException;
import java.net.URI;

public class ListStatus {

    public static void main(String[] args) {
        try {
            URI uri = URI.create("hdfs://192.168.1.100:9000/home/ossuser/");

            Configuration conf = new Configuration();
            FileSystem fs = FileSystem.get(uri, conf);

            Path path = new Path(uri);

            // listStatus lists one level of the directory; the PathFilter keeps only directories
            FileStatus[] fileStatuses = fs.listStatus(path, new PathFilter() {
                @Override
                public boolean accept(Path path) {
                    try {
                        return fs.getFileStatus(path).isDirectory();
                    } catch (IOException e) {
                        return false;
                    }
                }
            });
            for (FileStatus fileStatus : fileStatuses) {
                System.out.println(fileStatus.getPath());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

Output:

hdfs://192.168.1.100:9000/home/ossuser/aa

Listing the contents of multiple paths:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

import java.io.IOException;
import java.net.URI;

public class FileStat2Paths {

    public static void main(String[] args) {
        try {
            Path[] paths = new Path[]{
                new Path("hdfs://192.168.1.100:9000/"),
                new Path("hdfs://192.168.1.100:9000/home/ossuser"),
            };

            Configuration configuration = new Configuration();
            FileSystem fs = FileSystem.get(URI.create(paths[0].toString()), configuration);

            FileStatus[] status = fs.listStatus(paths);

            // FileUtil.stat2Paths extracts the Path from each FileStatus entry
            Path[] listedPaths = FileUtil.stat2Paths(status);
            for(Path listedPath : listedPaths) {
                System.out.println(listedPath);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

Output:

hdfs://192.168.1.100:9000/2019
hdfs://192.168.1.100:9000/home
hdfs://192.168.1.100:9000/home/ossuser/1.txt
hdfs://192.168.1.100:9000/home/ossuser/aa
hdfs://192.168.1.100:9000/home/ossuser/log1.txt
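
listStatus does not recurse into subdirectories. When a recursive walk is needed, FileSystem also offers listFiles(Path, boolean), which returns a RemoteIterator over every file beneath the given path. A minimal sketch, assuming the same cluster address as the examples above:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

import java.io.IOException;
import java.net.URI;

public class ListFilesRecursive {

    public static void main(String[] args) throws IOException {
        URI uri = URI.create("hdfs://192.168.1.100:9000/home/ossuser/");
        FileSystem fs = FileSystem.get(uri, new Configuration());

        // The second argument enables recursion; only files (not directories) are returned
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path(uri), true);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
        }
    }
}
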
3. File Patterns

Globbing uses wildcard characters to match multiple files with a single expression. FileSystem provides two methods for wildcard matching:

  • public FileStatus[] globStatus(Path pathPattern) throws IOException
  • public FileStatus[] globStatus(Path pathPattern, PathFilter filter) throws IOException

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;
import java.net.URI;

public class GlobStatus {

    public static void main(String[] args) {
        URI uri = URI.create("hdfs://192.168.1.100:9000/");

        try {
            Configuration configuration = new Configuration();
            FileSystem fs = FileSystem.get(uri, configuration);

            // * matches any single path component; {a,b} matches either alternative
            FileStatus[] status = fs.globStatus(new Path("/*/{01,02}"));
            for (FileStatus fileStatus : status) {
                System.out.println(fileStatus.getPath());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

The output is as follows:

hdfs://192.168.1.100:9000/2019/01
hdfs://192.168.1.100:9000/2019/02

Wildcards operate only on file paths and names, not on file attributes. For more flexible filtering of files or directories, combine globStatus with an implementation of the PathFilter interface, as in the sketch below.
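
One common approach is a filter that excludes paths matching a regular expression, which complements what globs alone can express. The class below is an illustrative sketch, not part of the Hadoop API:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

// Rejects any path whose full string matches the given regular expression
public class RegexExcludePathFilter implements PathFilter {

    private final String regex;

    public RegexExcludePathFilter(String regex) {
        this.regex = regex;
    }

    @Override
    public boolean accept(Path path) {
        return !path.toString().matches(regex);
    }
}

For example, fs.globStatus(new Path("/2019/*"), new RegexExcludePathFilter(".*/02$")) would expand the glob first and then drop /2019/02 from the result.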
