Hadoop file and directory operations

You can read the attribute information of files on HDFS with bin/hadoop fs -ls.
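For example, assuming a hypothetical /user/hadoop directory:

bin/hadoop fs -ls /user/hadoop

Each line of the listing shows an entry's permissions, replication factor, owner, group, size, modification time, and path.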

The same information can also be read through the HDFS API, as follows:

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileInfo {
    public static void main(String[] args) throws Exception {
        if (args.length != 1) {
            System.err.println("Usage: FileInfo <path>");
            System.exit(1);
        }
        Configuration conf = new Configuration();
        // Obtain a FileSystem handle for the scheme/authority of the given URI
        FileSystem hdfs = FileSystem.get(URI.create(args[0]), conf);
        // A single FileStatus carries all the metadata of one entry
        FileStatus fs = hdfs.getFileStatus(new Path(args[0]));
        System.out.println("path: " + fs.getPath());
        System.out.println("length: " + fs.getLen());
        System.out.println("modify time: " + fs.getModificationTime());
        System.out.println("owner: " + fs.getOwner());
        System.out.println("replication: " + fs.getReplication());
        System.out.println("blockSize: " + fs.getBlockSize());
        System.out.println("group: " + fs.getGroup());
        System.out.println("permission: " + fs.getPermission().toString());
    }
}
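Assuming the compiled class is on the classpath, it can be run through the hadoop launcher (the URI below is hypothetical):

bin/hadoop FileInfo hdfs://localhost:9000/user/hadoop/test.txt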

The same API also supports directory listings. The helper method below returns the paths of all entries under a given directory; the FileLister wrapper class and the extra imports are added here only so the snippet compiles on its own:
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileLister {
    /**
     * Lists all entries directly under the given path.
     * @param path the HDFS path to list
     * @return the full paths of the entries under path
     * @throws IOException if the file system cannot be accessed
     */
    public static String[] getFileList(String path) throws IOException {
        Configuration conf = new Configuration();
        // Uses the default file system configured in core-site.xml
        FileSystem fs = FileSystem.get(conf);
        List<String> files = new ArrayList<String>();
        Path s_path = new Path(path);
        if (fs.exists(s_path)) {
            // listStatus returns one FileStatus per immediate child of s_path
            for (FileStatus status : fs.listStatus(s_path)) {
                files.add(status.getPath().toString());
            }
        }
        fs.close();
        return files.toArray(new String[0]);
    }
}
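A minimal sketch of a caller, assuming the FileLister class above and a hypothetical /user/hadoop directory:

// Print every entry directly under the directory
for (String entry : FileLister.getFileList("/user/hadoop")) {
    System.out.println(entry);
}

Note that listStatus does not recurse: each returned FileStatus describes an immediate child of the given directory. Also, FileSystem.get hands back a cached instance shared across the JVM, so calling fs.close() inside the helper can break other code that still holds the same instance; in longer-lived programs you may prefer to leave the file system open.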
