Operating HDFS with Java

Steps

  • Create a Java project with IDEA + Maven

    Install Maven locally, configure Maven in IDEA, and create the corresponding Maven project.

  • Add the HDFS-related dependencies

    In pom.xml, declare the Hadoop version as a property, the Cloudera repository, and the hadoop-client dependency:

    <properties>
      <hadoop.version>2.6.0-cdh5.7.0</hadoop.version>  <!-- specify the Hadoop version -->
    </properties>

    <repositories>
      <repository>
        <id>cloudera</id>
        <url>https://repository.cloudera.com/artifactory/cloudera-repos</url>  <!-- specify the repository URL -->
      </repository>
    </repositories>

    <dependency>
      <groupId>org.apache.hadoop</groupId>  <!-- add the Hadoop dependency -->
      <artifactId>hadoop-client</artifactId>
      <version>${hadoop.version}</version>
    </dependency>
  • Develop the Java API code that operates on HDFS files
Preparation
  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FSDataInputStream;
  import org.apache.hadoop.fs.FSDataOutputStream;
  import org.apache.hadoop.fs.FileStatus;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.io.IOUtils;
  import org.apache.hadoop.util.Progressable;
  import org.junit.After;
  import org.junit.Before;
  import org.junit.Test;
  import java.io.*;
  import java.net.URI;
  import java.util.Arrays;

  public static final String HDFS_PATH = "hdfs://192.168.247.100:9000"; //HDFS URI
  FileSystem fileSystem = null;  //core class for operating on HDFS
  Configuration configuration = null;  //configuration class

  @Before
  public void setUp() throws Exception{
       System.out.println("set up");
       configuration = new Configuration();
       fileSystem = FileSystem.get(new URI(HDFS_PATH), configuration);  //if the current user lacks HDFS permissions, a user name can be passed as well (see the sketch below)
   }
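
   // A minimal sketch of the overloaded call that passes an explicit user, assuming
   // the HDFS user is named "hadoop" (substitute your cluster's user name):
   // fileSystem = FileSystem.get(new URI(HDFS_PATH), configuration, "hadoop");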

   @After
   public void tearDown() throws Exception {
       configuration = null;
       fileSystem = null;
       System.out.print("tearDown");
   }
Create an HDFS directory
    @Test
    public void mkdir() throws Exception{
        fileSystem.mkdirs(new Path("/hdfsapi/test"));
    }
Create an HDFS file
    @Test
    public void create() throws Exception{
        FSDataOutputStream outputStream = fileSystem.create(new Path("/hdfsapi/test/test.txt"));
        outputStream.write("Hello World".getBytes());
        outputStream.flush();
        outputStream.close();
    }
View the contents of an HDFS file
    @Test
    public void cat() throws Exception{
        FSDataInputStream inputStream = fileSystem.open(new Path("/hdfsapi/test/test.txt"));
        IOUtils.copyBytes(inputStream, System.out, 1024);
        inputStream.close();
    }
Rename a file
    @Test
    public void rename() throws Exception{
        fileSystem.rename(new Path("/hdfsapi/test/test.txt"), new Path("/hdfsapi/test/test1.txt"));
    }
Upload a local file
    @Test
    public void copyFromLocalFile() throws Exception{
        fileSystem.copyFromLocalFile(new Path("E://U盘/test.sql"), new Path("/hdfsapi/test/")); //works on both Windows and Linux, but the local path is written differently (see the sketch below)
    }
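
    On Linux the local source path is written with ordinary forward slashes. A minimal sketch, assuming a hypothetical local file /home/hadoop/test.sql:

        fileSystem.copyFromLocalFile(new Path("/home/hadoop/test.sql"), new Path("/hdfsapi/test/"));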
Upload a large local file with a progress indicator
    @Test
    public void copyFromLocalFileWithProgress() throws Exception{
        InputStream is = new BufferedInputStream(
                new FileInputStream(
                        new File("E://game.tgz")
                )
        );
        FSDataOutputStream outputStream = fileSystem.create(
                new Path("/hdfsapi/test/zookeeper.tar.gz"),
                new Progressable() {
                    @Override
                    public void progress() {
                        System.out.print(".");  //自定义进度条显示
                    }
                }
        );
        IOUtils.copyBytes(is, outputStream, 4096);
        outputStream.close();
        is.close();
    }
Download an HDFS file
    @Test
    public void copyToLocalFile() throws Exception{
        fileSystem.copyToLocalFile(false, new Path("/hdfsapi/test/test.txt"), new Path("F://test.txt"), true);
        //public void copyToLocalFile(boolean delSrc, Path src, Path dst, boolean useRawLocalFileSystem)
        //useRawLocalFileSystem=true copies through plain Java IO streams rather than the local file system; otherwise the call throws a NullPointerException on Windows (see the sketch below)
    }
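
    The same download can also be done by copying bytes through streams, which avoids the local-file-system problem altogether. A minimal sketch, assuming a hypothetical method name and target path F://test_stream.txt:

        @Test
        public void downloadWithStreams() throws Exception {
            FSDataInputStream in = fileSystem.open(new Path("/hdfsapi/test/test.txt"));  //read from HDFS
            OutputStream out = new FileOutputStream(new File("F://test_stream.txt"));    //write to the local file
            IOUtils.copyBytes(in, out, 4096, true);  //the final argument closes both streams when the copy finishes
        }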
List an HDFS directory
    @Test
    public void listFiles() throws Exception{
        FileStatus[] fileStatuses = fileSystem.listStatus(new Path("/hdfsapi/test"));
        Arrays.stream(fileStatuses).forEach(fileStatus -> {
            String isDir = fileStatus.isDirectory() ? "directory" : "file";
            short replication = fileStatus.getReplication(); //replication factor
            /**
            * Files uploaded with the HDFS shell (put) use the replication factor configured on the server.
            * Files uploaded through the Java API with no client-side setting use Hadoop's default replication factor of 3
            * (see the sketch after this method for how to override it).
            **/
            Long blockSize = fileStatus.getBlockSize();
            Long len = fileStatus.getLen();
            String path = fileStatus.getPath().toString();

            System.out.println(isDir+"\t"+replication+"\t"+blockSize+"\t"+len+"\t"+path);
        });
    }
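
    The client-side replication factor can be overridden by setting dfs.replication on the Configuration before obtaining the FileSystem. A minimal sketch, not part of the original setup:

        configuration = new Configuration();
        configuration.set("dfs.replication", "1");  //replication factor for files written through this client
        fileSystem = FileSystem.get(new URI(HDFS_PATH), configuration);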
Delete
    @Test
    public void delete() throws Exception{
        fileSystem.delete(new Path("/hdfsapi/test/zookeeper.tar.gz"), true);  //the boolean controls whether the delete is recursive
    }
