HDFS Java API in Practice: HDFS File Operation Code

1. Create a Maven quickstart project (a command-line alternative is sketched below)
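If you prefer the command line to the IDE wizard, a quickstart skeleton can also be generated with the Maven archetype plugin. The groupId below matches the package used later in this post; the artifactId is just a placeholder:

mvn archetype:generate -DgroupId=cn.kgc.cn -DartifactId=hdfs-api-demo \
    -DarchetypeArtifactId=maven-archetype-quickstart -DinteractiveMode=false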

(1) Configure the Maven environment

1) settings.xml 2) Maven configuration in IDEA
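A minimal sketch of the settings.xml side is shown below; the local repository path is a placeholder, so adjust it to your machine:

<settings xmlns="http://maven.apache.org/SETTINGS/1.0.0">
    <!-- directory where Maven caches downloaded artifacts (example path) -->
    <localRepository>D:/maven/repository</localRepository>
</settings>

In IDEA, point File > Settings > Build, Execution, Deployment > Build Tools > Maven at this settings.xml so the IDE and the command line share the same configuration.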

(2) Add the dependency to the pom file

<properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>1.7</maven.compiler.source>
    <maven.compiler.target>1.7</maven.compiler.target>
    <hadoop.version>2.6.0-cdh5.7.0</hadoop.version>
</properties>

<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
</dependencies>

<!-- CDH artifacts are not published to Maven Central, so the Cloudera repository is required -->
<repositories>
    <repository>
        <id>cloudera</id>
        <url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
    </repository>
</repositories>

2. HDFS Java API

The class below obtains a FileSystem handle from the HDFS URI, then exercises the common file operations (mkdir, touch, cat, ls, rename, rm, put, get) one method at a time.

package cn.kgc.cn;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Progressable;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.text.SimpleDateFormat;
import java.util.Date;

public class Test {
    // URI of the HDFS namenode
    private static final String HDFS_PATH = "hdfs://localhost:9000";
    // Hadoop configuration object
    private static Configuration cfg = null;
    // file system handle
    private static FileSystem fileSystem = null;

    /**
     * Create a directory: mkdir
     */
    public static void createDir() throws IOException {
        System.out.println("Creating directory...");
        fileSystem.mkdirs(new Path("/hadoop"));
        System.out.println("Directory created.");
    }

    /**
     * Create a file: touch
     */
    public static void createFile() throws IOException {
        System.out.println("Creating file...");
        FSDataOutputStream newFile = fileSystem.create(new Path("/hadoop/test.txt"));
        newFile.write("hello".getBytes());
        newFile.flush();
        newFile.close();
        System.out.println("File created.");
    }

    /**
     * Print a file's contents: cat
     */
    public static void catFile() throws IOException {
        FSDataInputStream in = fileSystem.open(new Path("/hadoop/a.txt"));
        IOUtils.copyBytes(in, System.out, 1024);
        IOUtils.closeStream(in);
    }

    /**
     * List files: ls
     */
    public static void listFile() throws IOException {
        FileStatus[] fileStatuses = fileSystem.listStatus(new Path("/hadoop"));
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm");
        for (FileStatus file : fileStatuses) {
            // regular file or directory
            String isDir = file.isDirectory() ? "d" : "-";
            // permissions
            FsPermission permission = file.getPermission();
            // size in bytes
            long len = file.getLen();
            // replication factor
            short replication = file.getReplication();
            // last access time (not printed below)
            long accessTime = file.getAccessTime();
            // block size
            long blockSize = file.getBlockSize();
            // group
            String group = file.getGroup();
            // owner
            String owner = file.getOwner();
            // last modification time
            long modificationTime = file.getModificationTime();
            System.out.println(
                    isDir + permission + "\t" + owner + "\t" + group
                            + "\t" + len + "\t"
                            + sdf.format(new Date(modificationTime)) + "\t" + replication
                            + "\t" + blockSize / 1024 / 1024 + "M\t"
                            + file.getPath());
        }
    }

    /**
     * Rename a file: rename
     */
    public static void rename() throws IOException {
        Path oldPath = new Path("/hadoop/a.txt");
        Path newPath = new Path("/hadoop/test.txt");
        fileSystem.rename(oldPath, newPath);
    }

    /**
     * Delete a file: rm
     */
    public static void delete() throws IOException {
        Path path = new Path("/hadoop/ubuntu-16.04.3-desktop-amd64.iso");
        if (fileSystem.exists(path)) {
            // second argument: delete recursively (required for non-empty directories)
            fileSystem.delete(path, true);
            System.out.println("File deleted.");
        } else {
            System.out.println("File does not exist.");
        }
    }
    
    /**
     * Upload a small file: put
     */
    public static void copyFromLocal() throws IOException {
        Path localPath = new Path("D:\\software-implementation.png");
        Path remotePath = new Path("/hadoop/software-implementation.png");
        fileSystem.copyFromLocalFile(localPath, remotePath);
    }

    /**
     * Download a file: get
     */
    public static void copyToLocal() throws IOException {
        Path localPath = new Path("D:\\1.png");
        Path remotePath = new Path("/hadoop/software-implementation.png");
        fileSystem.copyToLocalFile(remotePath, localPath);
    }

    /**
     * Upload a large file with a progress callback
     */
    public static void copyFromLocalWithProcess() throws IOException {
        // buffered input stream over the local file to be uploaded
        BufferedInputStream in = new BufferedInputStream(
                new FileInputStream(new File("H:\\system\\ubuntu-16.04.3-desktop-amd64.iso")));
        // create the target file on HDFS; the callback prints ">" as data is
        // written, giving a rough progress indicator during the upload
        FSDataOutputStream output = fileSystem.create(new Path("/hadoop/ubuntu-16.04.3-desktop-amd64.iso"),
                new Progressable() {
                    @Override
                    public void progress() {
                        System.out.print(">");
                    }
                });
        // copy the bytes; the final "true" closes both streams when done
        IOUtils.copyBytes(in, output, 4096, true);
    }

    /**
     * Main method
     */
    public static void main(String[] args) throws Exception {
        cfg = new Configuration();
        // the third argument is the HDFS user name ("hadoop"); it is only needed
        // when HDFS runs on a remote machine/VM, not for a local Hadoop install
        fileSystem = FileSystem.get(new URI(HDFS_PATH), cfg, "hadoop");
        System.out.println("start=====================");

        // uncomment the calls below one at a time to exercise each operation
        //createDir();
        //createFile();
        //catFile();
        listFile();
        //rename();
        //copyFromLocal();
        //copyToLocal();
        //delete();
        //copyFromLocalWithProcess();

        fileSystem.close();
    }
}
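Note that HDFS_PATH must match the fs.defaultFS value configured in the cluster's core-site.xml; as a reference (host and port here are examples matching the code above), the corresponding entry looks like:

<property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:9000</value>
</property>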
