Hadoop --- 入门之HDFS的JAVA API操作

JAR准备:

将hadoop-2.8.0安装包中share/hadoop目录下的以下jar包添加到工程的classpath中:

  • common下的hadoop-common-2.8.0.jar
  • common/lib下的所有jar
  • hdfs下的hadoop-hdfs-2.8.0.jar
  • hdfs/lib下的所有jar

 

示例:

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.net.URI;
import java.util.Iterator;
import java.util.Map.Entry;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

/**
 * JUnit 4 walkthrough of the basic HDFS client operations: reading the
 * configuration, uploading, downloading, creating directories, deleting,
 * listing and printing file contents.
 *
 * <p>A client always talks to HDFS under a user identity. By default the HDFS
 * client API takes that identity from a JVM parameter, e.g.
 * {@code -DHADOOP_USER_NAME=hadoop}; this class instead passes the user name
 * explicitly to {@code FileSystem.get} in {@link #init()}.
 *
 * <p>Every test needs a reachable NameNode at the address used in
 * {@link #init()}; the paths used are the sample paths of the tutorial.
 */
public class HdfsClientDemo {

	private FileSystem fs;
	private Configuration conf;

	/**
	 * Builds the client configuration and opens the file system before each test.
	 *
	 * @throws Exception if the URI is malformed or the file system cannot be obtained
	 */
	@Before
	public void init() throws Exception {
		conf = new Configuration();
		// Client-side setting: files created through this client ask for 2 replicas.
		conf.set("dfs.replication", "2");

		fs = FileSystem.get(new URI("hdfs://192.168.153.136:9000"), conf, "hadoop");
	}

	/**
	 * Closes the file system after each test so that no test leaks the client,
	 * regardless of whether the test passed or threw.
	 *
	 * @throws Exception if closing the file system fails
	 */
	@After
	public void tearDown() throws Exception {
		if (fs != null) {
			fs.close();
		}
	}

	/** Prints every key/value pair held by the client configuration. */
	@Test
	public void testConf() {
		// Configuration is iterable over its entries, so no explicit Iterator is needed.
		for (Entry<String, String> ent : conf) {
			System.out.println(ent.getKey() + ": " + ent.getValue());
		}
	}

	/**
	 * Uploads a local file to HDFS.
	 *
	 * @throws Exception if the copy fails
	 */
	@Test
	public void testUpload() throws Exception {
		// (1) Alternative: upload through plain IO streams.
		//FileInputStream fileInputStream =new FileInputStream("e:/Spring MVC.docx");
		//FSDataOutputStream fsDataOutputStream = fs.create(new Path("/Spring MVC222.docx"));
		//IOUtils.copy(fileInputStream, fsDataOutputStream);

		// (2) Second approach: let the FileSystem copy the local file.
		boolean windowsAbsolutePath = Path.isWindowsAbsolutePath("e:/Spring MVC.docx", true);
		System.out.println(windowsAbsolutePath);
		// The upload is only attempted when the source is recognised as a Windows absolute path.
		if (windowsAbsolutePath) {
			fs.copyFromLocalFile(new Path("e:/Spring MVC.docx"), new Path("/Spring MVC.docx"));
		}
	}

	/**
	 * Downloads a file from HDFS to the local disk.
	 *
	 * @throws Exception if the copy fails
	 */
	@Test
	public void testDownload() throws Exception {
		// (1) Alternative that does not rely on a local hadoop environment: plain IO streams.
		//FSDataInputStream fsDataInputStream=fs.open(new Path("/spring/Spring MVC.docx"));
		//FileOutputStream fileOutputStream=new FileOutputStream("e:/Spring MVC.docx");
		//IOUtils.copy(fsDataInputStream, fileOutputStream);

		// (2) fs.copyToLocalFile(new Path("/Spring MVC1111.docx"), new Path("e:/")) relies on a
		// local hadoop environment. The four-argument overload avoids that: argument 1 says
		// whether to delete the source on HDFS, argument 4 whether to use the raw (pure Java)
		// local file system.
		fs.copyToLocalFile(false, new Path("/Spring MVC1111.docx"), new Path("e:/"), true);
	}

	/**
	 * Creates a directory on HDFS (the root directory is {@code /}) and prints the result.
	 *
	 * @throws Exception if the request fails
	 */
	@Test
	public void makdirTest() throws Exception {
		boolean created = fs.mkdirs(new Path("/spring"));
		System.out.println(created);
	}

	/**
	 * Placeholder kept from the original tutorial; intentionally has no body yet.
	 *
	 * @throws Exception never, in the current empty form
	 */
	@Test
	public void createTest() throws Exception {
		// Intentionally empty.
	}

	/**
	 * Probes a few paths with exists/isFile/isDirectory, prints the results, and
	 * recursively deletes {@code /spring} when it exists.
	 *
	 * @throws Exception if any request fails
	 */
	@Test
	public void deleteTest() throws Exception {
		Path springDir = new Path("/spring");
		Path docFile = new Path("/Spring MVC.docx");

		// fs.exists(): whether the path exists (it may not).
		boolean fileExists = fs.exists(springDir);  // true in the tutorial run
		// fs.isFile(): whether the path is a regular file.
		boolean file = fs.isFile(docFile);  // true in the tutorial run
		// fs.isDirectory(): whether the path is a directory.
		boolean directory1 = fs.isDirectory(springDir);   // true in the tutorial run
		boolean directory2 = fs.isDirectory(docFile);  // false in the tutorial run

		System.out.println(fileExists);
		System.out.println(file);
		System.out.println(directory1);
		System.out.println(directory2);
		if (fileExists) {
			// Second argument true: delete recursively.
			boolean deleted = fs.delete(springDir, true);
			System.out.println(deleted);
		}
	}

	/**
	 * Lists the direct children of the root directory, then walks all files
	 * recursively and prints their metadata and block locations.
	 *
	 * @throws Exception if any request fails
	 */
	@Test
	public void listTest() throws Exception {
		FileStatus[] listStatus = fs.listStatus(new Path("/"));
		for (FileStatus fileStatus : listStatus) {
			System.err.println(fileStatus.getPath()+"================="+fileStatus.toString());
		}

		// Second argument true: descend recursively and return every file.
		RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/"), true);
		while (listFiles.hasNext()) {
			LocatedFileStatus fileStatus = listFiles.next();
			System.out.println("blocksize:" + fileStatus.getBlockSize());
			System.out.println("owner:" + fileStatus.getOwner());
			System.out.println("Replication:" + fileStatus.getReplication());
			System.out.println("Permission:" + fileStatus.getPermission());
			System.out.println("Path:" + fileStatus.getPath());
			System.out.println("FileName:" + fileStatus.getPath().getName());
			System.out.println("File Len:" + fileStatus.getLen());
			BlockLocation[] blockLocations = fileStatus.getBlockLocations();
			for (BlockLocation blockLocation : blockLocations) {
				System.out.println("块起始偏移量:" + blockLocation.getOffset());
				System.out.println("块长度:" + blockLocation.getLength());
				String[] hosts = blockLocation.getHosts();
				for (String datanode : hosts) {
					// Note from the tutorial: a file may report 3 replica hosts even though the
					// cluster's hdfs-site.xml sets dfs.replication to 2. The client Configuration
					// is independent of the server-side hdfs-site.xml, and the client's value is
					// the one used when a file is written; a client that does not set
					// dfs.replication falls back to the default of 3. That is why init() calls
					// conf.set("dfs.replication", "2") before any upload.
					System.out.println("块副本位置:" + datanode);
				}
			}
		}
	}

	/**
	 * Prints the content of a file stored on HDFS to standard output.
	 *
	 * @throws Exception if the file cannot be opened or read
	 */
	@Test
	public void testCat() throws Exception {
		// try-with-resources closes the HDFS input stream even if the copy fails.
		try (FSDataInputStream in = fs.open(new Path("/Spring MVC.docx"))) {
			IOUtils.copy(in, System.out);
		}
	}

}

 

你可能感兴趣的:(------,Hadoop,大数据)