HDFS Operations

HDFS Web UI

Web console ports: 50070 (NameNode UI), 50090 (SecondaryNameNode UI)

HDFS Command-Line Operations

HDFS operation commands (hdfs dfs)

-mkdir           create a directory
-ls              list the contents of a directory
-ls -R           list a directory recursively
-put             upload a local file to HDFS
-moveFromLocal   upload a local file, then delete the local copy
-copyFromLocal   upload a local file (same as -put)
-copyToLocal     download a file to the local filesystem (same as -get)
-get             download a file to the local filesystem
-rm              delete a file or directory
-getmerge        concatenate all files in an HDFS directory into one local file
-cp              copy within HDFS
-mv              move/rename within HDFS
-count           count the directories, files, and bytes under a path
-du              show the space used by each file/directory
-text, -cat      print a file's contents
balancer         rebalance blocks across DataNodes (run as: hdfs balancer)
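
A short sample session tying a few of these together (the paths and file names are illustrative):

hdfs dfs -mkdir /input
hdfs dfs -put data.txt /input
hdfs dfs -ls -R /input
hdfs dfs -cat /input/data.txt
hdfs dfs -get /input/data.txt ./data_copy.txt
hdfs dfs -rm /input/data.txt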

HDFS administration commands (hdfs dfsadmin)

-report       print basic filesystem information and statistics
-safemode     query or change safe mode (get | enter | leave)
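
For example, to print cluster statistics and check the safe-mode state:

hdfs dfsadmin -report
hdfs dfsadmin -safemode get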

Java API

Creating a Directory

package demo;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

/*
 * Cause of the error:
 * Caused by: org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException):
 * Permission denied: user=lenovo, access=WRITE, inode="/folder1":root:supergroup:drwxr-xr-x
 *
 * Current user: lenovo, attempting WRITE access
 * Permissions on the HDFS root directory: root:supergroup:drwxr-xr-x
 *
 * Four ways to make the program run:
 * 1. Set a system property identifying the user
 * 2. Pass the user as a -D JVM argument, e.g. -DHADOOP_USER_NAME=root
 * 3. Open up the directory's permissions:  hdfs dfs -chmod 777 /folder2
 * 4. Set dfs.permissions to false to disable HDFS permission checking
 */
public class TestMkDir {
    @Test
    public void test1() throws Exception{
        // Way 1: set a system property identifying the user
        System.setProperty("HADOOP_USER_NAME", "root");
        // Point the client at the NameNode
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        // Get an HDFS client
        FileSystem client = FileSystem.get(conf);
        // Create the directory
        client.mkdirs(new Path("/folder1"));
        // Close the client
        client.close();
    }

    @Test
    public void test2() throws Exception{
        // Way 2 (presumably): no property is set here, so run with the
        // JVM argument -DHADOOP_USER_NAME=root, e.g. in the IDE run configuration
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        // Get an HDFS client
        FileSystem client = FileSystem.get(conf);
        // Create the directory
        client.mkdirs(new Path("/folder2"));
        // Close the client
        client.close();
    }

    @Test
    public void test3() throws Exception{
        // Way 3 (presumably): after hdfs dfs -chmod 777 /folder2,
        // any user may create subdirectories under /folder2
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        // Get an HDFS client
        FileSystem client = FileSystem.get(conf);
        // Create the subdirectory
        client.mkdirs(new Path("/folder2/folder3"));
        // Close the client
        client.close();
    }

    @Test
    public void test4() throws Exception{
        // Way 4 (presumably): succeeds once dfs.permissions has been
        // set to false in hdfs-site.xml and the NameNode restarted
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        // Get an HDFS client
        FileSystem client = FileSystem.get(conf);
        // Create the directory
        client.mkdirs(new Path("/folder4"));
        // Close the client
        client.close();
    }
}
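
For way 4, the matching hdfs-site.xml entry would look roughly like the sketch below, using the legacy property name from the comment above (newer Hadoop releases name it dfs.permissions.enabled); the NameNode must be restarted afterwards:

<property>
   <name>dfs.permissions</name>
   <value>false</value>
</property>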

Uploading a File


package demo;

import java.io.FileInputStream;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.Test;

public class TestUpload {
    @Test
    public void test1() throws Exception{
        // Build an input stream over the local file
        InputStream in = new FileInputStream("d:\\dowload\\hadoop-2.4.1.zip");
        // Point the client at the NameNode
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        // Get an HDFS client
        FileSystem client = FileSystem.get(conf);
        // Open an output stream to the target HDFS file
        OutputStream out = client.create(new Path("/tools/a.zip"));
        // Copy manually through a buffer
        byte[] buffer = new byte[1024];
        int len = 0;
        while((len = in.read(buffer)) > 0) {
            // Write out the bytes just read
            out.write(buffer, 0, len);
        }
        out.flush();
        out.close();
        in.close();
        client.close();
    }

    @Test
    public void test2() throws Exception{
        // Build an input stream over the local file
        InputStream in = new FileInputStream("d:\\dowload\\hadoop-2.4.1.zip");
        // Point the client at the NameNode
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        // Get an HDFS client
        FileSystem client = FileSystem.get(conf);
        // Open an output stream to the target HDFS file
        OutputStream out = client.create(new Path("/tools/b.zip"));
        // Use the utility class to simplify the copy; the fourth
        // argument closes both streams when the copy finishes
        IOUtils.copyBytes(in, out, 1024, true);
        client.close();
    }
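
    // A possible simpler alternative (not in the original notes):
    // FileSystem.copyFromLocalFile uploads a local file in a single call.
    // The target path /tools/c.zip is illustrative.
    @Test
    public void test3() throws Exception{
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        FileSystem client = FileSystem.get(conf);
        // Copy the local file into HDFS in one call
        client.copyFromLocalFile(new Path("d:\\dowload\\hadoop-2.4.1.zip"), new Path("/tools/c.zip"));
        client.close();
    }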
}

Downloading a File


package demo;

import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.Test;

public class TestDownload {
    @Test
    public void test1() throws Exception{
        // Point the client at the NameNode
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        // Get an HDFS client
        FileSystem client = FileSystem.get(conf);
        // Open an input stream  <------ HDFS
        InputStream in = client.open(new Path("/tools/a.zip"));
        // Build an output stream  ----> d:\temp\bb.zip
        OutputStream out = new FileOutputStream("d:\\temp\\bb.zip");
        // Use the utility class to simplify the copy; the fourth
        // argument closes both streams when the copy finishes
        IOUtils.copyBytes(in, out, 1024, true);
        client.close();
//      // Manual alternative: copy through a buffer
//      byte[] buffer = new byte[1024];
//      int len = 0;
//      while((len = in.read(buffer)) > 0) {
//          out.write(buffer, 0, len);
//      }
//      out.flush();
//      out.close();
//      in.close();
    }
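
    // A possible simpler alternative (not in the original notes):
    // FileSystem.copyToLocalFile downloads a file in a single call.
    // The local path d:\temp\cc.zip is illustrative; on Windows this
    // variant may require the winutils binaries to be installed.
    @Test
    public void test2() throws Exception{
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        FileSystem client = FileSystem.get(conf);
        // Copy the HDFS file to the local filesystem in one call
        client.copyToLocalFile(new Path("/tools/a.zip"), new Path("d:\\temp\\cc.zip"));
        client.close();
    }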
}

Viewing File Metadata

package demo;

import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

public class TestMetaData {
    @Test
    public void testCheckFileInfo() throws Exception{
        // Point the client at the NameNode
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        // Get an HDFS client
        FileSystem client = FileSystem.get(conf);
        // Get the status of every entry under the directory
        FileStatus[] filesStatus = client.listStatus(new Path("/tools"));
        for(FileStatus f : filesStatus){
            System.out.println(f.isDirectory() ? "directory" : "file");
            System.out.println(f.getPath().getName());
            System.out.println(f.getBlockSize());
            System.out.println("*************************");
        }
        client.close();
    }

    @Test
    public void testCheckFileBlock() throws Exception{
        // Point the client at the NameNode
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.157.111:9000");
        // Get an HDFS client
        FileSystem client = FileSystem.get(conf);
        // Get the file's status
        FileStatus fs = client.getFileStatus(new Path("/tools/a.zip"));
        // Get the locations of the file's data blocks
        BlockLocation[] location = client.getFileBlockLocations(fs, 0, fs.getLen());
        for(BlockLocation block : location){
            // getHosts() returns a String[] because each block is
            // replicated across several DataNodes
            System.out.println(Arrays.toString(block.getHosts()) + "\t" + Arrays.toString(block.getNames()));
        }
        client.close();
    }
    }
}

Advanced HDFS Features

1. Trash (Recycle Bin)

The essence of the HDFS trash: a Ctrl+X, i.e. a deleted file is moved into a hidden directory rather than removed.
Enable it in core-site.xml:

<!-- Disabled by default (interval 0); the value is the retention time in minutes (1440 = 24 hours) -->
<property>
   <name>fs.trash.interval</name>
   <value>1440</value>
</property>

Without the trash enabled

Log:
18/04/09 21:35:40 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 0 minutes, Emptier interval = 0 minutes.
Deleted /tools   ---> permanently deleted (contrast with the trash-enabled case below)

With the trash enabled

Moved: 'hdfs://bigdata111:9000/tools/a.zip' to trash at: hdfs://bigdata111:9000/user/root/.Trash/Current

Viewing the trash

hdfs dfs -lsr /user/root/.Trash/Current

Restoring from the trash

hdfs dfs -cp /user/root/.Trash/Current/tools/a.zip /tools   
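
The trash can also be emptied immediately, without waiting for the retention interval to expire:

hdfs dfs -expunge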

2. Snapshots

In essence: a cp command (the snapshot preserves a copy of the directory's state).
Administration commands:

[-allowSnapshot <snapshotDir>]
[-disallowSnapshot <snapshotDir>]

Operation commands:

[-createSnapshot <snapshotDir> [<snapshotName>]]
[-deleteSnapshot <snapshotDir> <snapshotName>]
[-renameSnapshot <snapshotDir> <oldName> <newName>]

Enable snapshots on a directory

hdfs dfsadmin -allowSnapshot /students

Create a backup (snapshot)

hdfs dfs -createSnapshot /students backup_student_0411_01
Log: Created snapshot /students/.snapshot/backup_student_0411_01
hdfs dfs -put student03.txt /students
hdfs dfs -createSnapshot /students backup_student_0411_02

Restore from a snapshot (this example restores a file from a snapshot of a different directory, /input)

hdfs dfs -cp /input/.snapshot/backup_input_01/data.txt /input
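
To list all directories on which snapshots are currently enabled:

hdfs lsSnapshottableDir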

3. Quotas

Name quota: limits the number of items (files and directories) under a directory.

[-setQuota <quota> <dirname>...]
[-clrQuota <dirname>...]

hdfs dfs -mkdir /folder1
hdfs dfsadmin -setQuota 3 /folder1

The effective limit is N-1: the directory itself counts toward its own quota, so -setQuota 3 leaves room for only 2 items.
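
Current quota usage can be inspected with -count -q, which prints the name quota and remaining name quota alongside the space quota columns:

hdfs dfs -count -q /folder1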

Space quota: limits the total size of the files under a directory.

[-setSpaceQuota <quota> [-storageType <storagetype>] <dirname>...]
[-clrSpaceQuota [-storageType <storagetype>] <dirname>...]

Set a 1 MB space quota:

hdfs dfs -mkdir /folder2
hdfs dfsadmin -setSpaceQuota 1M /folder2

The resulting error when a file is then written:

The DiskSpace quota of /folder2 is exceeded: quota = 1048576 B = 1 MB but diskspace consumed = 134217728 B = 128 MB

Note: the quota must never be set smaller than the block size (128 MB by default). Space quota is charged in whole blocks when a write begins, so even a small file immediately reserves a full 128 MB block against the quota, which is exactly what the error above shows.
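
To lift the quota again:

hdfs dfsadmin -clrSpaceQuota /folder2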

4. Safe Mode

hdfs dfsadmin -safemode get     check the safe mode status
hdfs dfsadmin -safemode enter   enter safe mode
hdfs dfsadmin -safemode leave   leave safe mode
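
While in safe mode HDFS is read-only. In scripts it is often useful to block until the NameNode leaves safe mode on its own; the wait option does exactly that:

hdfs dfsadmin -safemode wait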
