Common HDFS Commands

The HDFS file system provides a rich set of shell commands, making it easy for programmers and system administrators to inspect and modify files on HDFS. Moreover, HDFS command names and formats closely mirror their Unix/Linux counterparts, which greatly lowers the learning curve.

I. Command Help

Note: hdfs dfs is equivalent to hadoop fs; hdfs dfs is used throughout this article.

[root@node01 ~]# hdfs dfs
Usage: hadoop fs [generic options]
	[-appendToFile <localsrc> ... <dst>]
	[-cat [-ignoreCrc] <src> ...]
	[-checksum <src> ...]
	[-chgrp [-R] GROUP PATH...]
	[-chmod [-R] <MODE[,MODE]... | OCTALMODE> PATH...]
	[-chown [-R] [OWNER][:[GROUP]] PATH...]
	[-copyFromLocal [-f] [-p] [-l] <localsrc> ... <dst>]
	[-copyToLocal [-p] [-ignoreCrc] [-crc] <src> ... <localdst>]
	[-count [-q] [-h] [-v] [-x] <path> ...]
	[-cp [-f] [-p | -p[topax]] <src> ... <dst>]
	[-createSnapshot <snapshotDir> [<snapshotName>]]
	[-deleteSnapshot <snapshotDir> <snapshotName>]
	[-df [-h] [<path> ...]]
	[-du [-s] [-h] [-x] <path> ...]
	[-expunge]
	[-find <path> ... <expression> ...]
	[-get [-p] [-ignoreCrc] [-crc] <src> ... <localdst>]
	[-getfacl [-R] <path>]
	[-getfattr [-R] {-n name | -d} [-e en] <path>]
	[-getmerge [-nl] <src> <localdst>]
	[-help [cmd ...]]
	[-ls [-C] [-d] [-h] [-q] [-R] [-t] [-S] [-r] [-u] [<path> ...]]
	[-mkdir [-p] <path> ...]
	[-moveFromLocal <localsrc> ... <dst>]
	[-moveToLocal <src> <localdst>]
	[-mv <src> ... <dst>]
	[-put [-f] [-p] [-l] <localsrc> ... <dst>]
	[-renameSnapshot <snapshotDir> <oldName> <newName>]
	[-rm [-f] [-r|-R] [-skipTrash] <src> ...]
	[-rmdir [--ignore-fail-on-non-empty] <dir> ...]
	[-setfacl [-R] [{-b|-k} {-m|-x <acl_spec>} <path>]|[--set <acl_spec> <path>]]
	[-setfattr {-n name [-v value] | -x name} <path>]
	[-setrep [-R] [-w] <rep> <path> ...]
	[-stat [format] <path> ...]
	[-tail [-f] <file>]
	[-test -[defsz] <path>]
	[-text [-ignoreCrc] <src> ...]
	[-touchz <path> ...]
	[-usage [cmd ...]]
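
For detailed documentation on any single command, pass its name to -help; -usage prints just the one-line synopsis. For example:

// Show detailed help for the ls command
# hdfs dfs -help ls
// Show only the usage synopsis
# hdfs dfs -usage ls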


II. File Operations

1. Listing files and directories
// List the files in the root directory
[root@node01 ~]# hdfs dfs -ls /
Found 4 items
drwxrwxrwx   - root supergroup          0 2020-03-25 11:21 /data
drwxrwxrwx   - root supergroup          0 2020-03-25 12:58 /hbase
drwxrwxrwt   - root supergroup          0 2020-03-24 16:35 /tmp
drwxrwxrwx   - root supergroup          0 2020-03-26 17:30 /user

// List the files under /user/hive/
[root@node01 ~]# hdfs dfs -ls /user/hive/
Found 3 items
drwxrwxrwx   - root supergroup          0 2020-04-10 18:00 /user/hive/.Trash
drwx------   - root supergroup          0 2020-04-09 16:58 /user/hive/.staging
drwxrwxrwt   - root supergroup          0 2020-04-03 17:57 /user/hive/warehouse
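
The -ls flags from the help listing above can be combined; for example, -R recurses into subdirectories and -h prints human-readable file sizes:

// Recursively list /user/hive/ with human-readable sizes
# hdfs dfs -ls -R -h /user/hive/
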
2. Creating files and directories
// Create the /user/hive/test/ directory
[root@node01 ~]# hdfs dfs -mkdir /user/hive/test/
[root@node01 ~]# hdfs dfs -ls /user/hive/
Found 4 items
drwxrwxrwx   - root supergroup          0 2020-04-10 18:00 /user/hive/.Trash
drwx------   - root supergroup          0 2020-04-09 16:58 /user/hive/.staging
drwxr-xr-x   - root supergroup          0 2020-04-11 15:24 /user/hive/test
drwxrwxrwt   - root supergroup          0 2020-04-03 17:57 /user/hive/warehouse

// Create an empty file a.txt under /user/hive/test/
[root@node01 ~]# hdfs dfs -touchz /user/hive/test/a.txt 
[root@node01 ~]# hdfs dfs -ls /user/hive/test/
Found 1 items
-rw-r--r--   3 root supergroup          0 2020-04-11 15:50 /user/hive/test/a.txt
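
Note that touchz only creates a zero-length file. One way to write content into an HDFS file from the shell is -appendToFile, which appends one or more local files to a destination file. A minimal sketch, assuming the cluster permits appends and a local file named local.txt exists:

// Append the local file local.txt to the HDFS file a.txt (local.txt is a hypothetical example file)
# hdfs dfs -appendToFile local.txt /user/hive/test/a.txt
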
3. Copying and moving
// Copy a file
[root@node01 ~]# hdfs dfs -cp /user/hive/test/a.txt /user/hive/

// Rename a file
[root@node01 ~]# hdfs dfs -mv /user/hive/test/a.txt /user/hive/test/b.txt
[root@node01 ~]# hdfs dfs -ls /user/hive/test/
Found 1 items
-rw-r--r--   3 root supergroup          0 2020-04-11 15:57 /user/hive/test/b.txt

// Move a file
[root@node01 ~]# hdfs dfs -mv /user/hive/test/b.txt /user/hive/
[root@node01 ~]#  hdfs dfs -ls /user/hive/
Found 6 items
drwxrwxrwx   - root supergroup          0 2020-04-10 18:00 /user/hive/.Trash
drwx------   - root supergroup          0 2020-04-09 16:58 /user/hive/.staging
-rw-r--r--   3 root supergroup          0 2020-04-11 15:57 /user/hive/a.txt
-rw-r--r--   3 root supergroup          0 2020-04-11 16:00 /user/hive/b.txt
drwxr-xr-x   - root supergroup          0 2020-04-11 16:01 /user/hive/test
drwxrwxrwt   - root supergroup          0 2020-04-03 17:57 /user/hive/warehouse
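
By default -cp refuses to overwrite an existing destination file; the -f flag from the help listing forces the overwrite:

// Overwrite the destination if it already exists
# hdfs dfs -cp -f /user/hive/a.txt /user/hive/test/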

4. Deleting
// Delete a file or directory
[root@node01 ~]# hdfs dfs -rm -r /user/hive/a.txt
20/04/11 16:08:14 INFO fs.TrashPolicyDefault: Moved:
// Deleting a directory uses the same command
[root@node01 ~]# hdfs dfs -rm -r /user/hive/test/
20/04/11 16:10:06 INFO fs.TrashPolicyDefault: Moved
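
As the TrashPolicyDefault messages show, -rm moves files into the .Trash directory rather than deleting them immediately. The help listing offers -skipTrash to bypass the trash and -expunge to empty it:

// Permanently delete, bypassing the trash (use with care)
# hdfs dfs -rm -r -skipTrash /user/hive/test/
// Empty the current user's trash
# hdfs dfs -expunge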

5. Uploading and downloading files (the parent directory on HDFS must already exist, or the command will fail)
// Upload from the local file system: create a local text file and put it onto HDFS
[root@node01 data]# vim hello.txt
this is a test!
[root@node01 data]# hdfs dfs -put hello.txt /user/hive/
[root@node01 test]# hdfs dfs -ls /user/hive/
Found 5 items
drwxrwxrwx   - root supergroup          0 2020-04-10 18:00 /user/hive/.Trash
drwx------   - root supergroup          0 2020-04-09 16:58 /user/hive/.staging
-rw-r--r--   3 root supergroup          0 2020-04-11 16:00 /user/hive/b.txt
-rw-r--r--   3 root supergroup         16 2020-04-11 16:16 /user/hive/hello.txt
drwxrwxrwt   - root supergroup          0 2020-04-03 17:57 /user/hive/warehouse

// Copy a file from HDFS to the local file system: copy hello.txt into the local /data/test/ directory
# hdfs dfs -get /user/hive/hello.txt /data/test/
[root@node01 test]# ls
hello.txt
[root@node01 test]# pwd
/data/test
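
A related download command is -getmerge, which concatenates every file under an HDFS source directory into a single local file. A sketch, assuming a hypothetical directory /user/hive/logs/ containing multiple part files:

// Merge all files under /user/hive/logs/ into one local file (/user/hive/logs/ is hypothetical)
# hdfs dfs -getmerge /user/hive/logs/ /data/test/merged.txt
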
6. Displaying file contents
// The two commands below behave similarly
[root@node01 ~]# hdfs dfs -cat /user/hive/hello.txt
// The text command outputs text files, or certain non-text formats (such as SequenceFiles and zip archives), as plain text
[root@node01 ~]# hdfs dfs -text /user/hive/hello.txt
// View the end of a file (the last kilobyte)
[root@node01 ~]# hdfs dfs -tail /user/hive/hello.txt
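
For large files, avoid streaming the entire content to the terminal; a common idiom is to pipe -cat through a local filter such as head, and -tail -f follows a file as data is appended, like the Unix tail -f:

// Print only the first ten lines of the file
# hdfs dfs -cat /user/hive/hello.txt | head -n 10
// Follow the file as it grows
# hdfs dfs -tail -f /user/hive/hello.txt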

III. File Permission Operations

// Change permissions
// Grant full (777) permissions on the root directory
#hdfs dfs -chmod 777 /

// Set permissions on a specific directory (the numeric form below is recommended)
// Both commands remove the execute bit for "others" on /hbase (776 = rwxrwxrw-)
#hdfs dfs -chmod -R 776 /hbase
#hdfs dfs -chmod -R o-x /hbase
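
When mode bits are too coarse, the -setfacl and -getfacl commands from the help listing manage POSIX-style ACLs. A sketch, assuming ACLs are enabled on the cluster (dfs.namenode.acls.enabled=true) and a hypothetical user hive:

// Grant user hive read and execute on /hbase via an ACL entry (user "hive" is hypothetical here)
# hdfs dfs -setfacl -m user:hive:r-x /hbase
// Inspect the resulting ACL
# hdfs dfs -getfacl /hbase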


IV. User and Group Operations

// Switch to the hdfs user
[root@node01 ~]# su hdfs
[hdfs@node01 root]$ 

// Change owner and group
// Assign all directories under / to the root user
#hdfs dfs -chown -R root:supergroup /

// Assign the /user directory to the cdh user
#hdfs dfs -chown -R cdh:supergroup /user

V. Disk Space

// Show free-space statistics for the file system
[root@node01 ~]# hdfs dfs -df -h
Filesystem           Size    Used  Available  Use%
hdfs://node01:8020  3.9 T  65.5 G      3.8 T    2%

// Show the total size of a directory (-s gives a summary, -h human-readable sizes)
[root@node01 ~]# hdfs dfs -du -s -h /user/hive/
8.3 G  25.0 G  /user/hive 

// Show the size of each entry under the directory (first column: raw size; second: space consumed across all replicas)
[root@node01 ~]# hdfs dfs -du /user/hive/
0           0            /user/hive/.Trash
0           0            /user/hive/.staging
0           0            /user/hive/b.txt
16          48           /user/hive/hello.txt
8954472817  26863418451  /user/hive/warehouse
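
The -count command complements -du: it reports the number of directories, files, and total bytes under a path, and with -q it also shows quota usage:

// Show directory count, file count, content size, and quota information
# hdfs dfs -count -q -h /user/hive/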

// hdfs dfsadmin -report: show basic file system information
[root@node01 ~]# hdfs dfsadmin -report
Configured Capacity: 4251774369792 (3.87 TB)
Present Capacity: 4245645967521 (3.86 TB)
DFS Remaining: 4175276711936 (3.80 TB)
DFS Used: 70369255585 (65.54 GB)
DFS Used%: 1.66%
Under replicated blocks: 0
Blocks with corrupt replicas: 0
Missing blocks: 0
Missing blocks (with replication factor 1): 0

VI. Administrative Commands

# hdfs dfsadmin
Usage: hdfs dfsadmin
Note: Administrative commands can only be run as the HDFS superuser.
        [-report [-live] [-dead] [-decommissioning]]
        [-safemode <enter | leave | get | wait>]
        [-saveNamespace]
        [-rollEdits]
        [-restoreFailedStorage true|false|check]
        [-refreshNodes]
        [-setQuota <quota> <dirname>...<dirname>]
        [-clrQuota <dirname>...<dirname>]
        [-setSpaceQuota <quota> [-storageType <storagetype>] <dirname>...<dirname>]
        [-clrSpaceQuota [-storageType <storagetype>] <dirname>...<dirname>]
        [-finalizeUpgrade]
        [-rollingUpgrade [<query|prepare|finalize>]]
        [-refreshServiceAcl]
        [-refreshUserToGroupsMappings]
        [-refreshSuperUserGroupsConfiguration]
        [-refreshCallQueue]
        [-refresh <host:ipc_port> <key> [arg1..argn]
        [-reconfig <datanode|...> <host:ipc_port> <start|status|properties>]
        [-printTopology]
        [-refreshNamenodes datanode_host:ipc_port]
        [-deleteBlockPool datanode_host:ipc_port blockpoolId [force]]
        [-setBalancerBandwidth <bandwidth in bytes per second>]
        [-getBalancerBandwidth <datanode_host:ipc_port>]
        [-fetchImage <local directory>]
        [-allowSnapshot <snapshotDir>]
        [-disallowSnapshot <snapshotDir>]
        [-shutdownDatanode <datanode_host:ipc_port> [upgrade]]
        [-evictWriters <datanode_host:ipc_port>]
        [-getDatanodeInfo <datanode_host:ipc_port>]
        [-metasave filename]
        [-triggerBlockReport [-incremental] <datanode_host:ipc_port>]
        [-help [cmd]]
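
Of these, -safemode is one of the most frequently used: the NameNode starts in safe mode (during which the namespace is read-only) and occasionally needs to be checked or released manually:

// Check whether the NameNode is in safe mode
# hdfs dfsadmin -safemode get
// Force the NameNode to leave safe mode
# hdfs dfsadmin -safemode leave
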
#hdfs balancer
If an administrator finds that some DataNodes hold far more data than others, the command above can be used to manually start HDFS's internal data-rebalancing process.
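
The balancer also accepts a -threshold parameter: the maximum allowed deviation, in percent, between each DataNode's disk utilization and the cluster-wide average:

// Rebalance until every DataNode is within 10% of the average utilization
# hdfs balancer -threshold 10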
