HDFS 文件系统提供了相当多的 shell 操作命令,大大方便了程序员和系统管理人员查看、修改 HDFS 上的文件。进一步,HDFS 的操作命令和 Unix/Linux 的命令名称和格式相当一致,因而学习 HDFS 命令的成本也大为降低。
注:hdfs dfs 等同于 hadoop fs,这里均以 hdfs dfs 演示
[root@node01 ~]# hdfs dfs
Usage: hadoop fs [generic options]
[-appendToFile <localsrc> ... <dst>]
[-cat [-ignoreCrc] <src> ...]
[-checksum <src> ...]
[-chgrp [-R] GROUP PATH...]
[-chmod [-R] <MODE[,MODE]... | OCTALMODE> PATH...]
[-chown [-R] [OWNER][:[GROUP]] PATH...]
[-copyFromLocal [-f] [-p] [-l] <localsrc> ... <dst>]
[-copyToLocal [-p] [-ignoreCrc] [-crc] <src> ... <localdst>]
[-count [-q] [-h] [-v] [-x] <path> ...]
[-cp [-f] [-p | -p[topax]] <src> ... <dst>]
[-createSnapshot <snapshotDir> [<snapshotName>]]
[-deleteSnapshot <snapshotDir> <snapshotName>]
[-df [-h] [<path> ...]]
[-du [-s] [-h] [-x] <path> ...]
[-expunge]
[-find <path> ... <expression> ...]
[-get [-p] [-ignoreCrc] [-crc] <src> ... <localdst>]
[-getfacl [-R] <path>]
[-getfattr [-R] {-n name | -d} [-e en] <path>]
[-getmerge [-nl] <src> <localdst>]
[-help [cmd ...]]
[-ls [-C] [-d] [-h] [-q] [-R] [-t] [-S] [-r] [-u] [<path> ...]]
[-mkdir [-p] <path> ...]
[-moveFromLocal <localsrc> ... <dst>]
[-moveToLocal <src> <localdst>]
[-mv <src> ... <dst>]
[-put [-f] [-p] [-l] <localsrc> ... <dst>]
[-renameSnapshot <snapshotDir> <oldName> <newName>]
[-rm [-f] [-r|-R] [-skipTrash] <src> ...]
[-rmdir [--ignore-fail-on-non-empty] <dir> ...]
[-setfacl [-R] [{-b|-k} {-m|-x <acl_spec>} <path>]|[--set <acl_spec> <path>]]
[-setfattr {-n name [-v value] | -x name} <path>]
[-setrep [-R] [-w] <rep> <path> ...]
[-stat [format] <path> ...]
[-tail [-f] <file>]
[-test -[defsz] <path>]
[-text [-ignoreCrc] <src> ...]
[-touchz <path> ...]
[-usage [cmd ...]]
注:hdfs dfs等同于hadoop fs,这里均以hdfs dfs演示
//查看根目录文件
[root@node01 ~]# hdfs dfs -ls /
Found 4 items
drwxrwxrwx - root supergroup 0 2020-03-25 11:21 /data
drwxrwxrwx - root supergroup 0 2020-03-25 12:58 /hbase
drwxrwxrwt - root supergroup 0 2020-03-24 16:35 /tmp
drwxrwxrwx - root supergroup 0 2020-03-26 17:30 /user
//查看/user/hive/ 目录下文件
[root@node01 ~]# hdfs dfs -ls /user/hive/
Found 3 items
drwxrwxrwx - root supergroup 0 2020-04-10 18:00 /user/hive/.Trash
drwx------ - root supergroup 0 2020-04-09 16:58 /user/hive/.staging
drwxrwxrwt - root supergroup 0 2020-04-03 17:57 /user/hive/warehouse
//创建目录(文件夹)
//创建/user/hive/test/目录
[root@node01 ~]# hdfs dfs -mkdir /user/hive/test/
[root@node01 ~]# hdfs dfs -ls /user/hive/
Found 4 items
drwxrwxrwx - root supergroup 0 2020-04-10 18:00 /user/hive/.Trash
drwx------ - root supergroup 0 2020-04-09 16:58 /user/hive/.staging
drwxr-xr-x - root supergroup 0 2020-04-11 15:24 /user/hive/test
drwxrwxrwt - root supergroup 0 2020-04-03 17:57 /user/hive/warehouse
//创建文件
// 在/user/hive/test/文件夹下创建a.txt
[root@node01 ~]# hdfs dfs -touchz /user/hive/test/a.txt
[root@node01 ~]# hdfs dfs -ls /user/hive/test/
Found 1 items
-rw-r--r-- 3 root supergroup 0 2020-04-11 15:50 /user/hive/test/a.txt
//文件复制
[root@node01 ~]# hdfs dfs -cp /user/hive/test/a.txt /user/hive/
//文件重命名(mv 在同一目录下使用即为重命名)
[root@node01 ~]# hdfs dfs -mv /user/hive/test/a.txt /user/hive/test/b.txt
[root@node01 ~]# hdfs dfs -ls /user/hive/test/
Found 1 items
-rw-r--r-- 3 root supergroup 0 2020-04-11 15:57 /user/hive/test/b.txt
//文件移动
[root@node01 ~]# hdfs dfs -mv /user/hive/test/b.txt /user/hive/
[root@node01 ~]# hdfs dfs -ls /user/hive/
Found 6 items
drwxrwxrwx - root supergroup 0 2020-04-10 18:00 /user/hive/.Trash
drwx------ - root supergroup 0 2020-04-09 16:58 /user/hive/.staging
-rw-r--r-- 3 root supergroup 0 2020-04-11 15:57 /user/hive/a.txt
-rw-r--r-- 3 root supergroup 0 2020-04-11 16:00 /user/hive/b.txt
drwxr-xr-x - root supergroup 0 2020-04-11 16:01 /user/hive/test
drwxrwxrwt - root supergroup 0 2020-04-03 17:57 /user/hive/warehouse
//删除文件或者文件夹(目录)
[root@node01 ~]# hdfs dfs -rm -r /user/hive/a.txt
20/04/11 16:08:14 INFO fs.TrashPolicyDefault: Moved:
//删除目录(文件夹)命令同上
[root@node01 ~]# hdfs dfs -rm -r /user/hive/test/
20/04/11 16:10:06 INFO fs.TrashPolicyDefault: Moved
//本地上传
//创建本地文本并上传到hdfs
[root@node01 data]# vim hello.txt
this is a test!
[root@node01 data]# hdfs dfs -put hello.txt /user/hive/
[root@node01 test]# hdfs dfs -ls /user/hive/
Found 5 items
drwxrwxrwx - root supergroup 0 2020-04-10 18:00 /user/hive/.Trash
drwx------ - root supergroup 0 2020-04-09 16:58 /user/hive/.staging
-rw-r--r-- 3 root supergroup 0 2020-04-11 16:00 /user/hive/b.txt
-rw-r--r-- 3 root supergroup 16 2020-04-11 16:16 /user/hive/hello.txt
drwxrwxrwt - root supergroup 0 2020-04-03 17:57 /user/hive/warehouse
//hdfs文件复制到本地:将hello.txt 复制到本地/data/test/目录下
# hdfs dfs -get /user/hive/hello.txt /data/test/
[root@node01 test]# ls
hello.txt
[root@node01 test]# pwd
/data/test
//下面两种方式类似
[root@node01 ~]# hdfs dfs -cat /user/hive/hello.txt
//text 命令 :将文本文件或某些格式的非文本文件通过文本格式输出
[root@node01 ~]# hdfs dfs -text /user/hive/hello.txt
//查看文件末尾
[root@node01 ~]# hdfs dfs -tail /user/hive/hello.txt
//修改权限
//给根目录赋777最高权限
#hdfs dfs -chmod 777 /
//给指定目录赋权限(建议用第一种数字方式)
//去除 /hbase 目录下其他用户(other)的执行权限,以下两种写法效果相同
#hdfs dfs -chmod -R 776 /hbase
#hdfs dfs -chmod -R o-x /hbase
//切换到hdfs用户
[root@node01 ~]# su hdfs
[hdfs@node01 root]$
//修改用户和组
//将所有目录指定为root用户
#hdfs dfs -chown -R root:supergroup /
//将user目录指定给cdh用户
#hdfs dfs -chown -R cdh:supergroup /user
//统计文件系统的可用空间信息
[root@node01 ~]# hdfs dfs -df -h
Filesystem Size Used Available Use%
hdfs://node01:8020 3.9 T 65.5 G 3.8 T 2%
//统计文件夹的大小信息
[root@node01 ~]# hdfs dfs -du -s -h /user/hive/
8.3 G 25.0 G /user/hive
//统计文件夹的大小信息
[root@node01 ~]# hdfs dfs -du /user/hive/
0 0 /user/hive/.Trash
0 0 /user/hive/.staging
0 0 /user/hive/b.txt
16 48 /user/hive/hello.txt
8954472817 26863418451 /user/hive/warehouse
//hdfs dfsadmin -report :显示文件系统的基本数据
[root@node01 ~]# hdfs dfsadmin -report
Configured Capacity: 4251774369792 (3.87 TB)
Present Capacity: 4245645967521 (3.86 TB)
DFS Remaining: 4175276711936 (3.80 TB)
DFS Used: 70369255585 (65.54 GB)
DFS Used%: 1.66%
Under replicated blocks: 0
Blocks with corrupt replicas: 0
Missing blocks: 0
Missing blocks (with replication factor 1): 0
# hdfs dfsadmin
Usage: hdfs dfsadmin
Note: Administrative commands can only be run as the HDFS superuser.
[-report [-live] [-dead] [-decommissioning]]
[-safemode <enter | leave | get | wait>]
[-saveNamespace]
[-rollEdits]
[-restoreFailedStorage true|false|check]
[-refreshNodes]
[-setQuota <quota> <dirname>...<dirname>]
[-clrQuota <dirname>...<dirname>]
[-setSpaceQuota <quota> [-storageType <storagetype>] <dirname>...<dirname>]
[-clrSpaceQuota [-storageType <storagetype>] <dirname>...<dirname>]
[-finalizeUpgrade]
[-rollingUpgrade [<query|prepare|finalize>]]
[-refreshServiceAcl]
[-refreshUserToGroupsMappings]
[-refreshSuperUserGroupsConfiguration]
[-refreshCallQueue]
[-refresh <host:ipc_port> <key> [arg1..argn]
[-reconfig <datanode|...> <host:ipc_port> <start|status|properties>]
[-printTopology]
[-refreshNamenodes datanode_host:ipc_port]
[-deleteBlockPool datanode_host:ipc_port blockpoolId [force]]
[-setBalancerBandwidth <bandwidth in bytes per second>]
[-getBalancerBandwidth <datanode_host:ipc_port>]
[-fetchImage <local directory>]
[-allowSnapshot <snapshotDir>]
[-disallowSnapshot <snapshotDir>]
[-shutdownDatanode <datanode_host:ipc_port> [upgrade]]
[-evictWriters <datanode_host:ipc_port>]
[-getDatanodeInfo <datanode_host:ipc_port>]
[-metasave filename]
[-triggerBlockReport [-incremental] <datanode_host:ipc_port>]
[-help [cmd]]
#hdfs balancer
如果管理员发现某些DataNode保存数据过多,某些DataNode保存数据相对较少,可以使用上述命令手动启动内部的均衡过程