(所有源码均在 https://github.com/zongzhec/HadoopPractise)
1. 安装hadoop
2. 创建一个Maven/Gradle工程
3. 导入相应的依赖
- junit:junit:4.12(scope: test)
- org.apache.logging.log4j:log4j:2.12.1
- org.apache.hadoop:hadoop-hdfs:2.7.2
- jdk.tools:jdk.tools:1.8(scope: system,systemPath: ${JAVA_HOME}/lib/tools.jar)
4. 新建log4j.properties
log4j.rootLogger=INFO, stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - %m%n
# 日志文件(logFile)
log4j.appender.logfile=org.apache.log4j.FileAppender
log4j.appender.logfile.File=target/spring.log
log4j.appender.logfile.layout=org.apache.log4j.PatternLayout
log4j.appender.logfile.layout.ConversionPattern=%d %p [%c] - %m%n
5. 新建类HDFSClient
/**
 * Demo entry point: runs a sequence of HDFS client operations
 * (list files, list status, rename); the upload call is left disabled.
 */
public static void main(String[] args) {
    HDFSClientDemo client = new HDFSClientDemo();
    try {
        client.listFiles();
        client.ls();
        // client.put();
        client.rename();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Uploads the local file E:\input.txt to the HDFS root directory.
 *
 * @throws IOException if connecting to HDFS or copying the file fails
 */
public void put() throws IOException {
    // try-with-resources guarantees the FileSystem handle is closed even
    // when the copy throws. HDFS does not support concurrent writers, so a
    // handle left open would block other processes from writing.
    try (FileSystem fileSystem = FileSystem.get(URI.create("hdfs://hadoop102:9000"), new Configuration())) {
        fileSystem.copyFromLocalFile(new Path("E:\\input.txt"), new Path("/"));
    }
}
6. 启动dfs和yarn
7. 运行HDFSClient
1. 如何解决AccessControlException: Permission denied 问题:在运行配置中加入 JVM 参数 -DHADOOP_USER_NAME=<hdfs用户名>,或改用 FileSystem.get(uri, conf, user) 重载显式指定用户。
1. 上传文件
/**
 * Uploads the local file E:\input.txt to the HDFS root directory.
 *
 * @throws IOException if connecting to HDFS or copying the file fails
 */
public void put() throws IOException {
    // try-with-resources guarantees the FileSystem handle is closed even
    // when the copy throws. HDFS does not support concurrent writers, so a
    // handle left open would block other processes from writing.
    try (FileSystem fileSystem = FileSystem.get(URI.create("hdfs://hadoop102:9000"), new Configuration())) {
        fileSystem.copyFromLocalFile(new Path("E:\\input.txt"), new Path("/"));
    }
}
2. 下载文件
/**
 * Downloads from HDFS to the local drive E:\.
 * NOTE(review): the source path is "/" — this copies the entire HDFS root,
 * which may be far more than intended; confirm the path before running.
 *
 * @throws IOException if connecting to HDFS or copying fails
 */
public void get() throws IOException {
    // try-with-resources closes the FileSystem even if the copy throws.
    try (FileSystem fileSystem = FileSystem.get(URI.create("hdfs://hadoop102:9000"), new Configuration())) {
        fileSystem.copyToLocalFile(new Path("/"), new Path("E:\\"));
    }
}
3. 重命名文件
/**
 * Renames /wcoutput to /wcoutput_rename on HDFS.
 *
 * @throws IOException if connecting to HDFS or the rename call fails
 */
public void rename() throws IOException {
    // try-with-resources closes the FileSystem even if rename throws.
    try (FileSystem fileSystem = FileSystem.get(URI.create("hdfs://hadoop102:9000"), new Configuration())) {
        // FileSystem.rename returns false (rather than throwing) when the
        // source is missing or the destination exists — surface that instead
        // of failing silently.
        if (!fileSystem.rename(new Path("/wcoutput"), new Path("/wcoutput_rename"))) {
            System.out.println("rename /wcoutput -> /wcoutput_rename failed");
        }
    }
}
4. 扫描文件并判断是否是文件夹
/**
 * Lists the status of every entry directly under the HDFS root.
 * Note: unlike {@link #listFiles()}, this returns both files and
 * directories (non-recursive).
 *
 * @throws IOException if connecting to HDFS or listing fails
 */
public void ls() throws IOException {
    // try-with-resources closes the FileSystem even if listing throws.
    try (FileSystem fileSystem = FileSystem.get(URI.create("hdfs://hadoop102:9000"), new Configuration())) {
        System.out.println("Listing files and dirs");
        FileStatus[] fileStatuses = fileSystem.listStatus(new Path("/"));
        for (FileStatus fileStatus : fileStatuses) {
            if (fileStatus.isFile()) {
                // was print() — files ran together with the next line of output
                System.out.println("检测到一个文件: " + fileStatus.getPath());
            } else {
                System.out.println("检测到一个文件夹: " + fileStatus.getPath());
            }
        }
    }
}
5. 扫描文件并检索块信息
/**
 * Recursively lists every file under the HDFS root and prints the hosts
 * holding each of its blocks.
 * Note: unlike {@link #ls()}, this returns files only, never directories.
 *
 * @throws IOException if connecting to HDFS or iterating fails
 */
public void listFiles() throws IOException {
    // try-with-resources closes the FileSystem even if iteration throws.
    try (FileSystem fileSystem = FileSystem.get(URI.create("hdfs://hadoop102:9000"), new Configuration())) {
        System.out.println("Listing files");
        // Parameterized type instead of the raw RemoteIterator: avoids the
        // unchecked cast when calling next().
        RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(new Path("/"), true);
        while (files.hasNext()) {
            LocatedFileStatus file = files.next();
            System.out.println(file.getPath());
            BlockLocation[] blockLocations = file.getBlockLocations();
            System.out.println("块信息:");
            for (BlockLocation blockLocation : blockLocations) {
                String[] hosts = blockLocation.getHosts();
                System.out.print("块在 ");
                for (String host : hosts) {
                    System.out.print(host + " ");
                }
                System.out.println("里");
            }
        }
    }
}