<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>3.1.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>3.1.0</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-core</artifactId>
        <version>3.1.0</version>
    </dependency>
    <dependency>
        <groupId>com.google.guava</groupId>
        <artifactId>guava</artifactId>
        <version>27.0.1-jre</version>
    </dependency>
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.44</version>
    </dependency>
</dependencies>
import com.alibaba.fastjson.JSON;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.charset.Charset;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Basic operations against HDFS
 *
 * @author adminstrator
 * @since 1.0.0
 */
public class HdfsService {
private Logger logger = LoggerFactory.getLogger(HdfsService.class);
private Configuration conf = null;
/**
 * The default HDFS URI, e.g. hdfs://192.168.197.130:9000
 */
private String defaultHdfsUri;
public HdfsService(Configuration conf, String defaultHdfsUri) {
    this.conf = conf;
    this.defaultHdfsUri = defaultHdfsUri;
}
/**
 * Get an HDFS FileSystem instance for the configured cluster
 * @return org.apache.hadoop.fs.FileSystem
 */
private FileSystem getFileSystem() throws IOException {
    return FileSystem.get(conf);
}
/**
 * Create a directory on HDFS
 * @author adminstrator
 * @since 1.0.0
 * @param path directory path relative to the HDFS root, e.g. /testDir
 * @return boolean whether the directory was created successfully
 */
public boolean mkdir(String path){
    //if the directory already exists, there is nothing to do
    if(checkExists(path)){
        return true;
    }else{
        FileSystem fileSystem = null;
        try {
            fileSystem = getFileSystem();
            //the fully qualified HDFS path of the directory
            String hdfsPath = generateHdfsPath(path);
            //create the directory
            return fileSystem.mkdirs(new Path(hdfsPath));
        } catch (IOException e) {
            logger.error(MessageFormat.format("Failed to create the HDFS directory, path: {0}", path), e);
            return false;
        } finally {
            close(fileSystem);
        }
    }
}
/**
 * Upload a file to HDFS
 * @author adminstrator
 * @since 1.0.0
 * @param srcFile local file path, e.g. D:/test.txt
 * @param dstPath target HDFS directory path relative to the HDFS root, e.g. /testDir
 */
public void uploadFileToHdfs(String srcFile, String dstPath){
    this.uploadFileToHdfs(false, true, srcFile, dstPath);
}
/**
 * Upload a file to HDFS
 * @author adminstrator
 * @since 1.0.0
 * @param delSrc whether to delete the local source file after the upload
 * @param overwrite whether to overwrite an existing file on HDFS
 * @param srcFile local file path, e.g. D:/test.txt
 * @param dstPath target HDFS directory path relative to the HDFS root, e.g. /testDir
 */
public void uploadFileToHdfs(boolean delSrc, boolean overwrite, String srcFile, String dstPath){
    //source file path
    Path localSrcPath = new Path(srcFile);
    //target path on HDFS
    Path hdfsDstPath = new Path(generateHdfsPath(dstPath));
    FileSystem fileSystem = null;
    try {
        fileSystem = getFileSystem();
        fileSystem.copyFromLocalFile(delSrc, overwrite, localSrcPath, hdfsDstPath);
    } catch (IOException e) {
        logger.error(MessageFormat.format("Failed to upload the file to HDFS, srcFile: {0}, dstPath: {1}", srcFile, dstPath), e);
    } finally {
        close(fileSystem);
    }
}
/**
 * Check whether a file or directory exists on HDFS
 * @author adminstrator
 * @since 1.0.0
 * @param path path relative to the HDFS root, e.g. /testDir or /testDir/a.txt
 * @return boolean
 */
public boolean checkExists(String path){
    FileSystem fileSystem = null;
    try {
        fileSystem = getFileSystem();
        //the fully qualified HDFS path
        String hdfsPath = generateHdfsPath(path);
        //check whether the file or directory exists
        return fileSystem.exists(new Path(hdfsPath));
    } catch (IOException e) {
        logger.error(MessageFormat.format("Failed to check whether the path exists on HDFS, path: {0}", path), e);
        return false;
    } finally {
        close(fileSystem);
    }
}
/**
 * List the files and directories directly under an HDFS path (not recursing into subdirectories)
 * @author adminstrator
 * @since 1.0.0
 * @param path HDFS directory path relative to the HDFS root, e.g. /testDir
 * @param pathFilter optional filter applied to the children, may be null
 * @return java.util.List<Map<String,Object>>
 */
public List<Map<String,Object>> listFiles(String path, PathFilter pathFilter){
    List<Map<String,Object>> result = new ArrayList<>();
    FileSystem fileSystem = null;
    try {
        fileSystem = getFileSystem();
        //the fully qualified HDFS path
        String hdfsPath = generateHdfsPath(path);
        //list the direct children, applying the filter if one was given
        FileStatus[] statuses;
        if(pathFilter != null){
            statuses = fileSystem.listStatus(new Path(hdfsPath), pathFilter);
        }else{
            statuses = fileSystem.listStatus(new Path(hdfsPath));
        }
        if(statuses != null){
            for(FileStatus status : statuses){
                //record the full path of each child and whether it is a directory
                Map<String,Object> fileMap = new HashMap<>(2);
                fileMap.put("path", status.getPath().toString());
                fileMap.put("isDir", status.isDirectory());
                result.add(fileMap);
            }
        }
    } catch (IOException e) {
        logger.error(MessageFormat.format("Failed to list the files under the HDFS path {0}", path), e);
    } finally {
        close(fileSystem);
    }
    return result;
}
}
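The excerpt of HdfsService above stops at listFiles, but the class also relies on two private helpers (generateHdfsPath and close), and the tests further down call downloadFileFromHdfs, open, openWithObject, rename, delete, and getFileBlockLocations, none of which are shown. The sketch below is a minimal, hypothetical reconstruction of those methods, written in the same style as the rest of the class; treat it as one plausible implementation to be placed inside HdfsService, not as the article's exact code.

//Hypothetical reconstruction: these methods go inside HdfsService
//Turn a relative path such as /testDir into a full HDFS URI
//(assumption: the article simply prefixes defaultHdfsUri)
private String generateHdfsPath(String dstPath) {
    String hdfsPath = defaultHdfsUri;
    if (dstPath.startsWith("/")) {
        hdfsPath += dstPath;
    } else {
        hdfsPath = hdfsPath + "/" + dstPath;
    }
    return hdfsPath;
}
//Close a FileSystem handle, logging rather than propagating any error
private void close(FileSystem fileSystem) {
    if (fileSystem != null) {
        try {
            fileSystem.close();
        } catch (IOException e) {
            logger.error("Failed to close the HDFS FileSystem", e);
        }
    }
}
//Download a file from HDFS to the local file system
public void downloadFileFromHdfs(String srcFile, String dstFile) {
    Path hdfsSrcPath = new Path(generateHdfsPath(srcFile));
    Path localDstPath = new Path(dstFile);
    FileSystem fileSystem = null;
    try {
        fileSystem = getFileSystem();
        fileSystem.copyToLocalFile(hdfsSrcPath, localDstPath);
    } catch (IOException e) {
        logger.error(MessageFormat.format("Failed to download {0} from HDFS", srcFile), e);
    } finally {
        close(fileSystem);
    }
}
//Open a file on HDFS for reading; the caller is responsible for closing the stream
public FSDataInputStream open(String path) throws IOException {
    return getFileSystem().open(new Path(generateHdfsPath(path)));
}
//Read a JSON file on HDFS and deserialize it into a Java object with fastjson
public <T> T openWithObject(String path, Class<T> clazz) throws IOException {
    FSDataInputStream inputStream = null;
    try {
        inputStream = open(path);
        String content = IOUtils.toString(inputStream, Charset.forName("UTF-8"));
        return JSON.parseObject(content, clazz);
    } finally {
        IOUtils.closeQuietly(inputStream);
    }
}
//Rename a file or directory on HDFS
public boolean rename(String srcPath, String dstPath) {
    Path src = new Path(generateHdfsPath(srcPath));
    Path dst = new Path(generateHdfsPath(dstPath));
    FileSystem fileSystem = null;
    try {
        fileSystem = getFileSystem();
        return fileSystem.rename(src, dst);
    } catch (IOException e) {
        logger.error(MessageFormat.format("Failed to rename {0} to {1}", srcPath, dstPath), e);
        return false;
    } finally {
        close(fileSystem);
    }
}
//Delete a file or directory (recursively) on HDFS
public boolean delete(String path) {
    FileSystem fileSystem = null;
    try {
        fileSystem = getFileSystem();
        return fileSystem.delete(new Path(generateHdfsPath(path)), true);
    } catch (IOException e) {
        logger.error(MessageFormat.format("Failed to delete the HDFS path {0}", path), e);
        return false;
    } finally {
        close(fileSystem);
    }
}
//Get the block locations of a file in the HDFS cluster
public BlockLocation[] getFileBlockLocations(String filePath) throws IOException {
    FileSystem fileSystem = null;
    try {
        fileSystem = getFileSystem();
        FileStatus fileStatus = fileSystem.getFileStatus(new Path(generateHdfsPath(filePath)));
        return fileSystem.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
    } finally {
        close(fileSystem);
    }
}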
import io.renren.service.HdfsService;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
 * HDFS-related configuration
 *
 * @author adminstrator
 * @since 1.0.0
 */
@Configuration
public class HdfsConfig {
private String defaultHdfsUri = "hdfs://192.168.0.9:9000";
@Bean
public HdfsService getHdfsService(){
    org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
    conf.set("fs.defaultFS", defaultHdfsUri);
    return new HdfsService(conf, defaultHdfsUri);
}
}
import io.renren.entity.SysUserEntity;
import io.renren.service.HdfsService;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import org.springframework.test.context.web.WebAppConfiguration;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.List;
import java.util.Map;
/**
 * Tests for the basic HDFS operations
 *
 * @author adminstrator
 * @since 1.0.0
 */
@RunWith(SpringJUnit4ClassRunner.class)
@SpringBootTest
@WebAppConfiguration
public class TestHdfs {
@Autowired
private HdfsService hdfsService;
/**
 * Test creating HDFS directories
 */
@Test
public void testMkdir(){
    boolean result1 = hdfsService.mkdir("/testDir");
    System.out.println("mkdir result: " + result1);
    boolean result2 = hdfsService.mkdir("/testDir/subDir");
    System.out.println("mkdir result: " + result2);
}
/**
 * Test uploading files
 */
@Test
public void testUploadFile(){
    //upload three test files
    hdfsService.uploadFileToHdfs("C:/Users/yanglei/Desktop/a.txt","/testDir");
    hdfsService.uploadFileToHdfs("C:/Users/yanglei/Desktop/b.txt","/testDir");
    hdfsService.uploadFileToHdfs("C:/Users/yanglei/Desktop/c.txt","/testDir/subDir");
}
/**
 * Test listing the files under a directory
 */
@Test
public void testListFiles(){
    List<Map<String,Object>> result = hdfsService.listFiles("/testDir", null);
    result.forEach(fileMap -> {
        fileMap.forEach((key,value) -> {
            System.out.println(key + "--" + value);
        });
        System.out.println();
    });
}
/**
 * Test downloading a file
 */
@Test
public void testDownloadFile(){
    hdfsService.downloadFileFromHdfs("/testDir/a.txt","C:/Users/yanglei/Desktop/test111.txt");
}
/**
 * Test opening and reading a file on HDFS
 */
@Test
public void testOpen() throws IOException {
    FSDataInputStream inputStream = hdfsService.open("/testDir/a.txt");
    BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
    String line = null;
    while((line = reader.readLine()) != null){
        System.out.println(line);
    }
    reader.close();
}
/**
 * Test opening a file on HDFS and deserializing it into a Java object
 */
@Test
public void testOpenWithObject() throws IOException {
    SysUserEntity user = hdfsService.openWithObject("/testDir/b.txt", SysUserEntity.class);
    System.out.println(user);
}
/**
 * Test renaming a file
 */
@Test
public void testRename(){
    hdfsService.rename("/testDir/b.txt","/testDir/b_new.txt");
    //list the directory again to verify the rename
    testListFiles();
}
/**
 * Test deleting a file
 */
@Test
public void testDelete(){
    hdfsService.delete("/testDir/b_new.txt");
    //list the directory again to verify the deletion
    testListFiles();
}
/**
 * Test getting the block locations of a file in the HDFS cluster
 */
@Test
public void testGetFileBlockLocations() throws IOException {
    BlockLocation[] locations = hdfsService.getFileBlockLocations("/testDir/a.txt");
    if(locations != null && locations.length > 0){
        for(BlockLocation location : locations){
            System.out.println(location.getHosts()[0]);
        }
    }
}
}
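testOpenWithObject deserializes b.txt into io.renren.entity.SysUserEntity, an entity class that belongs to the surrounding renren project and is not shown in this article. If you want the test to compile outside that project, a minimal stand-in could look like the following; the fields here are assumptions, chosen only so that fastjson has something to populate.

//Hypothetical minimal stand-in for io.renren.entity.SysUserEntity;
//the real class lives in the renren project and has more fields
public class SysUserEntity {
    private Long userId;
    private String username;
    public Long getUserId() { return userId; }
    public void setUserId(Long userId) { this.userId = userId; }
    public String getUsername() { return username; }
    public void setUsername(String username) { this.username = username; }
    @Override
    public String toString() {
        return "SysUserEntity{userId=" + userId + ", username='" + username + "'}";
    }
}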
HDFS cluster setup and related configuration: HDFS 3.x cluster configuration
Problems you may run into and how to solve them: issues that can come up when using the Java API against Hadoop, and their solutions