https://www.cloudera.com/documentation/enterprise/release-notes/topics/cdh_vd_cdh5_maven_repo.html
https://www.cloudera.com/documentation/enterprise/release-notes/topics/cdh_vd_cdh5_maven_repo_514x.html
第一种方式:调用FileSystem的get(Configuration configuration)方法
/**
 * Approach 1: obtain a FileSystem through {@code FileSystem.get(Configuration)}.
 *
 * @throws IOException if the file system cannot be created
 */
@Test
public void getFileSystem1() throws IOException {
    Configuration configuration = new Configuration();
    // Override the default file system so that the HDFS client is returned.
    // The property name is "fs.defaultFS" (the same key getHdfs3 uses); the
    // previous key "fs.default" is not the one FileSystem.get consults.
    configuration.set("fs.defaultFS", "hdfs://node01:8020/");
    FileSystem fileSystem = FileSystem.get(configuration);
    System.out.println(fileSystem);
}
第二种方式:调用FileSystem的get(URI uri ,Configuration configuration)方法
/**
 * Approach 2: obtain a FileSystem through {@code FileSystem.get(URI, Configuration)}.
 *
 * @throws URISyntaxException if the NameNode URI is malformed
 * @throws IOException if the file system cannot be created
 */
@Test
public void getHdfs2() throws URISyntaxException, IOException {
    // Two-argument form: the hdfs:// scheme of the URI is what selects the
    // distributed file system.
    URI nameNodeUri = new URI("hdfs://node01:8020");
    Configuration conf = new Configuration();
    FileSystem hdfs = FileSystem.get(nameNodeUri, conf);
    System.out.println(hdfs.toString());
}
第三种方式:调用FileSystem的newInstance(Configuration configuration)方法
/**
 * Approach 3: obtain a FileSystem through {@code FileSystem.newInstance(Configuration)}.
 *
 * @throws IOException if the file system cannot be created or closed
 * @throws URISyntaxException declared for signature compatibility; not thrown here
 */
@Test
public void getHdfs3() throws IOException, URISyntaxException {
    Configuration configuration = new Configuration();
    configuration.set("fs.defaultFS", "hdfs://node01:8020/");
    // The instance obtained from newInstance is owned by this method, so it
    // is closed here via try-with-resources instead of being left open.
    try (FileSystem fileSystem = FileSystem.newInstance(configuration)) {
        System.out.println(fileSystem);
    }
}
第四种方式:调用FileSystem的newInstance(URI uri,Configuration configuration)方法
/**
 * Approach 4: obtain a FileSystem through {@code FileSystem.newInstance(URI, Configuration)}.
 *
 * @throws IOException if the file system cannot be created or closed
 * @throws URISyntaxException if the NameNode URI is malformed
 */
@Test
public void getHdfs4() throws IOException, URISyntaxException {
    // The instance obtained from newInstance is owned by this method, so it
    // is closed here via try-with-resources instead of being left open.
    try (FileSystem fileSystem =
            FileSystem.newInstance(new URI("hdfs://node01:8020/"), new Configuration())) {
        System.out.println(fileSystem);
    }
}
第一种方式:
/**
 * Recursive listing, approach 1: walks the tree manually with
 * {@code listStatus}, printing the status of every non-directory entry
 * found under the root path.
 *
 * @throws URISyntaxException if the NameNode URI is malformed
 * @throws IOException if listing or closing the file system fails
 */
@Test
public void getAllHdfsFilePath() throws URISyntaxException, IOException {
    // Step 1: obtain the distributed file system. try-with-resources makes
    // sure it is closed even when a listing call throws.
    // NOTE(review): this URI carries no port, unlike the other examples
    // (node01:8020); presumably it relies on the default port - confirm.
    try (FileSystem fileSystem = FileSystem.get(new URI("hdfs://node01/"), new Configuration())) {
        // Step 2: fetch the status entries directly under the root path.
        Path path = new Path("/");
        FileStatus[] fileStatuses = fileSystem.listStatus(path);
        // Step 3: descend into directories, print everything else.
        for (FileStatus fileStatus : fileStatuses) {
            if (fileStatus.isDirectory()) {
                getDirectoryFile(fileSystem, fileStatus);
            } else {
                System.out.println(fileStatus);
            }
        }
    }
}
/**
 * Prints the status of every non-directory entry beneath the given
 * directory, recursing into each sub-directory it encounters.
 *
 * @param fileSystem file system used to list the directory contents
 * @param fileStatus status entry of the directory to walk
 * @throws IOException if listing a directory fails
 */
private void getDirectoryFile(FileSystem fileSystem, FileStatus fileStatus) throws IOException {
    // List the immediate children of the directory this status describes.
    FileStatus[] children = fileSystem.listStatus(fileStatus.getPath());
    for (int i = 0; i < children.length; i++) {
        FileStatus child = children[i];
        if (!child.isDirectory()) {
            System.out.println(child.toString());
            continue;
        }
        // Sub-directory: keep descending.
        getDirectoryFile(fileSystem, child);
    }
}
第二种方式:
/**
 * Recursive listing, approach 2: lets {@code FileSystem.listFiles} do the
 * recursion and prints the path of every entry it returns.
 *
 * @throws IOException if listing or closing the file system fails
 * @throws URISyntaxException if the NameNode URI is malformed
 */
@Test
public void listHdfsFile() throws IOException, URISyntaxException {
    // try-with-resources closes the file system even if iteration throws.
    try (FileSystem fileSystem =
            FileSystem.get(new URI("hdfs://node01:8020/"), new Configuration())) {
        Path path = new Path("/");
        // First argument: the path to traverse; second argument: whether to
        // recurse. The iterator is parameterized so next() yields a
        // LocatedFileStatus; with the raw type it yields Object and the
        // assignment below does not compile.
        RemoteIterator<LocatedFileStatus> locatedFileStatusRemoteIterator =
                fileSystem.listFiles(path, true);
        while (locatedFileStatusRemoteIterator.hasNext()) {
            LocatedFileStatus next = locatedFileStatusRemoteIterator.next();
            Path path1 = next.getPath();
            System.out.println(path1.toString());
        }
    }
}
}
@Test
public void copyHdfsToLocal() throws URISyntaxException, IOException {
FileSystem fileSystem = FileSystem.get(new URI("hdfs://node01:8020"), new Configuration());
Path path = new Path("hdfs://node01:8020/test/input/install.log");
FSDataInputStream inputStream = fileSystem.open(path);
FileOutputStream fileOutputStream = new FileOutputStream("e:\\install.log");
IOUtils.copy(inputStream, fileOutputStream);
IOUtils.closeQuietly(inputStream);
IOUtils.closeQuietly(fileOutputStream);
fileSystem.close();
5. HDFS上面创建文件夹
/**
 * Creates the nested directory {@code /abc/dbc/ddd} on HDFS.
 *
 * @throws URISyntaxException if the NameNode URI is malformed
 * @throws IOException if creating the directories or closing the file system fails
 */
@Test
public void createdHdfsDir() throws URISyntaxException, IOException {
    // try-with-resources closes the file system even if mkdirs throws.
    try (FileSystem fileSystem =
            FileSystem.get(new URI("hdfs://node01:8020"), new Configuration())) {
        fileSystem.mkdirs(new Path("/abc/dbc/ddd"));
    }
}
6.小文件合并
/**
 * Merges small files: concatenates every file in a local directory into
 * the single HDFS file {@code /bigfile.xml}.
 *
 * @throws Exception if connecting to HDFS, listing the local directory,
 *         copying, or closing any resource fails
 */
@Test
public void mergeFile() throws Exception {
    // Resources are closed automatically in reverse order (output stream,
    // local file system, distributed file system), including on failure.
    // A failure while closing the HDFS output stream is reported rather
    // than swallowed, since it can mean the merged file is incomplete.
    try (FileSystem fileSystem =
                 FileSystem.get(new URI("hdfs://192.168.52.100:8020"), new Configuration(), "root");
         LocalFileSystem local = FileSystem.getLocal(new Configuration());
         FSDataOutputStream outputStream = fileSystem.create(new Path("/bigfile.xml"))) {
        // List the entries of the local source directory.
        FileStatus[] fileStatuses = local.listStatus(new Path("file:///F:\\传智播客大数据离线阶段课程资料\\3、大数据离线第三天\\上传小文件合并"));
        for (FileStatus fileStatus : fileStatuses) {
            // Only regular files can be opened for reading; skip anything else.
            if (!fileStatus.isFile()) {
                continue;
            }
            // Each input stream is closed as soon as its content is appended,
            // even if the copy throws.
            try (FSDataInputStream inputStream = local.open(fileStatus.getPath())) {
                IOUtils.copy(inputStream, outputStream);
            }
        }
    }
}