HDFS: Java API
- 1. Reading data with a URL
- 2. Reading data through the FileSystem API: fs.open
- 3. Creating a directory: fs.mkdirs
- 4. Copying a local file to HDFS with progress reporting: fs.create
- 5. Downloading all .txt files under /tmp to the local file system
- 6. Deleting data: fs.delete
- 7. Listing files in a directory: fs.listStatus
- 8. Uploading a file
- 9. Downloading a file
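All of the examples below talk to the same NameNode (hdfs://192.168.236.135:9000) and, where a user is needed, run as the hadoop user. A small optional helper like the following is one way to avoid repeating that boilerplate; the class itself is not part of the examples, and the address and user are just the values used throughout this post, so adjust them for your own cluster.

package com.demo.hellohadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

import java.io.IOException;
import java.net.URI;

/**
 * Optional helper: one place to obtain FileSystem instances for the examples below.
 */
public class HDFSClient {
    public static final String HDFS_ADDR = "hdfs://192.168.236.135:9000";
    public static final String HDFS_USER = "hadoop";

    public static FileSystem get() throws IOException, InterruptedException {
        return FileSystem.get(URI.create(HDFS_ADDR), new Configuration(), HDFS_USER);
    }
}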
1. Reading data with a URL
package com.demo.hellohadoop;

import org.apache.hadoop.fs.FsUrlStreamHandlerFactory;
import org.apache.hadoop.io.IOUtils;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;

/**
 * Read data from HDFS through a java.net.URL.
 */
public class HDFSTest01 {
    static {
        // Register the HDFS protocol handler; a URLStreamHandlerFactory can only be set once per JVM.
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
    }

    public static void main(String[] args) {
        final String hdfsURL = "hdfs://192.168.236.135:9000/tmp/README.txt";
        InputStream inputStream = null;
        try {
            inputStream = new URL(hdfsURL).openStream();
            IOUtils.copyBytes(inputStream, System.out, 4096, false);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            IOUtils.closeStream(inputStream);
        }
    }
}
2. Reading data through the FileSystem API: fs.open
package com.demo.hellohadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

import java.io.IOException;
import java.net.URI;

/**
 * Read data through the FileSystem API.
 */
public class HDFSTest02 {
    public static void main(String[] args) {
        final String hdfsAddress = "hdfs://192.168.236.135:9000";
        final String uri = hdfsAddress + "/tmp/README.txt";
        Configuration config = new Configuration();
        FSDataInputStream in = null;
        try {
            FileSystem fileSystem = FileSystem.get(URI.create(hdfsAddress), config);
            in = fileSystem.open(new Path(uri));
            IOUtils.copyBytes(in, System.out, 4096, false);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            IOUtils.closeStream(in);
        }
    }
}
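Unlike a plain InputStream, FSDataInputStream is seekable, so the same open stream can be rewound and read again. A small variation on the example above, under the same address and path assumptions (the class name HDFSTest02Seek is just for this sketch):

package com.demo.hellohadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

import java.net.URI;

/**
 * Sketch: print the file, seek back to the start, and print it a second time.
 */
public class HDFSTest02Seek {
    public static void main(String[] args) throws Exception {
        final String hdfsAddress = "hdfs://192.168.236.135:9000";
        FileSystem fileSystem = FileSystem.get(URI.create(hdfsAddress), new Configuration());
        try (FSDataInputStream in = fileSystem.open(new Path(hdfsAddress + "/tmp/README.txt"))) {
            IOUtils.copyBytes(in, System.out, 4096, false);
            in.seek(0); // rewind to the beginning of the file
            IOUtils.copyBytes(in, System.out, 4096, false);
        }
    }
}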
3. Creating a directory: fs.mkdirs
package com.demo.hellohadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;
import java.net.URI;

/**
 * Create a directory.
 */
public class HDFSTest03 {
    public static void main(String[] args) {
        final String hdfsAddress = "hdfs://192.168.236.135:9000";
        Configuration config = new Configuration();
        try {
            // The third argument is the HDFS user the operation runs as.
            FileSystem fileSystem = FileSystem.get(URI.create(hdfsAddress), config, "hadoop");
            boolean flag = fileSystem.mkdirs(new Path(hdfsAddress + "/tmp/tmpDir1"));
            System.out.println("make directory " + (flag ? "succeeded" : "failed"));
        } catch (IOException | InterruptedException e) {
            e.printStackTrace();
        }
    }
}
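mkdirs also has an overload that takes an FsPermission, for creating the directory with explicit permissions instead of the defaults (the effective permissions are still subject to the configured umask). A rough sketch; the directory name /tmp/tmpDir2 and the class name are only illustrative:

package com.demo.hellohadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;

import java.net.URI;

/**
 * Sketch: create a directory with explicit permissions (755).
 */
public class HDFSTest03Permission {
    public static void main(String[] args) throws Exception {
        final String hdfsAddress = "hdfs://192.168.236.135:9000";
        FileSystem fileSystem = FileSystem.get(URI.create(hdfsAddress), new Configuration(), "hadoop");
        boolean flag = fileSystem.mkdirs(new Path(hdfsAddress + "/tmp/tmpDir2"),
                new FsPermission((short) 0755)); // rwxr-xr-x
        System.out.println("make directory " + (flag ? "succeeded" : "failed"));
    }
}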
4. Copying a local file to HDFS with progress reporting: fs.create
package com.demo.hellohadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.net.URI;

/**
 * Copy a local file to HDFS and print progress while writing.
 */
public class HDFSTest04 {
    private static final String LOCAL_FILE = "D:\\workspace_idea\\hello-hadoop\\src\\main\\resources\\test.txt";
    private static final String HDFS_ADDRESS = "hdfs://192.168.236.135:9000";
    private static final String REMOTE_FILE = HDFS_ADDRESS + "/tmp/test.txt";

    public static void main(String[] args) {
        BufferedInputStream in = null;
        FSDataOutputStream out = null;
        try {
            in = new BufferedInputStream(new FileInputStream(LOCAL_FILE));
            FileSystem fileSystem = FileSystem.get(URI.create(HDFS_ADDRESS), new Configuration(), "hadoop");
            // The Progressable callback prints a '*' each time progress is reported.
            out = fileSystem.create(new Path(REMOTE_FILE), () -> System.out.print("*"));
            IOUtils.copyBytes(in, out, 8, false);
        } catch (IOException | InterruptedException e) {
            e.printStackTrace();
        } finally {
            IOUtils.closeStream(in);
            IOUtils.closeStream(out);
        }
    }
}
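fs.create also has overloads that let you set the buffer size, replication factor and block size of the new file while still passing a Progressable. A hedged sketch; the target path, replication factor and block size below are arbitrary values for illustration:

package com.demo.hellohadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.net.URI;

/**
 * Sketch: create the target file with an explicit buffer size, replication factor
 * and block size, still printing a '*' as progress is reported.
 */
public class HDFSTest04Options {
    public static void main(String[] args) throws Exception {
        final String hdfsAddress = "hdfs://192.168.236.135:9000";
        FileSystem fileSystem = FileSystem.get(URI.create(hdfsAddress), new Configuration(), "hadoop");
        try (BufferedInputStream in = new BufferedInputStream(
                     new FileInputStream("D:\\workspace_idea\\hello-hadoop\\src\\main\\resources\\test.txt"));
             FSDataOutputStream out = fileSystem.create(
                     new Path(hdfsAddress + "/tmp/test-with-options.txt"),
                     true,               // overwrite if it already exists
                     4096,               // io buffer size
                     (short) 2,          // replication factor (arbitrary for this sketch)
                     128 * 1024 * 1024L, // block size: 128 MB
                     () -> System.out.print("*"))) {
            IOUtils.copyBytes(in, out, 4096, false);
        }
    }
}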
5. Downloading all .txt files under /tmp to the local file system
package com.demo.hellohadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.IOUtils;

import java.io.*;
import java.net.URI;

/**
 * Download all .txt files under /tmp to the local file system.
 */
public class HDFSTest05 {
    private static final String HDFS_ADDR = "hdfs://192.168.236.135:9000";
    private static final String HDFS_USER = "hadoop";
    private static final String LOCAL_DIR = "D:/workspace_idea/hello-hadoop/src/main/resources";

    public static void main(String[] args) {
        String uri = HDFS_ADDR + "/tmp";
        try {
            FileSystem fs = FileSystem.get(URI.create(HDFS_ADDR), new Configuration(), HDFS_USER);
            // The PathFilter only keeps entries whose path ends with "txt".
            FileStatus[] fileStatuses = fs.listStatus(new Path(uri), (path) -> path.toString().matches(".*txt"));
            for (FileStatus fileStatus : fileStatuses) {
                String path = fileStatus.getPath().toString();
                String[] split = path.split("/");
                String filename = split[split.length - 1];
                System.out.println("download ==> " + path);
                downLoadFile(path, filename);
            }
        } catch (IOException | InterruptedException e) {
            e.printStackTrace();
        }
    }

    public static void downLoadFile(String source, String filename) {
        FSDataInputStream in = null;
        OutputStream out = null;
        try {
            FileSystem fs = FileSystem.get(URI.create(HDFS_ADDR), new Configuration(), HDFS_USER);
            out = new FileOutputStream(LOCAL_DIR + File.separator + filename);
            in = fs.open(new Path(source));
            IOUtils.copyBytes(in, out, 4096, false);
        } catch (IOException | InterruptedException e) {
            e.printStackTrace();
        } finally {
            IOUtils.closeStream(in);
            IOUtils.closeStream(out);
        }
    }
}
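Instead of a PathFilter, the same selection can usually be expressed as a glob pattern with fs.globStatus. A sketch under the same address and user assumptions as above; the class name is only illustrative:

package com.demo.hellohadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.net.URI;

/**
 * Sketch: select the .txt files under /tmp with a glob pattern instead of a PathFilter.
 */
public class HDFSTest05Glob {
    public static void main(String[] args) throws Exception {
        final String hdfsAddr = "hdfs://192.168.236.135:9000";
        FileSystem fs = FileSystem.get(URI.create(hdfsAddr), new Configuration(), "hadoop");
        FileStatus[] matches = fs.globStatus(new Path(hdfsAddr + "/tmp/*.txt"));
        for (FileStatus status : matches) {
            System.out.println("matched ==> " + status.getPath());
        }
    }
}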
6. Deleting data: fs.delete
package com.demo.hellohadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;
import java.net.URI;

/**
 * Delete a file.
 */
public class HDFSTest06 {
    private static final String HDFS_ADDR = "hdfs://192.168.236.135:9000";

    public static void main(String[] args) {
        try {
            FileSystem fs = FileSystem.get(URI.create(HDFS_ADDR), new Configuration(), "hadoop");
            // The second argument controls recursion; false is enough for a single file.
            boolean flag = fs.delete(new Path(HDFS_ADDR + "/tmp/README.txt"), false);
            System.out.println("delete file : " + flag);
        } catch (IOException | InterruptedException e) {
            e.printStackTrace();
        }
    }
}
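Passing true as the second argument deletes a directory together with everything under it. A minimal sketch, reusing the /tmp/tmpDir1 directory created in example 3:

package com.demo.hellohadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.net.URI;

/**
 * Sketch: recursively delete a directory and its contents.
 */
public class HDFSTest06Recursive {
    public static void main(String[] args) throws Exception {
        final String hdfsAddr = "hdfs://192.168.236.135:9000";
        FileSystem fs = FileSystem.get(URI.create(hdfsAddr), new Configuration(), "hadoop");
        boolean flag = fs.delete(new Path(hdfsAddr + "/tmp/tmpDir1"), true); // true = recursive
        System.out.println("delete directory : " + flag);
    }
}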
7. Listing files in a directory: fs.listStatus
package com.demo.hellohadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;
import java.net.URI;

/**
 * List the files in a directory.
 */
public class HDFSTest10 {
    private static final String HDFS_ADDR = "hdfs://192.168.236.135:9000";

    public static void main(String[] args) {
        try (FileSystem fs = FileSystem.get(URI.create(HDFS_ADDR), new Configuration(), "hadoop")) {
            FileStatus[] fileStatuses = fs.listStatus(new Path(HDFS_ADDR + "/tmp"));
            for (FileStatus fileStatus : fileStatuses) {
                System.out.println(fileStatus.getPath());
            }
        } catch (IOException | InterruptedException e) {
            e.printStackTrace();
        }
    }
}
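listStatus only looks at a single directory level. For a recursive listing, fs.listFiles returns a RemoteIterator of LocatedFileStatus entries. A sketch under the same assumptions:

package com.demo.hellohadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

import java.net.URI;

/**
 * Sketch: recursively list every file under /tmp.
 */
public class HDFSTest10Recursive {
    public static void main(String[] args) throws Exception {
        final String hdfsAddr = "hdfs://192.168.236.135:9000";
        try (FileSystem fs = FileSystem.get(URI.create(hdfsAddr), new Configuration(), "hadoop")) {
            RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path(hdfsAddr + "/tmp"), true);
            while (it.hasNext()) {
                System.out.println(it.next().getPath());
            }
        }
    }
}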
8. Uploading a file
package com.demo.hellohadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;
import java.net.URI;

/**
 * Upload a local file to HDFS.
 */
public class HDFSTest08 {
    private static final String HDFS_ADDR = "hdfs://192.168.236.135:9000";

    public static void main(String[] args) {
        try (FileSystem fs = FileSystem.get(URI.create(HDFS_ADDR), new Configuration(), "hadoop")) {
            Path src = new Path("D:\\workspace_idea\\hello-hadoop\\src\\main\\resources\\hello");
            Path dest = new Path(HDFS_ADDR + "/tmp");
            fs.copyFromLocalFile(src, dest);
        } catch (IOException | InterruptedException e) {
            e.printStackTrace();
        }
    }
}
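copyFromLocalFile also has an overload taking delSrc and overwrite flags, which is convenient when re-running the upload. A sketch with the same paths as above:

package com.demo.hellohadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.net.URI;

/**
 * Sketch: upload, overwriting any existing copy and keeping the local source file.
 */
public class HDFSTest08Overwrite {
    public static void main(String[] args) throws Exception {
        final String hdfsAddr = "hdfs://192.168.236.135:9000";
        try (FileSystem fs = FileSystem.get(URI.create(hdfsAddr), new Configuration(), "hadoop")) {
            Path src = new Path("D:\\workspace_idea\\hello-hadoop\\src\\main\\resources\\hello");
            Path dest = new Path(hdfsAddr + "/tmp");
            fs.copyFromLocalFile(false, true, src, dest); // delSrc = false, overwrite = true
        }
    }
}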
9. Downloading a file
package com.demo.hellohadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;

/**
 * Download a file from HDFS to the local file system.
 */
public class HDFSTest09 {
    private static final String HDFS_ADDR = "hdfs://192.168.236.135:9000";

    public static void main(String[] args) {
        try (FileSystem fs = FileSystem.get(URI.create(HDFS_ADDR), new Configuration())) {
            Path src = new Path(HDFS_ADDR + "/tmp/README.txt");
            FileOutputStream out = new FileOutputStream("D:\\workspace_idea\\hello-hadoop\\src\\main\\resources\\README.txt");
            FSDataInputStream in = fs.open(src);
            // close = true so copyBytes closes both streams when the copy finishes
            IOUtils.copyBytes(in, out, 4096, true);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
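The stream-based copy above can also be replaced by fs.copyToLocalFile. With useRawLocalFileSystem set to true it skips writing a local .crc checksum file, which is also a commonly used workaround on Windows clients. A sketch with the same paths; the class name is only illustrative:

package com.demo.hellohadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.net.URI;

/**
 * Sketch: download with copyToLocalFile instead of manual streams.
 */
public class HDFSTest09Copy {
    public static void main(String[] args) throws Exception {
        final String hdfsAddr = "hdfs://192.168.236.135:9000";
        try (FileSystem fs = FileSystem.get(URI.create(hdfsAddr), new Configuration())) {
            Path src = new Path(hdfsAddr + "/tmp/README.txt");
            Path dest = new Path("D:\\workspace_idea\\hello-hadoop\\src\\main\\resources\\README.txt");
            fs.copyToLocalFile(false, src, dest, true); // delSrc = false, useRawLocalFileSystem = true
        }
    }
}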