感觉最后这个实验还是有点难度的，前前后后花了整整一个下午的时间。这里只给出实验思路和关键代码，仅供学习参考，千万不要直接抄袭……
package org.example;
import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
/**
 * Generates a set of text files filled with randomly keyed records.
 *
 * <p>Each file is named {@code file<i>} (1-based) inside the output directory and
 * contains one record per line in the form
 * {@code <random key in 1..numRecords>\tthe recorder #<j> in file#<i>}.
 */
public class RandomTxtFileCreator {
    /** Output directory used when no directory is passed on the command line. */
    private static final String DEFAULT_OUTPUT_DIR = "/home/lyp/backend-projects/demo/src/main/java/static";
    /** Number of files generated when no count is passed on the command line. */
    private static final int DEFAULT_NUM_FILES = 3;
    /** Number of records per file when no count is passed on the command line. */
    private static final int DEFAULT_NUM_RECORDS = 10;

    /**
     * Writes the files and reports the elapsed time.
     *
     * <p>The success message is printed only when every file was written; on an I/O
     * failure a diagnostic goes to {@code System.err} instead.
     *
     * @param args optional overrides: {@code [0]} output directory, {@code [1]} number
     *             of files, {@code [2]} number of records per file; missing entries
     *             fall back to the built-in defaults
     * @throws NumberFormatException if a supplied count is not a valid integer
     */
    public static void main(String[] args) {
        long start = System.currentTimeMillis();
        int supplied = (args == null) ? 0 : args.length;
        String uri = supplied > 0 ? args[0] : DEFAULT_OUTPUT_DIR;
        int numFiles = supplied > 1 ? Integer.parseInt(args[1]) : DEFAULT_NUM_FILES;
        int numRecorders = supplied > 2 ? Integer.parseInt(args[2]) : DEFAULT_NUM_RECORDS;
        Random random = new Random();
        try {
            for (int i = 1; i <= numFiles; i++) {
                System.out.println("writing file#" + i);
                writeFile(uri + "/file" + i, i, numRecorders, random);
            }
        } catch (IOException e) {
            // Do not claim success when a file could not be created or written.
            System.err.println("failed to write files under " + uri + ": " + e);
            return;
        }
        long end = System.currentTimeMillis();
        System.out.println("write " + numFiles + " files successfully in " + (end - start) + "ms");
    }

    /** Writes one file of {@code numRecords} random-keyed records; the stream is always closed. */
    private static void writeFile(String path, int fileIndex, int numRecords, Random random)
            throws IOException {
        try (PrintStream out = new PrintStream(new BufferedOutputStream(new FileOutputStream(path)))) {
            for (int j = 0; j < numRecords; j++) {
                int recordKey = random.nextInt(numRecords) + 1;
                out.println(recordKey + "\t" + "the recorder #" + j + " in file#" + fileIndex);
            }
            // PrintStream never throws on write; checkError() flushes and exposes hidden failures.
            if (out.checkError()) {
                throw new IOException("error while writing " + path);
            }
        }
    }
}
package org.example;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.ID;
import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;
/**
 * Packs every regular file of a local directory into a single Hadoop SequenceFile
 * of {@code Text} keys and {@code Text} values.
 *
 * <p>Three kinds of records are appended so that three kinds of lookups can be
 * answered by a plain key comparison:
 * <ul>
 *   <li>key = the number before the tab, value = the text after it;</li>
 *   <li>key = file name immediately followed by that number, value = the text after the tab;</li>
 *   <li>key = file name, value = the whole file content (lines joined with {@code \n}).</li>
 * </ul>
 */
public class MergeFile {
    /**
     * Reads the local directory and writes the merged SequenceFile to HDFS.
     *
     * @param args ignored
     * @throws IOException if a local file cannot be read or the SequenceFile cannot be written
     * @throws URISyntaxException if the hard-coded HDFS URI is malformed
     */
    public static void main(String[] args) throws IOException, URISyntaxException {
        Configuration conf = new Configuration();
        String uri = "hdfs://localhost:9000/user/lyp/gzfile";
        Path hdfsDir = new Path(uri);
        FileSystem hdfs = FileSystem.get(new URI(uri), conf);
        String localDir = "/home/lyp/backend-projects/demo/src/main/java/static/";
        File gzFileDir = new File(localDir);
        String[] gzFiles = gzFileDir.list();
        if (gzFiles == null) {
            // Not a directory, or it could not be listed: nothing to merge.
            return;
        }
        Text key = new Text();
        Text value = new Text();
        // try-with-resources guarantees the writer is closed, which is what flushes
        // the buffered records to HDFS.
        try (SequenceFile.Writer writer =
                SequenceFile.createWriter(hdfs, conf, hdfsDir, Text.class, value.getClass())) {
            // Walk the listing back to front, matching the original record order.
            for (int i = gzFiles.length - 1; i >= 0; i--) {
                File gzFile = new File(localDir + gzFiles[i]);
                if (!gzFile.isFile()) {
                    // Sub-directories cannot be opened with FileReader.
                    continue;
                }
                appendFile(writer, gzFile, key, value);
            }
        }
    }

    /** Appends the per-line records and the whole-file record of one local file. */
    private static void appendFile(SequenceFile.Writer writer, File gzFile, Text key, Text value)
            throws IOException {
        StringBuilder content = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(new FileReader(gzFile.getPath()))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line).append("\n");
                String[] group = line.split("\t");
                if (group.length < 2) {
                    // No "<key>\t<text>" shape: keep the line in the whole-file
                    // record but do not index it by key.
                    continue;
                }
                // Lookup by number: key is the number, value is the rest of the line.
                key.set(group[0]);
                value.set(group[1]);
                writer.append(key, value);
                // Lookup by file name + number.
                // NOTE(review): name and number are concatenated without a separator,
                // so e.g. "file1"+"12" and "file11"+"2" produce the same key; Query
                // builds its lookup key the same way, so the format is kept as is.
                key.set(gzFile.getName() + group[0]);
                value.set(group[1]);
                writer.append(key, value);
            }
        }
        // Lookup by file name: value is the entire file content.
        key.set(gzFile.getName());
        value.set(content.toString());
        writer.append(key, value);
    }
}
package org.example;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;
import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Objects;
import java.util.Scanner;
/**
 * Interactive console client that answers three kinds of lookups against the
 * SequenceFile at {@code hdfs://localhost:9000/user/lyp/gzfile}.
 *
 * <p>Every lookup is a full scan comparing each record's key (as a string) with
 * the requested key. A fresh reader is opened for every lookup so that each one
 * starts at the beginning of the file.
 */
public class Query {
    static Scanner scanner = new Scanner(System.in);

    /**
     * Runs the menu loop until the user types {@code exit} or standard input ends.
     *
     * @param args ignored
     * @throws URISyntaxException if the hard-coded HDFS URI is malformed
     * @throws IOException if the SequenceFile cannot be read or the output file cannot be written
     */
    public static void main(String[] args) throws URISyntaxException, IOException {
        Configuration conf = new Configuration();
        String uri = "hdfs://localhost:9000/user/lyp/gzfile";
        Path hdfsDir = new Path(uri);
        FileSystem hdfs = FileSystem.get(new URI(uri), conf);
        String outputPath = "/home/lyp/backend-projects/demo/src/main/java/static/output";
        while (true) {
            System.out.println("请选择执行哪种查询操作:");
            System.out.println("1、给出文件名,可以从序列文件整体读取文件并存储到指定的位置");
            System.out.println("2、给出某个整数的key,可以读取所有该key的数据,并给出所在文件的名称(可以输出到控制台)");
            System.out.println("3、给出文件名和整数的key,可以读取该文件中的对应key的数据(可以输出到控制台)");
            System.out.println("输入exit退出整个程序");
            System.out.printf("%s", "请选择:");
            String choose = readLine();
            if (choose == null) {
                // Standard input was closed: leave instead of failing in nextLine().
                return;
            }
            switch (choose) {
                case "1": {
                    System.out.printf("%s: ", "请输入文件名");
                    String filename = readLine();
                    if (filename == null) {
                        return;
                    }
                    // The output file is (re)created even when no record matches.
                    try (PrintStream printStream =
                            new PrintStream(new BufferedOutputStream(new FileOutputStream(outputPath)))) {
                        printMatches(hdfs, hdfsDir, conf, filename, printStream);
                    }
                    break;
                }
                case "2": {
                    System.out.printf("%s:", "请输入对应的key");
                    String queryKey = readLine();
                    if (queryKey == null) {
                        return;
                    }
                    printMatches(hdfs, hdfsDir, conf, queryKey, System.out);
                    break;
                }
                case "3": {
                    System.out.printf("%s: ", "输入查询的文件名");
                    String queryFilename = readLine();
                    if (queryFilename == null) {
                        return;
                    }
                    System.out.printf("%s: ", "请输入对应的整数key");
                    String queryK = readLine();
                    if (queryK == null) {
                        return;
                    }
                    // Per-file records are keyed by file name immediately followed by the number.
                    printMatches(hdfs, hdfsDir, conf, queryFilename + queryK, System.out);
                    break;
                }
                case "exit":
                    return;
                default:
                    // Unknown choice: show the menu again.
                    break;
            }
        }
    }

    /** Returns the next line of standard input, or {@code null} when input is exhausted. */
    private static String readLine() {
        return scanner.hasNextLine() ? scanner.nextLine() : null;
    }

    /**
     * Scans the whole SequenceFile with a fresh reader and prints the value of every
     * record whose key equals {@code wantedKey}; the reader is always closed.
     */
    private static void printMatches(FileSystem hdfs, Path file, Configuration conf,
            String wantedKey, PrintStream sink) throws IOException {
        try (SequenceFile.Reader reader = new SequenceFile.Reader(hdfs, file, conf)) {
            Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
            while (reader.next(key, value)) {
                if (!Objects.equals(key.toString(), wantedKey)) {
                    continue;
                }
                String text = value.toString();
                if (text.endsWith("\n")) {
                    // Whole-file values already end with a newline; adding another
                    // would append a blank line to the restored file.
                    sink.print(text);
                } else {
                    sink.println(text);
                }
            }
        }
        sink.flush();
    }
}