每个文件压缩前最大为 50 MB;超出大小限制的指标数据写入新文件,新文件序号加 1。序号从 001 开始,最大为 999。性能文件生成后采用 gzip 压缩存储。
分析
生成测试文件
public class GenerateFileDemo {
public static void main(String[] args) throws IOException {
String path = CompressDemo.class.getResource("/").getPath();
BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(path + "big.data"));
String str = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1";
for (int i = 0; i < 1000000; i++) {
bufferedWriter.write(i + str);
bufferedWriter.newLine();
}
bufferedWriter.flush();
bufferedWriter.close();
}
}
/**
 * Splits {@code big.data} (read from the classpath root) into consecutive chunk files of at
 * most 50 MB each, written next to it as {@code sub-big-<index>.data}.
 */
public class BigFileWriteDemo {
    // 1MB
    private static final int FILE_SIZE = 1 * 1024 * 1024;
    // End-of-stream marker returned by read()
    private static final int EOF = -1;

    /**
     * Prints the source file size, then writes one chunk file per 50 MB of input.
     *
     * @param args unused
     * @throws IOException if the source cannot be read or a chunk cannot be written
     */
    public static void main(String[] args) throws IOException {
        String path = BigFileWriteDemo.class.getResource("/").getPath();
        System.out.println(path);
        try (FileInputStream fis = new FileInputStream(path + "big.data");
             BufferedInputStream bis = new BufferedInputStream(fis)) {
            // Ask the channel for the real size: available() is only an int estimate and
            // cannot represent files larger than Integer.MAX_VALUE bytes.
            long totalBytes = fis.getChannel().size();
            System.out.println(totalBytes);
            System.out.println(String.format("%.2fMB", totalBytes * 1.0 / FILE_SIZE));
            // Split into files of 50 MB each
            int saveSize = 50 * FILE_SIZE;
            byte[] bytes = new byte[saveSize];
            // Index used in the chunk file name. NOTE(review): starts at 1000, presumably so
            // names keep a fixed width and sort in order — confirm against the naming spec.
            int filenameExt = 1000;
            int length;
            while ((length = fillBuffer(bis, bytes)) > 0) {
                try (BufferedOutputStream bos = new BufferedOutputStream(
                        new FileOutputStream(path + "sub-big-" + (filenameExt++) + ".data"))) {
                    bos.write(bytes, 0, length);
                }
            }
        }
    }

    /**
     * Reads from {@code in} until {@code buffer} is full or the stream ends. A single
     * {@code read} call is allowed to return fewer bytes than requested, so it is repeated.
     *
     * @return the number of bytes stored in {@code buffer}; 0 if the stream was already at its end
     */
    private static int fillBuffer(BufferedInputStream in, byte[] buffer) throws IOException {
        int filled = 0;
        while (filled < buffer.length) {
            int count = in.read(buffer, filled, buffer.length - filled);
            if (count == EOF) {
                break;
            }
            filled += count;
        }
        return filled;
    }
}
合并文件
/**
 * Merges the chunk files {@code sub-big-<index>.data} found in the classpath root back into a
 * single file, {@code composite-big.data}, in ascending index order.
 */
public class BigFileReadDemo {
    // 1MB
    private static final int FILE_SIZE = 1 * 1024 * 1024;
    // End-of-stream marker returned by read()
    private static final int EOF = -1;
    // Fixed parts of a chunk file name; the numeric index sits between them.
    private static final String SUB_FILE_PREFIX = "sub-big-";
    private static final String SUB_FILE_SUFFIX = ".data";
    // The dot is escaped so that only a literal ".data" suffix matches.
    private static final String SUB_FILE_REGEX = "sub-big-\\d+\\.data";

    /**
     * Lists the chunk files, orders them by numeric index and concatenates them.
     * Does nothing if the classpath root is not a listable directory or holds no chunk files.
     *
     * @param args unused
     * @throws IOException if a chunk cannot be read or the merged file cannot be written
     */
    public static void main(String[] args) throws IOException {
        String path = BigFileReadDemo.class.getResource("/").getPath();
        File file = new File(path);
        if (!file.isDirectory()) {
            return;
        }
        System.out.println(file.getName());
        String[] list = file.list();
        if (list == null) {
            // File.list() returns null when the directory cannot be read.
            return;
        }
        List<String> subFileNames = Arrays.stream(list)
                .filter(s -> s.matches(SUB_FILE_REGEX))
                // Order by the numeric index: plain string order would put 1000 before 999.
                .sorted((a, b) -> {
                    int byIndex = subFileIndex(a).compareTo(subFileIndex(b));
                    return byIndex != 0 ? byIndex : a.compareTo(b);
                })
                .collect(Collectors.toList());
        if (subFileNames.isEmpty()) {
            return;
        }
        // Assemble the merged file; try-with-resources closes it even if a chunk fails.
        try (BufferedOutputStream bos = new BufferedOutputStream(
                new FileOutputStream(path + "composite-big.data"))) {
            // Copy through a 1 MB buffer
            byte[] bytes = new byte[FILE_SIZE];
            for (String subFilename : subFileNames) {
                try (BufferedInputStream bis = new BufferedInputStream(
                        new FileInputStream(path + subFilename))) {
                    System.out.println(bis.available());
                    int length;
                    while ((length = bis.read(bytes)) > EOF) {
                        bos.write(bytes, 0, length);
                    }
                }
            }
        }
    }

    /** Extracts the numeric index from a name that already matched {@link #SUB_FILE_REGEX}. */
    private static java.math.BigInteger subFileIndex(String name) {
        String digits = name.substring(
                SUB_FILE_PREFIX.length(), name.length() - SUB_FILE_SUFFIX.length());
        return new java.math.BigInteger(digits);
    }
}
定义抽象接口
/**
 * Compresses and decompresses byte arrays.
 */
public interface Compress {
/**
 * Compresses the given data.
 *
 * @param data the bytes to compress
 * @return the compressed bytes
 * @throws IOException if compression fails
 */
byte[] compress(byte[] data) throws IOException;
/**
 * Decompresses the given data.
 *
 * @param data the compressed bytes
 * @return the decompressed bytes
 * @throws IOException if decompression fails
 */
byte[] uncompress(byte[] data) throws IOException;
}
抽象压缩解压算法
/**
 * Base class for stream-backed {@link Compress} implementations. Subclasses provide the
 * codec-specific stream wrappers; this class moves the bytes through them.
 */
public abstract class AbstractCompress implements Compress {

    /**
     * Wraps {@code output} in the stream that {@link #compress(byte[])} writes to.
     *
     * @param output the stream that receives the wrapper's output
     * @return the wrapping stream
     * @throws IOException if the wrapper cannot be created
     */
    protected abstract OutputStream createOutputStream(OutputStream output) throws IOException;

    /**
     * Wraps {@code input} in the stream that {@link #uncompress(byte[])} reads from.
     *
     * @param input the stream the wrapper reads from
     * @return the wrapping stream
     * @throws IOException if the wrapper cannot be created
     */
    protected abstract InputStream createInputStream(InputStream input) throws IOException;

    /**
     * Writes {@code data} through the stream from {@link #createOutputStream} and returns
     * everything that arrived in the underlying in-memory sink.
     */
    @Override
    public byte[] compress(byte[] data) throws IOException {
        final ByteArrayOutputStream sink = new ByteArrayOutputStream();
        // Closing the wrapper before reading the sink lets it emit any buffered output.
        try (OutputStream encoder = createOutputStream(sink)) {
            encoder.write(data);
        }
        return sink.toByteArray();
    }

    /**
     * Reads {@code data} through the stream from {@link #createInputStream} until it is
     * exhausted and returns the bytes that were read.
     */
    @Override
    public byte[] uncompress(byte[] data) throws IOException {
        final ByteArrayOutputStream sink = new ByteArrayOutputStream();
        final byte[] chunk = new byte[4096];
        try (InputStream decoder = createInputStream(new ByteArrayInputStream(data))) {
            for (int count = decoder.read(chunk); count != -1; count = decoder.read(chunk)) {
                sink.write(chunk, 0, count);
            }
        }
        return sink.toByteArray();
    }
}
实现Gzip算法
/**
 * GZIP implementation of the stream factories required by {@link AbstractCompress}.
 *
 * <p>The output side is a wrapper built on {@code DeflaterOutputStream}; its inheritance
 * chain is:
 *
 * <pre>
 * java.lang.Object
 *   java.io.OutputStream
 *     java.io.FilterOutputStream
 *       java.util.zip.DeflaterOutputStream
 *         java.util.zip.GZIPOutputStream
 * </pre>
 */
public class GzipCompress extends AbstractCompress {

    /** Returns a {@link GZIPInputStream} reading from {@code input}. */
    @Override
    protected InputStream createInputStream(InputStream input) throws IOException {
        return new GZIPInputStream(input);
    }

    /** Returns a {@link GZIPOutputStream} writing to {@code output}. */
    @Override
    protected OutputStream createOutputStream(OutputStream output) throws IOException {
        return new GZIPOutputStream(output);
    }
}
编写工具类
/**
 * Static entry points for compression, backed by one shared {@link GzipCompress} instance.
 */
public class CompressUtil {

    /** The codec every call is delegated to. */
    private static final Compress CODEC = new GzipCompress();

    /** Not instantiable: the class only exposes static methods. */
    private CompressUtil() {
    }

    /**
     * Decompresses {@code data} with the shared codec.
     *
     * @param data the compressed bytes
     * @return the decompressed bytes
     * @throws IOException if the codec fails
     */
    public static byte[] uncompress(byte[] data) throws IOException {
        final byte[] restored = CODEC.uncompress(data);
        return restored;
    }

    /**
     * Compresses {@code data} with the shared codec.
     *
     * @param data the bytes to compress
     * @return the compressed bytes
     * @throws IOException if the codec fails
     */
    public static byte[] compress(byte[] data) throws IOException {
        final byte[] packed = CODEC.compress(data);
        return packed;
    }
}
测试
/**
 * Compresses {@code demo.json} from the classpath root with {@link CompressUtil} and stores
 * the result next to it.
 */
public class CompressDemo {
    // Suffix of the compressed file. UnCompressDemo opens "demo.json" + ".zip", so the
    // suffix is kept as is even though the payload comes from CompressUtil.
    private static final String EXT = ".zip";

    /**
     * Prints the original and compressed sizes and writes the compressed file.
     *
     * @param args unused
     * @throws IOException if the source cannot be read or the target cannot be written
     */
    public static void main(String[] args) throws IOException {
        String path = CompressDemo.class.getResource("/").getPath();
        System.out.println(path);
        byte[] data = readFully(path + "demo.json");
        System.out.println(data.length);
        byte[] compressData = CompressUtil.compress(data);
        System.out.println(compressData.length);
        // Save the compressed file; try-with-resources flushes and closes it on every path.
        try (BufferedOutputStream bos = new BufferedOutputStream(
                new FileOutputStream(path + "demo.json" + EXT))) {
            bos.write(compressData);
        }
    }

    /**
     * Reads the whole file into memory. The read is looped until end of stream because a
     * single {@code read} may return fewer bytes than requested and {@code available()} is
     * only an estimate of the remaining size.
     */
    private static byte[] readFully(String filename) throws IOException {
        try (BufferedInputStream bis = new BufferedInputStream(new FileInputStream(filename))) {
            java.io.ByteArrayOutputStream content = new java.io.ByteArrayOutputStream();
            byte[] buffer = new byte[8192];
            int length;
            while ((length = bis.read(buffer)) != -1) {
                content.write(buffer, 0, length);
            }
            return content.toByteArray();
        }
    }
}
/**
 * Reads the compressed {@code demo.json.zip} from the classpath root, decompresses it with
 * {@link CompressUtil} and prints the resulting text line by line.
 */
public class UnCompressDemo {
    // Suffix of the compressed file that is read.
    private static final String EXT = ".zip";

    /**
     * Prints the compressed size, the decompressed size and then the decompressed text.
     *
     * @param args unused
     * @throws IOException if the file cannot be read or decompressed
     */
    public static void main(String[] args) throws IOException {
        String path = UnCompressDemo.class.getResource("/").getPath();
        byte[] data = readFully(path + "demo.json" + EXT);
        System.out.println(data.length);
        byte[] uncompressData = CompressUtil.uncompress(data);
        System.out.println(uncompressData.length);
        // Decode explicitly as UTF-8 (the JSON interchange encoding) so the output does not
        // depend on the platform default charset.
        try (BufferedReader uncompressStream = new BufferedReader(new InputStreamReader(
                new ByteArrayInputStream(uncompressData),
                java.nio.charset.StandardCharsets.UTF_8))) {
            String line;
            while ((line = uncompressStream.readLine()) != null) {
                System.out.println(line);
            }
        }
    }

    /**
     * Reads the whole file into memory. The read is looped until end of stream because a
     * single {@code read} may return fewer bytes than requested and {@code available()} is
     * only an estimate of the remaining size.
     */
    private static byte[] readFully(String filename) throws IOException {
        try (BufferedInputStream bis = new BufferedInputStream(new FileInputStream(filename))) {
            java.io.ByteArrayOutputStream content = new java.io.ByteArrayOutputStream();
            byte[] buffer = new byte[8192];
            int length;
            while ((length = bis.read(buffer)) != -1) {
                content.write(buffer, 0, length);
            }
            return content.toByteArray();
        }
    }
}
输出原demo.json内容
剩下的工作只需将上述拆分、合并与压缩功能组装起来,即可满足开头的需求,此处不再演示。