压缩需求:压缩文件夹下所有具有统一文件名规范的普通文件,同时添加文件名与压缩包相同的MANIFEST文件,包含所有被压缩文件的文件名及大小列表。
Java文件压缩涉及到的类均在java.util.zip包下:
- ZipOutputStream: Zip包的输出流,利用putNextEntry()向包内添加一个文件。
- ZipEntry: 将被压缩到包内的文件实例,在文件名后添加文件分隔符可以压缩目录。
代码如下:
/**
 * Packs files from a source directory into zip archives and adds a MANIFEST entry
 * that lists the name and size of every packed file. The paths and limits used by
 * {@code zipFilesByJava} are held in the static fields below.
 */
public class FileZipper {
// Not referenced by the code shown here.
private static String INPUT_FPATH = "C:/temp/fileZipper/DropZone/Input_Min";
// Directory in which zipFilesByJava creates its zip archives.
private static String OUTPUT_ZIP_PATH = "C:/temp/fileZipper/DropZone/Input_Zip";
// Not referenced by the code shown here.
private static String OUTPUT_ZIP_TSPATH = "/SRE/apps/ican505/sandbox/jerry/filezipper/Output_TS";
// Directory into which each source file is moved (renamed) once it has been added to a zip.
private static String ARCHIEVE_ZONE = "C:/temp/fileZipper/ArchiveZone";
// Not referenced by the code shown here; zipFilesByJava splits its file list on "|" instead.
private static String INPUT_DELIM = ":";
// Not referenced by the code shown here.
private static String ZIP_FPATTERN = "I2001_GLB";
private static int AVG_FILENAME_LEN = 45; // Estimated length of one MANIFEST line; only used to presize the buffer that collects the entry list
// Size in bytes of the read buffer; zipFilesByJava resets it to 1024 when it is <= 0.
private static int BUF_SIZE = 1024;
// Number of files packed per zip (the last zip may take up to twice as many); zipFilesByJava resets it to 100 when it is <= 0.
private static int STD_ZIP_ENTRIES = 20;
/**
 * Compresses the files named in {@code strOutputFileList} into one or more zip archives
 * created under {@code OUTPUT_ZIP_PATH}. Each archive also receives a MANIFEST entry, named
 * after the archive itself, containing one {@code filename|size} line per packed file.
 *
 * <p>Files are packed {@code STD_ZIP_ENTRIES} per archive; the last archive absorbs the
 * remainder and may therefore hold up to twice that many. After a file has been added to
 * an archive it is moved (renamed) into {@code ARCHIEVE_ZONE}.
 *
 * <p>Naming: a single input file {@code X.txt} yields {@code X.zip}; otherwise archives are
 * named {@code <pattern>_<timestamp>.zip}, with a {@code _<n>} sequence suffix when more than
 * one archive is produced so that later archives do not overwrite earlier ones.
 *
 * <p>Side effect: non-positive {@code STD_ZIP_ENTRIES} / {@code BUF_SIZE} class fields are
 * reset to 100 / 1024.
 *
 * @param strZipFilePattern prefix used to build archive names when several files are packed
 * @param strFilePath       directory that contains the files to pack
 * @param strOutputFileList file names (relative to {@code strFilePath}) separated by {@code |}
 * @param strErrorZonePath  not used by this method
 * @return the paths of the archives written, separated by {@code |} when there are several;
 *         {@code null} if the file list is null/empty or a listed file does not exist. If an
 *         I/O error occurs the archives completed so far are returned.
 */
public String zipFilesByJava(String strZipFilePattern, String strFilePath, String strOutputFileList, String strErrorZonePath) {
    System.out.println("***** Inside zipFilesByJava method *****");
    System.out.println("***** Source Location of files: " + strFilePath + ", Zip File Pattern: " + strZipFilePattern + " *****");

    // StringTokenizer throws NullPointerException for a null string, so test the argument itself first.
    if (strOutputFileList == null) {
        System.out.println("[ERROR] Input file list is NULL or EMPTY!");
        return null;
    }
    StringTokenizer strFileList = new StringTokenizer(strOutputFileList, "|");
    int iTotalFiles = strFileList.countTokens();
    if (iTotalFiles == 0) {
        System.out.println("[ERROR] Input file list is NULL or EMPTY!");
        return null;
    }
    // Materialise the tokens so that the single-file case can use the clean token
    // (the raw list may carry stray delimiters) and so that batching is a plain index walk.
    String[] strFileNames = new String[iTotalFiles];
    for (int i = 0; i < iTotalFiles; i++) {
        strFileNames[i] = strFileList.nextToken();
    }

    if (STD_ZIP_ENTRIES <= 0) {
        System.out.println("[WARN] The property STD_ZIP_ENTRIES can NOT be negative or zero! Reset to 100");
        STD_ZIP_ENTRIES = 100;
    }
    if (BUF_SIZE <= 0) {
        System.out.println("[WARN] The property BUF_SIZE can NOT be negative or zero! Reset to 1024");
        BUF_SIZE = 1024;
    }

    byte[] bInputBuf = new byte[BUF_SIZE];
    // Integer division: the remainder is absorbed by the last zip (see iMaxEntries below).
    int iTotalZips = iTotalFiles / STD_ZIP_ENTRIES == 0 ? 1 : iTotalFiles / STD_ZIP_ENTRIES;
    StringBuffer sbZipFNameList = new StringBuffer();
    String strTS = new SimpleDateFormat("yyyyMMddHHmmssSS").format(new Date());
    int iNextFile = 0;
    ZipOutputStream zos = null;
    try {
        System.out.println("[INFO] Current Time in Millis: " + System.currentTimeMillis());
        for (int iCntZip = 0; iCntZip < iTotalZips; iCntZip++) {
            boolean bLastZip = (iCntZip == iTotalZips - 1);
            int iMaxEntries = bLastZip ? 2 * STD_ZIP_ENTRIES : STD_ZIP_ENTRIES;

            String strZipBaseName;
            if (iTotalFiles == 1) {
                // Escaped dot: only a literal ".txt" suffix is stripped.
                strZipBaseName = strFileNames[0].replaceAll("\\.txt$", "");
            } else {
                strZipBaseName = strZipFilePattern + "_" + strTS;
                if (iTotalZips > 1) {
                    // The timestamp is shared by all zips of this run; the sequence number keeps names unique.
                    strZipBaseName = strZipBaseName + "_" + (iCntZip + 1);
                }
            }
            String strZipFName = OUTPUT_ZIP_PATH + File.separatorChar + strZipBaseName + ".zip";
            // The MANIFEST carries the same base name as its zip.
            String strManifestFName = strZipBaseName + ".MANIFEST";

            zos = new ZipOutputStream(new CheckedOutputStream(new FileOutputStream(strZipFName), new CRC32()));
            StringBuffer sbZippedEntryList = new StringBuffer(iMaxEntries * AVG_FILENAME_LEN);
            int iCntEntry = 0;
            while (iCntEntry < iMaxEntries && iNextFile < strFileNames.length) {
                String strEntryFName = strFileNames[iNextFile++];
                File fEntry = new File(strFilePath + File.separatorChar + strEntryFName);
                if (!fEntry.exists()) {
                    System.out.println("[ERROR] The input file " + strFilePath + File.separatorChar + strEntryFName + " does NOT exist!");
                    return null; // the finally block below closes the open zip stream
                }
                zos.putNextEntry(new ZipEntry(strEntryFName));
                long lFileSize = 0L; // long: files larger than 2 GB must not overflow the counter
                FileInputStream fin = new FileInputStream(fEntry);
                try {
                    int iReadSize;
                    while ((iReadSize = fin.read(bInputBuf)) != -1) {
                        // Write only the bytes actually read; the buffer is usually not full on the last chunk.
                        zos.write(bInputBuf, 0, iReadSize);
                        lFileSize += iReadSize;
                    }
                } finally {
                    fin.close();
                }
                zos.closeEntry();
                iCntEntry++;
                sbZippedEntryList.append(strEntryFName + "|" + lFileSize + "\n");
                System.out.println("[INFO] Entry No. " + iCntEntry + ": " + strEntryFName);
                if (fEntry.renameTo(new File(ARCHIEVE_ZONE + File.separatorChar + strEntryFName))) {
                    System.out.println("[INFO] Entry file " + strEntryFName + " is already moved to Archieve Zone!");
                } else {
                    System.out.println("[WARN] Entry file " + strEntryFName + " could NOT be moved to Archieve Zone!");
                }
            }

            zos.putNextEntry(new ZipEntry(strManifestFName));
            // Platform default charset, matching how the manifest is read back on the unzip side.
            zos.write(sbZippedEntryList.toString().getBytes());
            zos.close();
            zos = null;
            System.out.println("---------------------------\n[TOTAL] Zip No. "
                    + (iCntZip + 1) + ": " + iCntEntry + " files compressed into zip "
                    + strZipFName + "!\n---------------------------\n");
            if (sbZipFNameList.length() > 0) {
                sbZipFNameList.append("|");
            }
            sbZipFNameList.append(strZipFName);
        }
        System.out.println("[END] Finished reading and zipping!");
        System.out.println("[INFO] Current Time in Millis: " + System.currentTimeMillis());
    } catch (ZipException zipE) {
        System.out.println("[ERROR] ZipException thrown!");
        zipE.printStackTrace();
    } catch (IOException ioE) {
        System.out.println("[ERROR] IOException thrown!");
        ioE.printStackTrace();
    } finally {
        if (zos != null) {
            try {
                zos.close();
            } catch (IOException ignored) {
                // Best-effort cleanup on an error path; the primary failure has already been reported.
            }
        }
    }
    return sbZipFNameList.toString();
}
解压缩需求:解压缩一个zip文件,读取MANIFEST文件,一一验证MANIFEST文件中所列的文件名和大小,只有当所有文件和大小均匹配时解压zip包至特定目录。
Java文件解压缩需要用到的类包括ZipFile(用文件名实例化一个压缩包)和ZipEntry,ZipEntry实例被从ZipFile实例获取。 利用ZipFile的getInputStream()(zipEntry实例作为参数)可以获取对zipEntry的输入流,由此读入数据并写出到文件系统。
代码如下:
/**
 * Extracts zip archives from a directory, checking each entry's name and size against
 * the MANIFEST entry stored inside the archive.
 */
public class FileUnzipper {
// Logger named after this class; used for all progress and error reporting in unzip().
public static Logger logger = Logger.getLogger(FileUnzipper.class.getName());
// Text of the most recent error recorded by unzip(); stays empty until an error is recorded.
public String strErrMsg = "";
/**
 * Processes every zip file in {@code strZipFilePath} whose name starts with
 * {@code strZipPattern} and ends with {@code .zip}.
 *
 * <p>For each archive the MANIFEST entry (archive base name + {@code .MANIFEST}) is read into
 * a {@code filename -> size} map. Every non-directory entry is then extracted: entries whose
 * name starts with {@code strClaimsFilePattern} (or is the MANIFEST itself) and whose size
 * matches the manifest go to {@code strTargetFilePath}; all others go to {@code strErrorZone}.
 * Each extracted entry is additionally copied to {@code strArchivePath}. The archive is deleted
 * after processing. An archive without a MANIFEST, or a matching path that is not a regular
 * file, is moved to {@code strErrorZone}.
 *
 * <p>Errors are logged and stored in {@code strErrMsg}; nothing is thrown to the caller. On an
 * {@code IOException} an attempt is made to move all listed archives to {@code strErrorZone}.
 *
 * @param strZipFilePath       directory containing the zip files (must exist)
 * @param strTargetFilePath    directory receiving validated entries (must exist)
 * @param strErrorZone         directory receiving mismatched entries and rejected archives
 * @param strZipPattern        required prefix of the zip file names (non-blank)
 * @param strClaimsFilePattern required prefix of data entry names (non-blank)
 * @param strArchivePath       directory receiving a copy of every extracted entry
 */
void unzip(String strZipFilePath, String strTargetFilePath, String strErrorZone, String strZipPattern, String strClaimsFilePattern,
        String strArchivePath) {
    // Null paths are rejected here; new File(null) would throw NullPointerException.
    if (strZipFilePath == null || strTargetFilePath == null
            || !new File(strZipFilePath).exists() || !new File(strTargetFilePath).exists()
            || strZipPattern == null || strZipPattern.trim().length() <= 0
            || strClaimsFilePattern == null || strClaimsFilePattern.trim().length() <= 0) {
        strErrMsg = "The mandatory location/filename for unzipping is(are) NOT valid!";
        logger.error("[" + PropSet.GIFInterfaceId + "] " + strErrMsg);
        return;
    }
    File[] fZipArray = new File(strZipFilePath).listFiles(new FileListFilter(strZipPattern, "zip"));
    if (fZipArray == null || fZipArray.length == 0) {
        strErrMsg = "No Zip file with pattern " + strZipPattern + " exist in the source path: " + strZipFilePath;
        logger.error("[" + PropSet.GIFInterfaceId + "] " + strErrMsg);
        return;
    }

    Map mapFNameToSize = new HashMap();
    byte[] bReadBuf = new byte[PropSet.FILE_WRITE_BUFSIZE];
    try {
        for (int iZip = 0; iZip < fZipArray.length; iZip++) {
            String strZipFName = fZipArray[iZip].getName();
            // The filter only accepts names ending in ".zip", so stripping the suffix by length is safe.
            String strManifestFName = strZipFName.substring(0, strZipFName.length() - ".zip".length()) + ".MANIFEST";
            if (!fZipArray[iZip].isFile()) {
                logger.warn("The zip file " + strZipFName + " is directory or NOT a normal zip file! Moving to ErrorZone...");
                if (fZipArray[iZip].renameTo(new File(strErrorZone + File.separator + strZipFName)))
                    logger.info("Successfully moved to ErrorZone!");
                else logger.error("Failed to move to ErrorZone!");
                continue;
            }

            // Start every archive with an empty map so sizes from a previous zip cannot leak in.
            mapFNameToSize.clear();
            int iEntryCounter = 0;
            int iZipEntries = 0;
            boolean bManifestMissing = false;
            ZipFile zipFile = new ZipFile(fZipArray[iZip]);
            try {
                // process manifest entry
                ZipEntry manifestEntry = zipFile.getEntry(strManifestFName);
                if (manifestEntry == null) {
                    bManifestMissing = true;
                } else {
                    BufferedReader bufManifestReader = new BufferedReader(new InputStreamReader(zipFile.getInputStream(manifestEntry)));
                    try {
                        String strReadLine;
                        while ((strReadLine = bufManifestReader.readLine()) != null) {
                            String[] strParts = strReadLine.split("[|]");
                            if (strParts.length < 2) {
                                throw new Exception("Failed to add [filename --> filesize] mapping to hashmap from manifest file! Line supposed to be mapped: " + strReadLine);
                            }
                            mapFNameToSize.put(strParts[0], strParts[1]);
                        }
                    } finally {
                        bufManifestReader.close();
                    }
                    // The manifest does not list itself; register it so it passes the size check below.
                    mapFNameToSize.put(strManifestFName, Long.toString(manifestEntry.getSize()));

                    // process data entries
                    Enumeration zipAllEntries = zipFile.entries();
                    while (zipAllEntries.hasMoreElements()) {
                        ZipEntry zipEntry = (ZipEntry) zipAllEntries.nextElement();
                        String strEntryFName = zipEntry.getName().trim();
                        long lEntryFSize = zipEntry.getSize();
                        if (zipEntry.isDirectory()) {
                            logger.warn("The entry " + strEntryFName + " IS directory, to ignore it.");
                            continue;
                        }
                        boolean bEntryMismatched = false;
                        if (!(strEntryFName.startsWith(strClaimsFilePattern) || strEntryFName.equals(strManifestFName))) {
                            bEntryMismatched = true;
                            strErrMsg = "Invalid Zip Entry Name " + strEntryFName + ", Decompressing to ErrorZone...";
                            logger.error("[" + PropSet.GIFInterfaceId + "] " + strErrMsg);
                        } else if (!mapFNameToSize.containsKey(strEntryFName) || !mapFNameToSize.get(strEntryFName).equals(Long.toString(lEntryFSize))) {
                            bEntryMismatched = true;
                            strErrMsg = "Entry-filename: " + strEntryFName + ", Entry-filesize: " + lEntryFSize
                                    + " mismatch with the value " + mapFNameToSize.get(strEntryFName) + " in Manifest file! Decompressing to ErrorZone...";
                            logger.error("[" + PropSet.GIFInterfaceId + "] " + strErrMsg);
                        }
                        // Use only the last path component so an entry name such as "../x"
                        // cannot be written outside the intended output directories.
                        String strOutFName = new File(strEntryFName).getName();
                        String strPrimaryDir = bEntryMismatched ? strErrorZone : strTargetFilePath;
                        copyEntryTo(zipFile, zipEntry, strPrimaryDir + File.separator + strOutFName,
                                strArchivePath + File.separator + strOutFName, bReadBuf);
                        if (!bEntryMismatched) {
                            iEntryCounter++;
                            logger.info("Matched Entry No. " + iEntryCounter + " | Entry Name: " + strEntryFName + ", Entry File Size: " + lEntryFSize);
                        } else logger.info("The mismatched Entry file was Decompressed to ErrorZone and Archived!");
                    }
                    iZipEntries = zipFile.size();
                }
            } finally {
                // Always release the archive before it is moved or deleted; an open handle
                // prevents rename/delete on some platforms.
                zipFile.close();
            }

            if (bManifestMissing) {
                strErrMsg = "The zip file " + strZipFName + " does NOT contain MANIFEST file! Moving the zip file to ErrorZone...";
                logger.error("[" + PropSet.GIFInterfaceId + "] " + strErrMsg);
                if (fZipArray[iZip].renameTo(new File(strErrorZone + File.separator + strZipFName)))
                    logger.info("Successfully moved to ErrorZone!");
                else logger.error("Failed to move to ErrorZone!");
                continue;
            }

            logger.info("Zip No. " + iZip + " | Zip Filename: " + strZipFName + ", Entries contained: " + iZipEntries + ", Entries decompressed: " + iEntryCounter);
            if (fZipArray[iZip].delete())
                logger.info("Deleted the zip file!");
            else logger.warn("Failed to delete the zip file!");
        }
        logger.info("Finished process the zip file(s) in location " + strZipFilePath);
    } catch (IOException ioE) {
        strErrMsg = "IOException happned with msg: " + ioE.getMessage();
        logger.error("[" + PropSet.GIFInterfaceId + "] " + strErrMsg);
        for (int iZip = 0; iZip < fZipArray.length; iZip++) {
            if (!fZipArray[iZip].renameTo(new File(strErrorZone + File.separator + fZipArray[iZip].getName()))) {
                logger.warn("Failed to move the left zip file " + fZipArray[iZip].getName() + " to ErrorZone " + strErrorZone);
            } else logger.info("Moved the zip file " + fZipArray[iZip].getName() + " to ErrorZone " + strErrorZone);
        }
    } catch (Exception exc) {
        strErrMsg = "Exception happened with msg: " + exc.getMessage();
        logger.error("[" + PropSet.GIFInterfaceId + "] " + strErrMsg);
    }
}

/**
 * Copies the content of one zip entry to two files, closing all three streams even on failure.
 *
 * @param zipFile        open archive that owns {@code zipEntry}
 * @param zipEntry       entry to extract
 * @param strPrimaryPath path of the first output file
 * @param strArchivePath path of the second (archive) output file
 * @param bReadBuf       scratch buffer used for the copy
 * @throws IOException if reading the entry or writing either file fails
 */
private void copyEntryTo(ZipFile zipFile, ZipEntry zipEntry, String strPrimaryPath, String strArchivePath, byte[] bReadBuf)
        throws IOException {
    InputStream isEntryFile = zipFile.getInputStream(zipEntry);
    try {
        OutputStream osEntryFile = new FileOutputStream(strPrimaryPath);
        try {
            OutputStream osEntryArchive = new FileOutputStream(strArchivePath);
            try {
                int iReadSize;
                while ((iReadSize = isEntryFile.read(bReadBuf)) != -1) {
                    // Write only what was read; read() may fill the buffer only partially.
                    osEntryFile.write(bReadBuf, 0, iReadSize);
                    osEntryArchive.write(bReadBuf, 0, iReadSize);
                }
            } finally {
                osEntryArchive.close();
            }
        } finally {
            osEntryFile.close();
        }
    } finally {
        isEntryFile.close();
    }
}
/**
 * Filename filter that accepts names by optional prefix and optional extension.
 * A {@code null} prefix or extension disables the corresponding check.
 */
class FileListFilter implements FilenameFilter {
    private String name;
    private String extension;

    /**
     * @param name      required start of the file name, or {@code null} for any
     * @param extension required extension without the leading dot, or {@code null} for any
     */
    public FileListFilter(String name, String extension) {
        this.name = name;
        this.extension = extension;
    }

    /**
     * Returns {@code true} when {@code filename} satisfies every configured check;
     * the {@code directory} argument is not consulted.
     */
    public boolean accept(File directory, String filename) {
        boolean prefixMatches = (name == null) || filename.startsWith(name);
        boolean suffixMatches = (extension == null) || filename.endsWith('.' + extension);
        return prefixMatches && suffixMatches;
    }
}
/**
 * Shared settings read by the unzip code: the interface id used as a log prefix
 * and the size of the copy buffer.
 */
class PropSet {
    /** Interface identifier prepended (in square brackets) to error log messages. */
    static String GIFInterfaceId = "I2001_GLB";

    /** Length in bytes of the buffer allocated for copying zip entry data. */
    static int FILE_WRITE_BUFSIZE = 1024;
}
由于需求特定,代码实现有所局限。
看到
Snowolf对带有压缩算法的压缩/解压缩的研究,在实际应用中需要面对不同的问题和场景,如文件读写同步/目录压缩及解压缩/中文编码等。故本文章也会适时更新。