之前的文章涉及的内容层次较高、比较简单,本篇文章进一步分析 ZipFile 如何解析 zip 压缩包中的具体文件,然后进行读取和解压。
1、.NET Core 文件路径解决方法,统一 Linux 与 Windows
2、ZipFile 解压原理
在上篇文章中说到,首先是读取压缩包文件,然后将压缩包转为 ZipArchive 对象
// Open the archive in Read mode; the using statement disposes it when the block exits.
using (ZipArchive archive = ZipFile.Open(sourceArchiveFileName, ZipArchiveMode.Read, entryNameEncoding))
{
// ExtractToDirectoryExtension is defined outside this excerpt; per its name it extracts
// into destinationDirectoryName (overwriteFiles presumably controls overwriting - confirm).
archive.ExtractToDirectoryExtension(destinationDirectoryName, overwriteFiles);
}
在 ZipFile.Open(string archiveFileName, ZipArchiveMode mode, Encoding? entryNameEncoding) 方法中,首先使用下面的构造函数创建 FileStream:
FileStream(String, FileMode, FileAccess, FileShare, Int32, Boolean)
接下来就是创建ZipArchive对象
public ZipArchive(Stream stream, ZipArchiveMode mode, bool leaveOpen, Encoding? entryNameEncoding)
/// <summary>
/// Creates a ZipArchive over <paramref name="stream"/>. In the visible excerpt, Read mode
/// immediately reads the end-of-central-directory record; handling of the other modes is omitted.
/// </summary>
/// <param name="stream">The stream containing the archive; must not be null.</param>
/// <param name="mode">The archive mode that selects the initialization path.</param>
/// <param name="leaveOpen">Not used in the visible excerpt.</param>
/// <param name="entryNameEncoding">Stored in EntryNameEncoding; may be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
public ZipArchive(Stream stream, ZipArchiveMode mode, bool leaveOpen, Encoding? entryNameEncoding)
{
if (stream == null)
throw new ArgumentNullException(nameof(stream));
EntryNameEncoding = entryNameEncoding;
Stream? extraTempStream = null;
try
{
// (code omitted in this excerpt)
// Initialize the ZipArchive state.
// (code omitted in this excerpt)
_entriesCollection = new ReadOnlyCollection<ZipArchiveEntry>(_entries);
switch (mode)
{
// (code omitted in this excerpt)
// ...
// (code omitted in this excerpt)
case ZipArchiveMode.Read:
// Read the end-of-central-directory record.
ReadEndOfCentralDirectory();
break;
// (code omitted in this excerpt)
// ...
// (code omitted in this excerpt)
}
}
catch
{
// Initialization failed: dispose the temporary stream if one was created, then rethrow.
if (extraTempStream != null)
extraTempStream.Dispose();
throw;
}
}
方法ReadEndOfCentralDirectory
/// <summary>
/// Locates the end-of-central-directory (EOCD) signature by seeking backwards from the end of
/// the archive stream. The rest of the method is omitted in this excerpt.
/// </summary>
/// <exception cref="InvalidDataException">The EOCD signature was not found within the search window.</exception>
private void ReadEndOfCentralDirectory()
{
// This seeks backwards almost to the beginning of the EOCD, one byte after where the signature would be
// located if the EOCD had the minimum possible size (no file zip comment)
_archiveStream.Seek(-ZipEndOfCentralDirectoryBlock.SizeOfBlockWithoutSignature, SeekOrigin.End);
// If the EOCD has the minimum possible size (no zip file comment), then exactly the previous 4 bytes will contain the signature
// But if the EOCD has max possible size, the signature should be found somewhere in the previous 64K + 4 bytes
// The third argument caps the backwards search at the maximum comment length plus the signature size.
if (!ZipHelper.SeekBackwardsToSignature(_archiveStream,
ZipEndOfCentralDirectoryBlock.SignatureConstant,
ZipEndOfCentralDirectoryBlock.ZipFileCommentMaxLength + ZipEndOfCentralDirectoryBlock.SignatureSize))
throw new InvalidDataException();
// (code omitted in this excerpt)
// ...
// (code omitted in this excerpt)
}
ZipHelper.SeekBackwardsToSignature
/// <summary>
/// Searches <paramref name="stream"/> for <paramref name="signatureToFind"/>; the search loop
/// itself is omitted in this excerpt.
/// </summary>
/// <param name="stream">The stream to search and reposition.</param>
/// <param name="signatureToFind">The signature value to look for.</param>
/// <param name="maxBytesToRead">Search limit in bytes (used by the omitted code).</param>
/// <returns>
/// false when the signature was not found; otherwise true, after seeking the stream by
/// bufferPointer relative to its current position.
/// </returns>
internal static bool SeekBackwardsToSignature(Stream stream, uint signatureToFind, int maxBytesToRead)
{
int bufferPointer = 0;
bool signatureFound = false;
// (code omitted in this excerpt)
// Computes the offset of the central directory header (sets bufferPointer and signatureFound).
// (code omitted in this excerpt)
if (!signatureFound)
{
return false;
}
else
{
// Move the stream to the located header position, relative to the current position.
stream.Seek(bufferPointer, SeekOrigin.Current);
return true;
}
}
循环读取压缩包中文档
// Walk every entry of the archive; reading source.Entries triggers the central directory read.
foreach (ZipArchiveEntry entry in source.Entries)
{
// ExtractRelativeToDirectoryExtension is defined outside this excerpt; per its name it
// extracts the entry relative to destinationDirectoryName - confirm.
entry.ExtractRelativeToDirectoryExtension(destinationDirectoryName, overwriteFiles);
}
读取ZipArchive属性Entries
/// <summary>
/// Gets the read-only collection of entries in the archive, reading the central directory
/// on first access.
/// </summary>
/// <exception cref="NotSupportedException">The archive was opened in Create mode.</exception>
public ReadOnlyCollection<ZipArchiveEntry> Entries
{
    get
    {
        // Entries cannot be enumerated while the archive is being created.
        if (_mode == ZipArchiveMode.Create)
        {
            throw new NotSupportedException();
        }

        ThrowIfDisposed();

        // Make sure the central directory headers have been parsed into entries
        // before handing out the collection.
        EnsureCentralDirectoryRead();

        return _entriesCollection;
    }
}
/// <summary>
/// Reads the central directory once; later calls return immediately.
/// </summary>
private void EnsureCentralDirectoryRead()
{
    // The entries were already loaded by an earlier call.
    if (_readEntries)
    {
        return;
    }

    // First access: parse the central directory into ZipArchiveEntry objects,
    // then remember that it has been done.
    ReadCentralDirectory();
    _readEntries = true;
}
/// <summary>
/// Reads every central directory file header, starting at _centralDirectoryStart, and adds a
/// ZipArchiveEntry for each one.
/// </summary>
/// <exception cref="InvalidDataException">
/// The number of headers read differs from _expectedNumberOfEntries, or the stream ended
/// while a header was being read (the EndOfStreamException is kept as the inner exception).
/// </exception>
private void ReadCentralDirectory()
{
    try
    {
        // assume ReadEndOfCentralDirectory has been called and has populated _centralDirectoryStart
        // Position the archive stream at the start of the central directory.
        _archiveStream.Seek(_centralDirectoryStart, SeekOrigin.Begin);

        long numberOfEntries = 0;

        Debug.Assert(_archiveReader != null);

        // Extra fields and comments are only kept when the archive is opened for update.
        bool saveExtraFieldsAndComments = Mode == ZipArchiveMode.Update;

        // Read headers until TryReadBlock reports that the next block is not a central
        // directory file header.
        while (ZipCentralDirectoryFileHeader.TryReadBlock(_archiveReader,
                   saveExtraFieldsAndComments, out ZipCentralDirectoryFileHeader currentHeader))
        {
            // Wrap the header in a ZipArchiveEntry and add it to the archive's entries.
            AddEntry(new ZipArchiveEntry(this, currentHeader));
            numberOfEntries++;
        }

        if (numberOfEntries != _expectedNumberOfEntries)
        {
            throw new InvalidDataException();
        }
    }
    catch (EndOfStreamException ex)
    {
        // Keep the original failure as the inner exception so the root cause
        // (a truncated central directory) is not lost.
        throw new InvalidDataException("Central Directory corrupt.", ex);
    }
}
/// <summary>
/// Tries to read one central directory file header from <paramref name="reader"/>.
/// </summary>
/// <param name="reader">Reader positioned at the start of a candidate header.</param>
/// <param name="saveExtraFieldsAndComments">Used by the code omitted from this excerpt.</param>
/// <param name="header">The populated header on success; left at its default value when the signature does not match.</param>
/// <returns>false when the first four bytes are not SignatureConstant; otherwise true.</returns>
public static bool TryReadBlock(BinaryReader reader, bool saveExtraFieldsAndComments, out ZipCentralDirectoryFileHeader header)
{
header = default;
// A block that does not begin with the header signature is not a central directory file header.
if (reader.ReadUInt32() != SignatureConstant)
return false;
// The fields below are read in the order they appear in the stream.
header.VersionMadeBySpecification = reader.ReadByte();
header.VersionMadeByCompatibility = reader.ReadByte();
header.VersionNeededToExtract = reader.ReadUInt16();
header.GeneralPurposeBitFlag = reader.ReadUInt16();
header.CompressionMethod = reader.ReadUInt16();
header.LastModified = reader.ReadUInt32();
header.Crc32 = reader.ReadUInt32();
// The "Small" locals hold the fixed-width values; they are consumed by the omitted code below.
uint compressedSizeSmall = reader.ReadUInt32();
uint uncompressedSizeSmall = reader.ReadUInt32();
header.FilenameLength = reader.ReadUInt16();
header.ExtraFieldLength = reader.ReadUInt16();
header.FileCommentLength = reader.ReadUInt16();
ushort diskNumberStartSmall = reader.ReadUInt16();
header.InternalFileAttributes = reader.ReadUInt16();
header.ExternalFileAttributes = reader.ReadUInt32();
uint relativeOffsetOfLocalHeaderSmall = reader.ReadUInt32();
// (code omitted in this excerpt: reads the remaining information)
return true;
}
}
文件核心目录头ZipCentralDirectoryFileHeader
数据格式
字段 | 类型 | 含义 |
---|---|---|
SignatureConstant | const uint | 核心目录头签名 |
VersionMadeByCompatibility | byte | 版本兼容性 |
VersionMadeBySpecification | byte | 版本规约 |
VersionNeededToExtract | ushort | 解压版本 |
GeneralPurposeBitFlag | ushort | 通用位标记 |
CompressionMethod | ushort | 压缩方法 |
LastModified | uint | 最后修改时间 |
Crc32 | uint | crc-校验码 |
CompressedSize | long | 压缩后大小 |
UncompressedSize | long | 压缩前数据大小 |
FilenameLength | ushort | 文件名长度 |
ExtraFieldLength | ushort | 扩展域长 |
FileCommentLength | ushort | 文件注释长 |
DiskNumberStart | int | 文件起始所在的磁盘编号 |
InternalFileAttributes | ushort | 内部文件属性 |
ExternalFileAttributes | uint | 外部文件属性 |
RelativeOffsetOfLocalHeader | long | 本地文件头的相对偏移量,用于后面计算读取文件内容的起始位置 |
Filename | byte[] | 文件名称 |
FileComment | byte[] | 文件注释 |
ExtraFields | List<ZipGenericExtraField> | 文件扩展域 |
// Excerpt repeated from ReadCentralDirectory: read headers until TryReadBlock returns false.
while (ZipCentralDirectoryFileHeader.TryReadBlock(_archiveReader,
saveExtraFieldsAndComments, out currentHeader))
{
// Convert the header into a ZipArchiveEntry and add it to the collection.
AddEntry(new ZipArchiveEntry(this, currentHeader));
numberOfEntries++;
}
->
/// <summary>
/// Creates an entry from a central directory file header that was read from an existing archive.
/// </summary>
/// <param name="archive">The archive that owns this entry.</param>
/// <param name="cd">The central directory file header supplying the entry's metadata.</param>
internal ZipArchiveEntry(ZipArchive archive, ZipCentralDirectoryFileHeader cd)
{
_archive = archive;
// This constructor marks the entry as already present in the archive.
_originallyInArchive = true;
_diskNumberStart = cd.DiskNumberStart;
// The raw header values are cast to their enum types.
_versionMadeByPlatform = (ZipVersionMadeByPlatform)cd.VersionMadeByCompatibility;
_versionMadeBySpecification = (ZipVersionNeededValues)cd.VersionMadeBySpecification;
_versionToExtract = (ZipVersionNeededValues)cd.VersionNeededToExtract;
// (code omitted in this excerpt)
// Initialize the remaining ZipArchiveEntry fields.
}
前面步骤处理完成后返回文档集合
/// <summary>
/// Writes the content of <paramref name="source"/> to <paramref name="destinationFileName"/>
/// and then sets the file's last write time from the entry.
/// </summary>
/// <param name="source">The archive entry to extract.</param>
/// <param name="destinationFileName">Path of the file to write.</param>
/// <param name="overwrite">Used by the code omitted from this excerpt (presumably selects fMode - confirm).</param>
public static void ExtractToFileExtension(this ZipArchiveEntry source, string destinationFileName, bool overwrite)
{
// (code omitted in this excerpt; it defines fMode)
using (Stream fs = new FileStream(destinationFileName, fMode, FileAccess.Write, FileShare.None, bufferSize: 0x1000, useAsync: false))
{
// source.Open() returns a stream over the entry's data, located via the entry's header information.
using (Stream es = source.Open())
es.CopyTo(fs); // write the entry's data to the destination file
}
// Set the destination file's last write time to the entry's LastWriteTime.
File.SetLastWriteTime(destinationFileName, source.LastWriteTime.DateTime);
}
/// <summary>
/// Opens the entry's stream using the strategy that matches the owning archive's mode.
/// </summary>
/// <returns>The stream returned by the read, write, or update opener.</returns>
public Stream Open()
{
    ThrowIfInvalidArchive();

    ZipArchiveMode archiveMode = _archive.Mode;

    // Read mode is the path this article follows.
    if (archiveMode == ZipArchiveMode.Read)
    {
        return OpenInReadMode(checkOpenable: true);
    }

    if (archiveMode == ZipArchiveMode.Create)
    {
        return OpenInWriteMode();
    }

    // Anything else is expected to be Update mode.
    Debug.Assert(_archive.Mode == ZipArchiveMode.Update);
    return OpenInUpdateMode();
}
/// <summary>
/// Builds a stream over this entry's compressed bytes and wraps it in the matching decompressor.
/// </summary>
/// <param name="checkOpenable">Used by the code omitted from this excerpt.</param>
/// <returns>The stream produced by GetDataDecompressor.</returns>
private Stream OpenInReadMode(bool checkOpenable)
{
// (code omitted in this excerpt)
// _archive.ArchiveStream: the archive's underlying binary stream.
// OffsetOfCompressedData: where this entry's content starts in the stream (per the article's description).
// _compressedSize: length of the entry's content after compression.
Stream compressedStream = new SubReadStream(_archive.ArchiveStream, OffsetOfCompressedData, _compressedSize);
return GetDataDecompressor(compressedStream);
}
//开始读取
/// <summary>
/// Wraps <paramref name="compressedStreamToRead"/> in the decompression stream that matches
/// this entry's CompressionMethod.
/// </summary>
/// <param name="compressedStreamToRead">Stream over the entry's compressed bytes.</param>
/// <returns>
/// A DeflateStream or DeflateManagedStream for Deflate/Deflate64 entries; otherwise the input
/// stream itself.
/// </returns>
private Stream GetDataDecompressor(Stream compressedStreamToRead)
{
    // CompressionMethod selects the algorithm used to read the entry's content.
    // Stored = 0x0, Deflate = 0x8, Deflate64 = 0x9, BZip2 = 0xC, LZMA = 0xE
    switch (CompressionMethod)
    {
        case CompressionMethodValues.Deflate:
            return new DeflateStream(compressedStreamToRead, CompressionMode.Decompress, _uncompressedSize);

        case CompressionMethodValues.Deflate64:
            return new DeflateManagedStream(compressedStreamToRead, CompressionMethodValues.Deflate64, _uncompressedSize);

        case CompressionMethodValues.Stored:
        default:
            // we can assume that only deflate/deflate64/stored are allowed because we assume that
            // IsOpenable is checked before this function is called
            Debug.Assert(CompressionMethod == CompressionMethodValues.Stored);
            return compressedStreamToRead;
    }
}
读取到文件内容后则可以进行保存工作
整个解析 Zip 的过程比较清晰,但是内容较多,需要慢慢消化;Zip 数据协议格式接下来会补上。
补充
查看Zip数据协议格式