DiskCache
blockfile: mac上的chrome以及ios使用的该类cache
memory: 隐私模式使用该类的cache
simple cache: android linux 以及 mac上的chromium使用这种
为了方便,我们并不需要编译chromium,chromium项目对于每个模块都有相应的测试单元,我们只需编译net模块的测试单元。
添加我们的测试单元并编译net_unittests
Ubuntu16.04 + 8G内存以上
下载depot_tools
git clone https://gitlab.com/wanghui0705/depot_tools.git
将depot_tools配置到环境变量
下载代码
git clone https://gitlab.com/wanghui0705/chromium.git
切换到src目录 应用以下patch, 创建5个entry, 然后我们根据生成的cache文件来分析
diff --git a/net/disk_cache/entry_unittest.cc b/net/disk_cache/entry_unittest.cc
index 3fd25fa54cf7..6751f293a6b2 100644
--- a/net/disk_cache/entry_unittest.cc
+++ b/net/disk_cache/entry_unittest.cc
@@ -4533,6 +4533,40 @@ TEST_F(DiskCacheEntryTest, SimpleCacheTruncateLargeSparseFile) {
entry->Close();
}
+TEST_F(DiskCacheEntryTest, SimpleCacheMultiEntryTest) {  // Writes 5 entries.
+  SetCacheType(net::DISK_CACHE);
+  SetSimpleCacheMode();
+  InitCache();
+  const std::vector<std::string> entry_keys = {
+    "first",
+    "second",
+    "third",
+    "forth",
+    "fifth"
+  };
+
+  for (const auto& key : entry_keys) {  // By reference: no per-key copy.
+    disk_cache::Entry* entry = nullptr;
+    if (CreateEntry(key, &entry) == net::OK) {
+      std::string content_0_data = key + " content_0_data";  // Stream 0.
+      scoped_refptr<net::IOBuffer> content_0_iobuffer(
+          new net::StringIOBuffer(content_0_data));
+      WriteData(entry, 0, 0, content_0_iobuffer.get(),
+                content_0_data.size(), false);
+      std::string content_1_data = key + " content_1_data";  // Stream 1.
+      scoped_refptr<net::IOBuffer> content_1_iobuffer(
+          new net::StringIOBuffer(content_1_data));
+      WriteData(entry, 1, 0, content_1_iobuffer.get(),
+                content_1_data.size(), false);
+      entry->Close();
+    }
+  }
+
+  base::RunLoop().Run();  // Never quits; keeps the cache files until timeout.
+}
+
1)build/install-build-deps-android.sh
2)gn gen --args='target_os="android"' out/Default
3) ninja -C out/Default net_unittests
4) out/Default/net_unittests --gtest_filter=DiskCacheEntryTest.SimpleCacheMultiEntryTest
第四步执行我们的test。为了不让cache被删除,测试最后调用base::RunLoop().Run(),一直运行直到timeout;我们的重点是拿到cache文件,并根据这些文件来分析cache结构
得到的文件结构如下
lichenludeMacBook-Pro:src wanghui$ ls /var/folders/fy/xxs2fflj7tqftj7xv7_4j8d80000gp/T/.org.chromium.Chromium.3WWWaS/
004b38a229782f35_0 04a6facda006712f_0 2d8f0ac94831d45a_0 c3443dc1376a99e0_0 eb7ba7b90033fb34_0 index index-dir
可以看到有五个data文件, 一个index文件, 一个index-dir文件夹
lichenludeMacBook-Pro:.org.chromium.Chromium.3WWWaS wanghui$ ls index-dir/
the-real-index
the-real-index文件就是SimpleIndexFile文件写入的数据
Index文件: 包含index文件的元数据 以及 各个entry data文件的元数据。
SimpleIndexFile 写入文件的时候调用这个函数, 这个函数主要创建并初始化index file的IndexMetadata,并序列化。
// Serializes the index and posts the actual disk write to |cache_runner_|.
//
// |reason|, |entry_set.size()| and |cache_size| are recorded in the index
// metadata. |start| and |app_on_background| are forwarded unchanged to
// SyncWriteToDisk. If |callback| is non-null it is posted as the reply to the
// write task.
void SimpleIndexFile::WriteToDisk(SimpleIndex::IndexWriteToDiskReason reason,
                                  const SimpleIndex::EntrySet& entry_set,
                                  uint64_t cache_size,
                                  const base::TimeTicks& start,
                                  bool app_on_background,
                                  const base::Closure& callback) {
  UmaRecordIndexWriteReason(reason, cache_type_);

  // Build the index file's metadata, then pickle it together with the entries.
  IndexMetadata metadata(reason, entry_set.size(), cache_size);
  std::unique_ptr<base::Pickle> serialized = Serialize(metadata, entry_set);

  // Ownership of the pickle moves into the bound task.
  base::Closure write_task = base::Bind(
      &SimpleIndexFile::SyncWriteToDisk, cache_type_, cache_directory_,
      index_file_, temp_index_file_, base::Passed(&serialized), start,
      app_on_background);

  if (!callback.is_null()) {
    cache_runner_->PostTaskAndReply(FROM_HERE, write_task, callback);
    return;
  }
  cache_runner_->PostTask(FROM_HERE, write_task);
}
序列化函数,会将index_metadata 跟每个entry的EntryMetadata序列化到一个base::Pickle结构
using EntrySet = std::unordered_map ;
// static
std::unique_ptr SimpleIndexFile::Serialize(
const SimpleIndexFile::IndexMetadata& index_metadata,
const SimpleIndex::EntrySet& entries) {
std::unique_ptr pickle = std::make_unique();
index_metadata.Serialize(pickle.get());
for (SimpleIndex::EntrySet::const_iterator it = entries.begin();
it != entries.end(); ++it) {
pickle->WriteUInt64(it->first);
it->second.Serialize(pickle.get());
}
return pickle;
}
IndexMetadata 序列化
// Appends the index-level metadata to |pickle| in this fixed order:
// magic number (uint64), version (uint32), entry count (uint64),
// cache size (uint64), write reason (uint32).
// |pickle| must be non-null.
void SimpleIndexFile::IndexMetadata::Serialize(base::Pickle* pickle) const {
  DCHECK(pickle);
  pickle->WriteUInt64(magic_number_);
  pickle->WriteUInt32(version_);
  pickle->WriteUInt64(entry_count_);
  pickle->WriteUInt64(cache_size_);
  // The cast's target type was missing from this snippet; uint32_t matches
  // the WriteUInt32 call it feeds.
  pickle->WriteUInt32(static_cast<uint32_t>(reason_));
}
magic_number
// Magic number identifying an index file. Its bytes are the ASCII text
// "enter yo"; stored as a little-endian uint64 they show up in a hex dump as
// 6f79 2072 6574 6e65 ("oy retne").
const uint64_t kSimpleIndexMagicNumber = UINT64_C(0x656e74657220796f);
打开之前生成的the-real-index文件
1 00000000: a000 0000 fa80 215e 6f79 2072 6574 6e65 ......!^oy retne
2 00000010: 0800 0000 0500 0000 0000 0000 0005 0000 ................
3 00000020: 0000 0000 0200 0000 5ad4 3148 c90a 8f2d ........Z.1H...-
4 00000030: c05b a283 afd0 2e00 0001 0000 0000 0000 .[..............
5 00000040: 2f71 06a0 cdfa a604 c05b a283 afd0 2e00 /q.......[......
6 00000050: 0001 0000 0000 0000 34fb 3300 b9a7 7beb ........4.3...{.
7 00000060: c05b a283 afd0 2e00 0001 0000 0000 0000 .[..............
8 00000070: 352f 7829 a238 4b00 c05b a283 afd0 2e00 5/x).8K..[......
9 00000080: 0001 0000 0000 0000 e099 6a37 c13d 44c3 ..........j7.=D.
10 00000090: c05b a283 afd0 2e00 0001 0000 0000 0000 .[..............
11 000000a0: ff96 a683 afd0 2e00 0a .........
前8个字节是PickleHeader: 4字节的payload_size, 后面跟4字节的crc
a000 0000 fa80 215e
payload : 0xa0 160个字节
crc : 0x5e2180fa (文件中按小端存储为 fa80 215e)
// Payload follows after allocation of Header (header size is customizable).
// Base pickle header: a single 32-bit field holding the payload length.
struct Header {
uint32_t payload_size; // Specifies the size of the payload.
};
// Index-file pickle header: the inherited payload_size followed by a 32-bit
// CRC field.
// NOTE(review): presumably the CRC covers the pickle payload -- confirm the
// exact range against the code that computes it.
struct PickleHeader : public base::Pickle::Header {
uint32_t crc;
};
8-16字节是 magic number
6f79 2072 6574 6e65
16-20字节 version
0800 0000 => const uint32_t kSimpleVersion = 8;
20-28字节 entry个数
0500 0000 0000 0000
EntryMetadata序列化
五个文件的hash值
004b38a229782f35_0
04a6facda006712f_0
2d8f0ac94831d45a_0
c3443dc1376a99e0_0
eb7ba7b90033fb34_0
// Appends this entry's record to |pickle|: the last-used time as a signed
// 64-bit internal value, then the packed size/in-memory word written as an
// unsigned 64-bit value. |pickle| must be non-null.
void EntryMetadata::Serialize(base::Pickle* pickle) const {
  DCHECK(pickle);

  // If you modify the size of the pickle, be sure to update
  // kOnDiskSizeBytes.
  // The size (in 256-byte chunks, per the member's name) sits above bit 8;
  // in_memory_data_ presumably fits in the low 8 bits.
  const uint32_t size_and_memory_word =
      in_memory_data_ | (entry_size_256b_chunks_ << 8);
  const int64_t last_used_internal = GetLastUsedTime().ToInternalValue();

  pickle->WriteInt64(last_used_internal);
  pickle->WriteUInt64(size_and_memory_word);
}
real-index-file 第41-48个字节
5ad4 3148 c90a 8f2d => 2d8f0ac94831d45a_0
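hash之后的8个字节 c05b a283 afd0 2e00 是 last_used_time, 再后面8个字节是打包后的文件大小信息
0001 0000 0000 0000 => packed_entry_info = 0x0100, entry_size_256b_chunks_ = 0x0100 >> 8 = 1, in_memory_data_ = 0; entry大小 = 1 * 256 = 256字节, 即大小以256字节为单位记录。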
Data文件
随便找一个data文件
1 00000000: 305c 72a7 1b6d fbfc 0500 0000 0600 0000 0\r..m..........
2 00000010: e3bc 0f09 0000 0000 7365 636f 6e64 7365 ........secondse
3 00000020: 636f 6e64 2063 6f6e 7465 6e74 5f31 5f64 cond content_1_d
4 00000030: 6174 61d8 410d 9745 6ffa f401 0000 00a1 ata.A..Eo.......
5 00000040: 35de 6915 0000 0000 0000 0073 6563 6f6e 5.i........secon
6 00000050: 6420 636f 6e74 656e 745f 305f 6461 7461 d content_0_data
7 00000060: 1636 7aac b67a 4a01 7c8d a8ab 9568 2ccb .6z..zJ.|....h,.
8 00000070: 3908 6378 0f71 14dd a0a0 e0c5 5644 c7c4 9.cx.q......VD..
9 00000080: d841 0d97 456f faf4 0300 0000 04e6 82a2 .A..Eo..........
10 00000090: 1500 0000 0000 0000 0a .........
data文件是由entry写入的
SimpleEntryFormat magic number 主要有三个
// Magic numbers used in Simple cache entry files. Each is written as a
// little-endian uint64, so e.g. the initial magic appears in a hex dump as
// 305c 72a7 1b6d fbfc and the final magic as d841 0d97 456f faf4.
const uint64_t kSimpleInitialMagicNumber = UINT64_C(0xfcfb6d1ba7725c30);
const uint64_t kSimpleFinalMagicNumber = UINT64_C(0xf4fa6f45970d41d8);
const uint64_t kSimpleSparseRangeMagicNumber = UINT64_C(0xeb97bf016553676b);
SimpleEntryFormat 结构如下
// A file containing stream 0 and stream 1 in the Simple cache consists of:
// - a SimpleFileHeader.
// - the key.
// - the data from stream 1.
// - a SimpleFileEOF record for stream 1.
// - the data from stream 0.
// - (optionally) the SHA256 of the key.
// - a SimpleFileEOF record for stream 0.
stream0 存HTTP HEADER, stream1 存content
SimpleFileHeader结构如下
// Fixed-size header at the start of an entry data file; the key bytes follow
// immediately after it.
// NOTE(review): the fields total 20 bytes; with 8-byte alignment sizeof is
// 24, so 4 padding bytes trail key_hash on disk -- confirm for the target ABI.
struct NET_EXPORT_PRIVATE SimpleFileHeader {
SimpleFileHeader();
// Offset 0: expected to hold kSimpleInitialMagicNumber.
uint64_t initial_magic_number;
// Offset 8: on-disk entry format version.
uint32_t version;
// Offset 12: length of the key in bytes.
uint32_t key_length;
// Offset 16: 32-bit hash of the key.
uint32_t key_hash;
};
data file 前8个字节
305c 72a7 1b6d fbfc => kSimpleInitialMagicNumber
version
0500 0000 => const uint32_t kSimpleEntryVersionOnDisk = 5;
key_length 还有 key_hash
e3bc 0f09 => base::Hash(key);
0600 0000 => 6个字节的key "second"
sizeof (SimpleFileHeader) = 24
所以会有四个字节的0000 0000
后面是key + stream1
2 00000010: 7365 636f 6e64 7365 ........secondse
3 00000020: 636f 6e64 2063 6f6e 7465 6e74 5f31 5f64 cond content_1_d
4 00000030: 6174 61
key => "second"
data => "second content_1_data"
stream1 后面会跟随一个SimpleFileEOF
// End-of-stream record written after a stream's data in an entry file.
// NOTE(review): the fields total 20 bytes; with 8-byte alignment sizeof is
// 24, so 4 padding bytes trail stream_size on disk -- confirm for the target
// ABI.
struct NET_EXPORT_PRIVATE SimpleFileEOF {
// Bit flags stored in |flags|.
enum Flags {
// Bit 0: presumably signals that |data_crc32| is valid -- confirm.
FLAG_HAS_CRC32 = (1U << 0),
FLAG_HAS_KEY_SHA256 = (1U << 1), // Preceding the record if present.
};
SimpleFileEOF();
// Expected to hold kSimpleFinalMagicNumber.
uint64_t final_magic_number;
// Combination of Flags values.
uint32_t flags;
// CRC32 value for the stream's data.
uint32_t data_crc32;
// |stream_size| is only used in the EOF record for stream 0.
uint32_t stream_size;
};
d841 0d97 456f faf4 => kSimpleFinalMagicNumber
4 00000030: 01 0000 00a1 ata.A..Eo.......
5 00000040: 35de 6915 0000 0000 0000 00
flags => 0100 0000
data_crc32 => a135 de69
stream_size => 1500 0000 => 0x15 21个字节 "second content_1_data"
这个结构体的大小是24字节 所以后面会有 0000 0000
再后面的以此类推了。