这是个人第一篇写 ART 虚拟机相关的文章,使用的源码是 Android 11(对应 ART 的 android11-release
分支)。决定写这么一些文章,一方面是逼自己认真梳理代码;另一方面是想留下一些东西,希望对后来者有帮助。
参考资料使用的是邓凡平的《深入理解Android——Java虚拟机ART》。书里有的内容,文章基本不会再重复。文章标题之所以称“要旨”,除了原意“要点”,还暗含“笔记”的意思。可以把文章当做我的读书笔记,在书中内容的基础上做了某些扩展。
dex file layout
以伪代码的形式展示 dex 文件的布局如下:
dex_file {
header_item header;
string_id_item string_ids[]; // string identifiers list
type_id_item type_ids[]; // type identifiers list
proto_id_item proto_ids[]; // method prototype identifiers list
field_id_item field_ids[]; // field identifiers list
method_id_item method_ids[]; // method identifiers list
class_def_item class_defs[]; // class definitions list
call_site_id_item call_site_ids[]; // call site identifiers list
method_handle_item method_handles[]; // method handles list
ubyte data[];
ubyte link_data[]; // data used in statically linked files
// format is unspecified
// empty in unlinked files
}
- 为了加快解析速度,dex 文件是 4 字节对齐的
- 默认情况下,dex 文件是 little-endian 的(小端)
- call_site_ids 和 method_handles 是 Android 8 新增的,所以邓的书中没有这两个字段
下面我们从 header_item 开始逐个击破。
header_item
header_item 在源码中对应的定义如下:
// art11/libdexfile/dex/dex_file.h
class DexFile {
 public:
  // Length in bytes of Header::signature_.
  static constexpr size_t kSha1DigestSize = 20;
  // Expected value of Header::endian_tag_.
  static constexpr uint32_t kDexEndianConstant = 0x12345678;

  // Raw header_item.
  //
  // Every member is a fixed-width integer or byte array and the declaration
  // order is the layout, so fields must not be reordered or resized. All
  // members are zero-initialized by default.
  struct Header {
    uint8_t magic_[8] = {};
    uint32_t checksum_ = 0;  // See also location_checksum_
    uint8_t signature_[kSha1DigestSize] = {};
    uint32_t file_size_ = 0;  // size of entire file
    uint32_t header_size_ = 0;  // offset to start of next section
    uint32_t endian_tag_ = 0;
    uint32_t link_size_ = 0;  // unused
    uint32_t link_off_ = 0;  // unused
    uint32_t map_off_ = 0;  // map list offset from data_off_
    uint32_t string_ids_size_ = 0;  // number of StringIds
    uint32_t string_ids_off_ = 0;  // file offset of StringIds array
    uint32_t type_ids_size_ = 0;  // number of TypeIds, we don't support more than 65535
    uint32_t type_ids_off_ = 0;  // file offset of TypeIds array
    uint32_t proto_ids_size_ = 0;  // number of ProtoIds, we don't support more than 65535
    uint32_t proto_ids_off_ = 0;  // file offset of ProtoIds array
    uint32_t field_ids_size_ = 0;  // number of FieldIds
    uint32_t field_ids_off_ = 0;  // file offset of FieldIds array
    uint32_t method_ids_size_ = 0;  // number of MethodIds
    uint32_t method_ids_off_ = 0;  // file offset of MethodIds array
    uint32_t class_defs_size_ = 0;  // number of ClassDefs
    uint32_t class_defs_off_ = 0;  // file offset of ClassDef array
    uint32_t data_size_ = 0;  // size of data section
    uint32_t data_off_ = 0;  // file offset of data section

    // Decode the dex magic version
    uint32_t GetVersion() const;
  };
  // ...
};
- magic 的值是
DEX_FILE_MAGIC
ubyte[8] DEX_FILE_MAGIC = { 0x64 0x65 0x78 0x0a 0x30 0x33 0x39 0x00 }
= "dex\n039\0"
- checksum 是对文件中除 magic 和 checksum 自身以外的所有内容计算的 adler32 校验和
- signature 是对文件中除 magic、checksum 和 signature 自身以外的所有内容计算的 SHA-1 哈希
- endian tag 用于标识文件是大端还是小端。ART 会检查 endian_tag_,只有跟 kDexEndianConstant 相等的 dex 文件才是合法的:
// Excerpt of the header validation; the parts left out are marked "// ...".
// As far as shown here: when the header's endian_tag_ differs from
// DexFile::kDexEndianConstant, the function reports the offending value via
// ErrorStringPrintf and returns false.
bool DexFileVerifier::CheckHeader() {
// ...
// Check the contents of the header.
if (header_->endian_tag_ != DexFile::kDexEndianConstant) {
// Pass the unexpected tag (formatted as hex) to the error reporter, then reject.
ErrorStringPrintf("Unexpected endian_tag: %x", header_->endian_tag_);
return false;
}
// ...
}
string_id_item、type_id_item、field_id_item、proto_id_item、method_id_item
// art11/libdexfile/dex/dex_file_structs.h
namespace art {
namespace dex {
// The structs below are the raw id records. Each one stores only indexes
// or offsets that refer to other parts of the dex file, never the
// referenced data itself.

// Raw string_id_item.
struct StringId {
// The DexFile string accessors in this article add this offset to DataBegin().
uint32_t string_data_off_; // offset in bytes from the base address
};
// Raw type_id_item.
struct TypeId {
dex::StringIndex descriptor_idx_; // index into string_ids
};
// Raw field_id_item.
struct FieldId {
dex::TypeIndex class_idx_; // index into type_ids_ array for defining class
dex::TypeIndex type_idx_; // index into type_ids_ array for field type
dex::StringIndex name_idx_; // index into string_ids_ array for field name
};
// Raw proto_id_item.
struct ProtoId {
dex::StringIndex shorty_idx_; // index into string_ids array for shorty descriptor
dex::TypeIndex return_type_idx_; // index into type_ids array for return type
uint16_t pad_; // padding = 0
// Locates the type_list describing the parameter types.
uint32_t parameters_off_; // file offset to type_list for parameter types
};
// Raw method_id_item.
struct MethodId {
dex::TypeIndex class_idx_; // index into type_ids_ array for defining class
dex::ProtoIndex proto_idx_; // index into proto_ids_ array for method prototype
dex::StringIndex name_idx_; // index into string_ids_ array for method name
};
} // namespace dex
} // namespace art
其中,各种 Index 其实就是整型的 class wrapper(简单说,把这些 xxx id 当成 int/short 即可):
// art11/libdexfile/dex/dex_file_types.h
namespace art {
namespace dex {
template <typename T>
class DexIndex {
public:
T index_;
// ...
};
class ProtoIndex : public DexIndex<uint16_t> { ... };
class StringIndex : public DexIndex<uint32_t> { ... };
class TypeIndex : public DexIndex<uint16_t> { ... };
} // namespace dex
} // namespace art
string_data_item
StringId 里只是存着一个 offset,真正的 String 内容存放在 data section 里,由 dex::StringId.string_data_off_(从文件头开始算)指向的 string_data_item 中:
string_data_item {
uleb128 utf16_size;
ubyte data[];
}
代码里没有直接用于表示 string_data_item
的 raw type,只提供了一些便捷方法用于读取 string data:
// art11/libdexfile/dex/dex_file-inl.h
// Returns the length stored in front of the string data that `string_id`
// refers to. The value is read as an unsigned LEB128 number located
// `string_id.string_data_off_` bytes after DataBegin().
inline int32_t DexFile::GetStringLength(const dex::StringId& string_id) const {
  const uint8_t* cursor = DataBegin();
  cursor += string_id.string_data_off_;
  // The decoder receives the cursor by address; only the decoded value is used here.
  return DecodeUnsignedLeb128(&cursor);
}
// Returns a pointer to the character data of the string that `string_id`
// refers to and stores the string's length in `*utf16_length`.
//
// The string data starts `string_id.string_data_off_` bytes after DataBegin()
// with an unsigned LEB128 length. That length is decoded into `*utf16_length`,
// and the returned pointer is the read position left by the decoder.
//
// `utf16_length` is written unconditionally and must not be null.
inline const char* DexFile::GetStringDataAndUtf16Length(const dex::StringId& string_id,
                                                        uint32_t* utf16_length) const {
  const uint8_t* ptr = DataBegin() + string_id.string_data_off_;
  *utf16_length = DecodeUnsignedLeb128(&ptr);
  // The bytes are handed out as chars; the cast needs its explicit target type.
  return reinterpret_cast<const char*>(ptr);
}
- DataBegin() 返回 art::DexFile 的 data_begin_ 字段,指向文件的开头。
也可以通过 dex::StringIndex 来读取相关数据。dex::StringIndex 会先转换为 dex::StringId,然后调用上述两个方法:
// art11/libdexfile/dex/dex_file.h
class DexFile {
 public:
  // Returns the StringId at the specified index.
  //
  // `idx.index_` is used directly as a subscript into string_ids_; no bounds
  // check is performed in this excerpt, so the caller must pass a valid index.
  const dex::StringId& GetStringId(dex::StringIndex idx) const {
    return string_ids_[idx.index_];
  }
};
type_list
dex::ProtoId 的 parameters_off_ 指向的是 type_list,用于表示参数列表的类型:
// art11/libdexfile/dex/dex_file_structs.h
namespace art {
namespace dex {

// Raw type_item.
struct TypeItem {
  dex::TypeIndex type_idx_;  // index into type_ids section
};

// Raw type_list: an entry count followed by the entries themselves.
class TypeList {
 public:
  // Number of entries in the list.
  uint32_t Size() const {
    return size_;
  }

  // Returns the entry at position `idx`. The index is only checked through
  // DCHECK_LT, so callers must pass idx < Size().
  const TypeItem& GetTypeItem(uint32_t idx) const {
    DCHECK_LT(idx, this->size_);
    return this->list_[idx];
  }

  // Size in bytes of the part of the list that is common.
  // NOTE(review): 4 presumably corresponds to the uint32_t size_ field.
  static constexpr size_t GetHeaderSize() {
    return 4U;
  }

  // Size in bytes of the whole type list including all the stored elements.
  static constexpr size_t GetListSize(size_t count) {
    return GetHeaderSize() + sizeof(TypeItem) * count;
  }

 private:
  uint32_t size_;  // size of the list, in entries
  // Declared with a single element although GetTypeItem indexes up to size_.
  // NOTE(review): presumably the object overlays a longer raw list - confirm.
  TypeItem list_[1];  // elements of the list
  DISALLOW_COPY_AND_ASSIGN(TypeList);
};

}  // namespace dex
}  // namespace art
class_def_item
class_def_item 的内容加上后这一篇文章就太长了,放到下一篇再继续。