Android ART 虚拟机 - dex 文件格式要旨(上)

这是个人第一篇写 ART 虚拟机相关的文章,使用的源码是 Android 11(对应 ART 的 android11-release 分支)。决定写这么一些文章,一方面是逼自己认真梳理代码;另一方面是想留下一些东西,希望对后来者有帮助。

参考资料使用的是邓凡平的《深入理解Android——Java虚拟机ART》。书里有的内容,文章基本不会再重复。文章标题之所以称“要旨”,除了原意“要点”,还暗含“笔记”的意思。可以把文章当做我的读书笔记,在书中内容的基础上做了某些扩展。

dex file layout

以伪代码的形式展示 dex 文件的布局如下:

dex_file {
    header_item         header;
    string_id_item      string_ids[];       // string identifiers list
    type_id_item        type_ids[];         // type identifiers list
    proto_id_item       proto_ids[];        // method prototype identifiers list
    field_id_item       field_ids[];        // field identifiers list
    method_id_item      method_ids[];       // method identifiers list
    class_def_item      class_defs[];       // class definitions list
    call_site_id_item   call_site_ids[];    // call site identifiers list
    method_handle_item  method_handles[];   // method handles list
    ubyte               data[];
    ubyte               link_data[];        // data used in statically linked files
                                            // format is unspecified
                                            // empty in unlinked files
}
  • 为了加快解析速度,dex 文件是 4 字节对齐的
  • 默认情况下,dex 文件是 little-endian 的(小端)
  • call_site_ids 和 method_handles 是 Android 8 新增的。所以邓的书中没有这两个字段

下面我们从 header_item 开始逐个击破。

header_item

header_item 在源码中对应的定义如下:

// art11/libdexfile/dex/dex_file.h
class DexFile {
 public:
  // Number of bytes in Header::signature_.
  static constexpr size_t kSha1DigestSize = 20;
  // Value that Header::endian_tag_ is expected to hold.
  static constexpr uint32_t kDexEndianConstant = 0x12345678;

  // Raw header_item. Every field has a default member initializer, so a
  // default-constructed Header is all zeroes.
  struct Header {
    uint8_t magic_[8] = {};
    uint32_t checksum_ = 0;  // See also location_checksum_
    uint8_t signature_[kSha1DigestSize] = {};
    uint32_t file_size_ = 0;  // size of entire file
    uint32_t header_size_ = 0;  // offset to start of next section
    uint32_t endian_tag_ = 0;
    uint32_t link_size_ = 0;  // unused
    uint32_t link_off_ = 0;  // unused
    uint32_t map_off_ = 0;  // map list offset from data_off_
    uint32_t string_ids_size_ = 0;  // number of StringIds
    uint32_t string_ids_off_ = 0;  // file offset of StringIds array
    uint32_t type_ids_size_ = 0;  // number of TypeIds, we don't support more than 65535
    uint32_t type_ids_off_ = 0;  // file offset of TypeIds array
    uint32_t proto_ids_size_ = 0;  // number of ProtoIds, we don't support more than 65535
    uint32_t proto_ids_off_ = 0;  // file offset of ProtoIds array
    uint32_t field_ids_size_ = 0;  // number of FieldIds
    uint32_t field_ids_off_ = 0;  // file offset of FieldIds array
    uint32_t method_ids_size_ = 0;  // number of MethodIds
    uint32_t method_ids_off_ = 0;  // file offset of MethodIds array
    uint32_t class_defs_size_ = 0;  // number of ClassDefs
    uint32_t class_defs_off_ = 0;  // file offset of ClassDef array
    uint32_t data_size_ = 0;  // size of data section
    uint32_t data_off_ = 0;  // file offset of data section

    // Decode the dex magic version
    uint32_t GetVersion() const;
  };

  // ...
};
  • magic 的值是 DEX_FILE_MAGIC
ubyte[8] DEX_FILE_MAGIC = { 0x64 0x65 0x78 0x0a 0x30 0x33 0x39 0x00 }
                        = "dex\n039\0"
  • checksum 不包括 magic 和自己
  • signature 的计算范围不包括 magic、checksum 和 signature 自身
  • endian tag 用于标识文件是大端还是小端。ART 会检查 endian_tag_,只有跟 kDexEndianConstant 相等的 dex 文件才是合法的:
// Excerpt of the header check: only the endian_tag_ comparison is shown, the
// surrounding code is elided.
bool DexFileVerifier::CheckHeader() {
  // ...

  // Check the contents of the header.
  // A header whose endian_tag_ differs from DexFile::kDexEndianConstant is
  // rejected: the offending value is passed to ErrorStringPrintf and false is
  // returned.
  if (header_->endian_tag_ != DexFile::kDexEndianConstant) {
    ErrorStringPrintf("Unexpected endian_tag: %x", header_->endian_tag_);
    return false;
  }

    // ...
}

string_id_item、type_id_item、field_id_item、proto_id_item、method_id_item

// art11/libdexfile/dex/dex_file_structs.h

namespace art {
namespace dex {

// Raw string_id_item.
// Holds only an offset; the string contents themselves are stored elsewhere.
struct StringId {
  uint32_t string_data_off_;  // offset in bytes from the base address
};

// Raw type_id_item.
// Refers to a type through the string that holds its descriptor.
struct TypeId {
  dex::StringIndex descriptor_idx_;  // index into string_ids
};

// Raw field_id_item.
// Identifies a field by its defining class, its type and its name.
struct FieldId {
  dex::TypeIndex class_idx_;   // index into type_ids_ array for defining class
  dex::TypeIndex type_idx_;    // index into type_ids_ array for field type
  dex::StringIndex name_idx_;  // index into string_ids_ array for field name
};

// Raw proto_id_item.
// Describes a method prototype: shorty descriptor, return type and the
// location of the parameter type list.
struct ProtoId {
  dex::StringIndex shorty_idx_;     // index into string_ids array for shorty descriptor
  dex::TypeIndex return_type_idx_;  // index into type_ids array for return type
  uint16_t pad_;                    // padding = 0
  uint32_t parameters_off_;         // file offset to type_list for parameter types
};

// Raw method_id_item.
// Identifies a method by its defining class, its prototype and its name.
struct MethodId {
  dex::TypeIndex class_idx_;   // index into type_ids_ array for defining class
  dex::ProtoIndex proto_idx_;  // index into proto_ids_ array for method prototype
  dex::StringIndex name_idx_;  // index into string_ids_ array for method name
};

}   // namespace dex
}   // namespace art

其中,各种 Index 其实就是整型的 class wrapper(简单说,把这些 xxx id 当成 int/short 即可):

// art11/libdexfile/dex/dex_file_types.h
namespace art {
namespace dex {

// Thin class wrapper around an integer index; T is the underlying integer type.
template <typename T>
class DexIndex {
 public:
  // The wrapped index value.
  T index_;

  // ...
};

// Concrete index types. The 16-bit / 32-bit widths match the raw layouts of
// ProtoId and MethodId, where a TypeIndex or ProtoIndex occupies two bytes
// and a StringIndex occupies four.
class ProtoIndex : public DexIndex<uint16_t> { /* ... */ };
class StringIndex : public DexIndex<uint32_t> { /* ... */ };
class TypeIndex : public DexIndex<uint16_t> { /* ... */ };

}   // namespace dex
}   // namespace art

string_data_item

StringId 里只是存着一个 offset,真正的 String 内容存放在 data section 里的 string_data_item 中,由 dex::StringId.string_data_off_(从文件头开始算)指向:

string_data_item {
    uleb128     utf16_size;
    ubyte       data[];
}

代码里没有直接用于表示 string_data_item 的 raw type,只提供了一些便捷方法用于读取 string data:

// art11/libdexfile/dex/dex_file-inl.h
inline int32_t DexFile::GetStringLength(const dex::StringId& string_id) const {
  // Start reading at DataBegin() displaced by the offset stored in the StringId.
  const uint8_t* cursor = DataBegin() + string_id.string_data_off_;
  // The first unsigned LEB128 value found there is the result; the pointer is
  // passed by address but its updated position is not needed here.
  const auto decoded_length = DecodeUnsignedLeb128(&cursor);
  return decoded_length;
}

// Reads the string data referenced by `string_id`.
//
// Stores the leading unsigned LEB128 value in `*utf16_length` and returns a
// pointer to the bytes immediately after it, viewed as `const char*`.
inline const char* DexFile::GetStringDataAndUtf16Length(const dex::StringId& string_id,
                                                        uint32_t* utf16_length) const {
  const uint8_t* ptr = DataBegin() + string_id.string_data_off_;
  // The decoder receives the address of `ptr`, so `ptr` is left just past the
  // encoded length, which is where the character data begins.
  *utf16_length = DecodeUnsignedLeb128(&ptr);
  return reinterpret_cast<const char*>(ptr);
}
  • DataBegin() 返回 DexFile 的 data_begin_ 字段,指向文件的开头。

也可以通过 dex::StringIndex 来读取相关数据。dex::StringIndex 会先转换为 dex::StringId,然后调用上述两个方法:

// art11/libdexfile/dex/dex_file.h
class DexFile {
 public:
  // Returns the StringId at the specified index.
  // As shown in this excerpt, idx.index_ is used directly as the subscript
  // into string_ids_.
  const dex::StringId& GetStringId(dex::StringIndex idx) const {
    return string_ids_[idx.index_];
  }
};

type_list

dex::ProtoId 的 parameters_off_ 指向的是 type_list,用于表示参数列表的类型:

// art11/libdexfile/dex/dex_file_structs.h
namespace art {
namespace dex {

// Raw type_item.
struct TypeItem {
  dex::TypeIndex type_idx_;  // index into type_ids section
};

// Raw type_list: an entry count followed by the entries themselves.
class TypeList {
 public:
  // Number of entries in the list.
  uint32_t Size() const {
    return size_;
  }

  // Returns the entry at position idx; idx is DCHECKed to be below size_.
  const TypeItem& GetTypeItem(uint32_t idx) const {
    DCHECK_LT(idx, this->size_);
    return this->list_[idx];
  }

  // Size in bytes of the part of the list that is common.
  static constexpr size_t GetHeaderSize() {
    return 4U;
  }

  // Size in bytes of the whole type list including all the stored elements.
  static constexpr size_t GetListSize(size_t count) {
    return GetHeaderSize() + sizeof(TypeItem) * count;
  }

 private:
  uint32_t size_;  // size of the list, in entries
  // Declared with one element, yet GetTypeItem indexes it with any idx below
  // size_; presumably the remaining entries follow directly in memory.
  TypeItem list_[1];  // elements of the list
  DISALLOW_COPY_AND_ASSIGN(TypeList);
};

}   // namespace dex
}   // namespace art

class_def_item

class_def_item 的内容加上后这一篇文章就太长了,放到下一篇再继续。

你可能感兴趣的:(Android ART 虚拟机 - dex 文件格式要旨(上))