我在博客上发表一些我的Android学习心得,希望对大家能有帮助。
这一篇我们讲述一下Android可执行文件dex的结构解析。
关于LEB128数据类型,请参考:Android学习心得(5) --- dex数据类型LEB128
结合实例分析来理解dex文件结构,请参考:Android学习心得(16) --- Dex文件结构解析(2)
Android应用开发和Dalvik虚拟机:Android应用所使用的编程语言是Java语言,在编译时先使用JDK将Java源程序编译成标准的Java字节码文件。
而后通过工具软件DX把所有的字节码文件转成Android DEX文件(classes.dex)。
最后使用Android打包工具(aapt)将DEX文件,资源文件以及AndroidManifest.xml文件(二进制格式)组合成一个应用程序包(APK)。
应用程序包可以被发布到手机上运行。
从中我们可以看到,dex文件是由多个结构体组成。
Dex Header是dex文件头,定义了一些属性,并记录了其他数据结构的偏移地址。
从String Table到Class Def Table是偏移索引区域。
Data Section则是真实数据存放的地方。
首先我们通过/dalvik/libdex/DexFile.h查看DexFile结构
/*
 * Structure representing a DEX file.
 *
 * Code should regard DexFile as opaque, using the API calls provided here
 * to access specific structures.
 *
 * All pointer members except auxData are pointer-to-const.
 */
typedef struct DexFile {
/* directly-mapped "opt" header */
const DexOptHeader* pOptHeader;
/* pointers to directly-mapped structs and arrays in base DEX */
const DexHeader* pHeader;
const DexStringId* pStringIds;
const DexTypeId* pTypeIds;
const DexFieldId* pFieldIds;
const DexMethodId* pMethodIds;
const DexProtoId* pProtoIds;
const DexClassDef* pClassDefs;
const DexLink* pLinkData;
/* mapped in "auxiliary" section */
const DexClassLookup* pClassLookup;
/* points to start of DEX file data */
const u1* baseAddr;
/* track memory overhead for auxiliary structures */
int overhead;
/* additional app-specific data structures associated with the DEX */
void* auxData;
} DexFile;
我们通过/dalvik/libdex/DexFile.h查看header_item
/*
 * Direct-mapped "header_item" struct.
 *
 * Most members after endianTag come in <name>Size / <name>Off pairs;
 * mapOff is the only offset without a matching size member.
 * NOTE(review): presumably each pair is a size (or item count) plus a file
 * offset for one region of the file -- confirm against the DEX format spec.
 */
typedef struct DexHeader {
u1 magic[8]; /* includes version number */
u4 checksum; /* adler32 checksum */
u1 signature[kSHA1DigestLen]; /* SHA-1 hash */
u4 fileSize; /* length of entire file */
u4 headerSize; /* offset to start of next section */
u4 endianTag; /* byte-order tag */
u4 linkSize;
u4 linkOff;
u4 mapOff; /* presumably the file offset of the DexMapList -- TODO confirm */
u4 stringIdsSize;
u4 stringIdsOff;
u4 typeIdsSize;
u4 typeIdsOff;
u4 protoIdsSize;
u4 protoIdsOff;
u4 fieldIdsSize;
u4 fieldIdsOff;
u4 methodIdsSize;
u4 methodIdsOff;
u4 classDefsSize;
u4 classDefsOff;
u4 dataSize;
u4 dataOff;
} DexHeader;
/*
 * Direct-mapped "map_item".
 *
 * One entry of a DexMapList: a type code, how many items of that type
 * exist, and the file offset where their data starts.
 */
typedef struct DexMapItem {
u2 type; /* type code (one of the kDexType* map item type codes) */
u2 unused; /* NOTE(review): presumably padding/reserved -- confirm */
u4 size; /* count of items of the indicated type */
u4 offset; /* file offset to the start of data */
} DexMapItem;
/*
 * Direct-mapped "map_list".
 *
 * A count followed by the DexMapItem entries.
 * NOTE(review): `list` is declared with a single element while `size`
 * carries the entry count; this looks like the pre-C99 trailing-array
 * idiom, so sizeof(DexMapList) likely does not cover the whole list --
 * confirm before relying on it.
 */
typedef struct DexMapList {
u4 size; /* #of entries in list */
DexMapItem list[1]; /* entries */
} DexMapList;
在DexMapItem结构中,type是一个枚举常量
/*
 * map item type codes
 *
 * Values stored in DexMapItem.type. The codes fall into three numeric
 * ranges: 0x0000-0x0006, 0x1000-0x1003 and 0x2000-0x2006.
 */
enum {
/* 0x0000 range: header and the *_id_item / class_def_item tables */
kDexTypeHeaderItem = 0x0000,
kDexTypeStringIdItem = 0x0001,
kDexTypeTypeIdItem = 0x0002,
kDexTypeProtoIdItem = 0x0003,
kDexTypeFieldIdItem = 0x0004,
kDexTypeMethodIdItem = 0x0005,
kDexTypeClassDefItem = 0x0006,
/* 0x1000 range: list-style items */
kDexTypeMapList = 0x1000,
kDexTypeTypeList = 0x1001,
kDexTypeAnnotationSetRefList = 0x1002,
kDexTypeAnnotationSetItem = 0x1003,
/* 0x2000 range: class data, code, string data, debug info, annotations */
kDexTypeClassDataItem = 0x2000,
kDexTypeCodeItem = 0x2001,
kDexTypeStringDataItem = 0x2002,
kDexTypeDebugInfoItem = 0x2003,
kDexTypeAnnotationItem = 0x2004,
kDexTypeEncodedArrayItem = 0x2005,
kDexTypeAnnotationsDirectoryItem = 0x2006,
};
通过DexMapList结构生成的表格,来分别找出其中DexMapItem中的结构
由于class_def_item比较复杂,单独叙述
/*
 * Direct-mapped "string_id_item".
 *
 * Holds a single file offset that locates the string's data.
 */
typedef struct DexStringId {
u4 stringDataOff; /* file offset to string_data_item */
} DexStringId;
/*
 * Direct-mapped "type_id_item".
 *
 * Identifies a type by the index of its descriptor string.
 */
typedef struct DexTypeId {
u4 descriptorIdx; /* index into stringIds list for type descriptor */
} DexTypeId;
/*
 * Direct-mapped "field_id_item".
 *
 * Identifies a field by defining class, field type and name. The two
 * type indices are 16-bit (u2); the name index is 32-bit (u4).
 */
typedef struct DexFieldId {
u2 classIdx; /* index into typeIds list for defining class */
u2 typeIdx; /* index into typeIds for field type */
u4 nameIdx; /* index into stringIds for field name */
} DexFieldId;
/*
 * Direct-mapped "method_id_item".
 *
 * Identifies a method by defining class, prototype and name. The class
 * and prototype indices are 16-bit (u2); the name index is 32-bit (u4).
 */
typedef struct DexMethodId {
u2 classIdx; /* index into typeIds list for defining class */
u2 protoIdx; /* index into protoIds for method prototype */
u4 nameIdx; /* index into stringIds for method name */
} DexMethodId;
/*
 * Direct-mapped "proto_id_item".
 *
 * Describes a method prototype: its shorty descriptor string, its return
 * type, and where the parameter type list is stored.
 */
typedef struct DexProtoId {
u4 shortyIdx; /* index into stringIds for shorty descriptor */
u4 returnTypeIdx; /* index into typeIds list for return type */
u4 parametersOff; /* file offset to type_list for parameter types */
} DexProtoId;
在DexProtoId结构中,parametersOff是type_list的偏移
/*
 * Direct-mapped "type_item".
 *
 * A single 16-bit type index; the element type of DexTypeList.
 */
typedef struct DexTypeItem {
u2 typeIdx; /* index into typeIds */
} DexTypeItem;
/*
 * Direct-mapped "type_list".
 *
 * A count followed by DexTypeItem entries.
 * NOTE(review): `list` is declared with a single element while `size`
 * carries the entry count; this looks like the pre-C99 trailing-array
 * idiom -- confirm before relying on sizeof(DexTypeList).
 */
typedef struct DexTypeList {
u4 size; /* #of entries in list */
DexTypeItem list[1]; /* entries */
} DexTypeList;
/*
 * Direct-mapped "class_def_item".
 *
 * Describes one class: its own type, its superclass, and file offsets or
 * indices leading to its interfaces, source file name, annotations,
 * class data and static values.
 */
typedef struct DexClassDef {
u4 classIdx; /* index into typeIds for this class */
u4 accessFlags; /* NOTE(review): presumably access-flag bits for the class -- confirm */
u4 superclassIdx; /* index into typeIds for superclass */
u4 interfacesOff; /* file offset to DexTypeList */
u4 sourceFileIdx; /* index into stringIds for source file name */
u4 annotationsOff; /* file offset to annotations_directory_item; 0 when the class has no annotations */
u4 classDataOff; /* file offset to class_data_item */
u4 staticValuesOff; /* file offset to DexEncodedArray */
} DexClassDef;
annotationsOff是注解目录结构偏移,目前暂不详细说明,如果没有注解,其值为0。
classDataOff是指向class_data_item偏移,下面介绍其结构
路径为/dalvik/libdex/DexClass.h
/* expanded form of class_data_item. Note: If a particular item is
 * absent (e.g., no static fields), then the corresponding pointer
 * is set to NULL.
 *
 * NOTE(review): in this listing DexClassDataHeader, DexField and DexMethod
 * are defined after this struct; a C compiler needs them declared before
 * this point. */
typedef struct DexClassData {
DexClassDataHeader header;
DexField* staticFields;
DexField* instanceFields;
DexMethod* directMethods;
DexMethod* virtualMethods;
} DexClassData;
/* expanded form of a class_data_item header
 *
 * Four 32-bit sizes, one per member category.
 * NOTE(review): presumably each is the number of entries in the matching
 * DexClassData array (staticFields, instanceFields, directMethods,
 * virtualMethods) -- confirm against the decoding code. */
typedef struct DexClassDataHeader {
u4 staticFieldsSize;
u4 instanceFieldsSize;
u4 directMethodsSize;
u4 virtualMethodsSize;
} DexClassDataHeader;
DexField & DexMethod
/* expanded form of encoded_field
 *
 * Pairs a field reference with its access flags. */
typedef struct DexField {
u4 fieldIdx; /* index to a field_id_item */
u4 accessFlags; /* NOTE(review): presumably access-flag bits for the field -- confirm */
} DexField;
/* expanded form of encoded_method
 *
 * Pairs a method reference with its access flags and the location of
 * its code. */
typedef struct DexMethod {
u4 methodIdx; /* index to a method_id_item */
u4 accessFlags; /* NOTE(review): presumably access-flag bits for the method -- confirm */
u4 codeOff; /* file offset to a code_item */
} DexMethod;
code_item定义在/dalvik/libdex/DexFile.h中
/*
 * Direct-mapped "code_item".
 *
 * The "catches" table is used when throwing an exception,
 * "debugInfo" is used when displaying an exception stack trace or
 * debugging. An offset of zero indicates that there are no entries.
 *
 * Only the leading part is declared as struct members; the trailing
 * comments inside the struct list the data that follows insns.
 */
typedef struct DexCode {
u2 registersSize; /* NOTE(review): presumably the register count used by the code -- confirm */
u2 insSize; /* NOTE(review): presumably the size of incoming arguments -- confirm */
u2 outsSize; /* NOTE(review): presumably the size of outgoing arguments -- confirm */
u2 triesSize; /* number of try_item entries that follow insns (see below) */
u4 debugInfoOff; /* file offset to debug info stream */
u4 insnsSize; /* size of the insns array, in u2 units */
u2 insns[1]; /* declared with one element; insnsSize gives the array size */
/* followed by optional u2 padding */
/* followed by try_item[triesSize] */
/* followed by uleb128 handlersSize */
/* followed by catch_handler_item[handlersSize] */
} DexCode;
到目前为止,基本上我们把dex文件组成结构给大家展示了出来。
接下来将结合本篇内容,对一个dex文件实例进行分析,详见:
Android学习心得(16) --- Dex文件结构解析(2)