Android学习心得(15) --- Dex文件结构解析(1)

我在博客上发表一些我的Android学习心得,希望对大家能有帮助。
这一篇我们讲述一下Android可执行文件dex的结构解析。


参考Leb128数据类型 Android学习心得(5) --- dex数据类型LEB128
参考实例分析学习理解dex文件结构Android学习心得(16) --- Dex文件结构解析(2)


1、Dex背景

    Android应用开发和Dalvik虚拟机:Android应用所使用的编程语言是Java语言,在编译时使用JDK将Java源程序编译成标准的Java字节码文件。
    而后通过工具软件DX把所有的字节码文件转成Android DEX文件(classes.dex)。
    最后使用Android打包工具(aapt)将DEX文件,资源文件以及AndroidManifest.xml文件(二进制格式)组合成一个应用程序包(APK)。 
    应用程序包可以被发布到手机上运行。 

2、Dex文件整体结构

Android学习心得(15) --- Dex文件结构解析(1)_第1张图片
Android学习心得(15) --- Dex文件结构解析(1)_第2张图片
Android学习心得(15) --- Dex文件结构解析(1)_第3张图片

从中我们可以看到,dex文件是由多个结构体组成。
Dex Header是dex文件头,定义了一些属性,记录了其他数据结构的偏移地址。
从String table ~ Class Def table是偏移索引的区域。
Data Section则是真实数据存放地方。 

首先我们通过/dalvik/libdex/DexFile.h查看DexFile结构

/*
 * Structure representing a DEX file.
 *
 * Code should regard DexFile as opaque, using the API calls provided here
 * to access specific structures.
 *
 * The members below are pointers to the directly-mapped header and ID
 * tables, plus a few bookkeeping fields.
 */
typedef struct DexFile {
    /* directly-mapped "opt" header */
    const DexOptHeader* pOptHeader;

    /* pointers to directly-mapped structs and arrays in base DEX */
    const DexHeader*    pHeader;        /* "header_item" */
    const DexStringId*  pStringIds;     /* "string_id_item" entries */
    const DexTypeId*    pTypeIds;       /* "type_id_item" entries */
    const DexFieldId*   pFieldIds;      /* "field_id_item" entries */
    const DexMethodId*  pMethodIds;     /* "method_id_item" entries */
    const DexProtoId*   pProtoIds;      /* "proto_id_item" entries */
    const DexClassDef*  pClassDefs;     /* "class_def_item" entries */
    const DexLink*      pLinkData;

    /* mapped in "auxiliary" section */
    const DexClassLookup* pClassLookup;

    /* points to start of DEX file data */
    const u1*           baseAddr;

    /* track memory overhead for auxiliary structures */
    int                 overhead;

    /* additional app-specific data structures associated with the DEX */
    void*               auxData;
} DexFile;

3、Dex Header解析

我们通过/dalvik/libdex/DexFile.h查看header_item

/*
 * Direct-mapped "header_item" struct.
 *
 * Describes the file as a whole and records where the other data
 * structures start.
 */
typedef struct DexHeader {
    u1  magic[8];           /* includes version number */
    u4  checksum;           /* adler32 checksum */
    u1  signature[kSHA1DigestLen]; /* SHA-1 hash */
    u4  fileSize;           /* length of entire file */
    u4  headerSize;         /* offset to start of next section */
    u4  endianTag;          /* byte-order tag */
    u4  linkSize;
    u4  linkOff;
    u4  mapOff;             /* offset of the DexMapList ("map_list") */
    /*
     * NOTE(review): each xxxSize/xxxOff pair below presumably gives the
     * size (entry count for the ID tables) and the file offset of the
     * matching section -- confirm against the dex format specification.
     */
    u4  stringIdsSize;
    u4  stringIdsOff;
    u4  typeIdsSize;
    u4  typeIdsOff;
    u4  protoIdsSize;
    u4  protoIdsOff;
    u4  fieldIdsSize;
    u4  fieldIdsOff;
    u4  methodIdsSize;
    u4  methodIdsOff;
    u4  classDefsSize;
    u4  classDefsOff;
    u4  dataSize;
    u4  dataOff;
} DexHeader;

Android学习心得(15) --- Dex文件结构解析(1)_第4张图片
Android学习心得(15) --- Dex文件结构解析(1)_第5张图片


4、mapOff指向MapList

DexHeader结构中的mapOff字段指向MapList数据结构
Android学习心得(15) --- Dex文件结构解析(1)_第6张图片

/*
 * Direct-mapped "map_item".
 *
 * One entry of a DexMapList: says which kind of item is stored, how many
 * of them there are, and where they start in the file.
 */
typedef struct DexMapItem {
    u2  type;              /* type code (one of the kDexType* map item type codes) */
    u2  unused;            /* unused field */
    u4  size;              /* count of items of the indicated type */
    u4  offset;            /* file offset to the start of data */
} DexMapItem;

/*
 * Direct-mapped "map_list".
 *
 * A count followed by DexMapItem entries.
 */
typedef struct DexMapList {
    u4  size;               /* #of entries in list */
    /*
     * NOTE(review): declared with a single element while `size` holds the
     * entry count, so the entries presumably continue past the end of the
     * struct -- confirm before relying on sizeof(DexMapList).
     */
    DexMapItem list[1];     /* entries */
} DexMapList;

在DexMapItem结构中,type是一个枚举常量

/*
 * map item type codes
 *
 * Values stored in DexMapItem.type to identify the kind of item a map
 * entry describes.
 */
 enum {
     kDexTypeHeaderItem               = 0x0000,
     kDexTypeStringIdItem             = 0x0001,
     kDexTypeTypeIdItem               = 0x0002,
     kDexTypeProtoIdItem              = 0x0003,
     kDexTypeFieldIdItem              = 0x0004,
     kDexTypeMethodIdItem             = 0x0005,
     kDexTypeClassDefItem             = 0x0006,
     kDexTypeMapList                  = 0x1000,
     kDexTypeTypeList                 = 0x1001,
     kDexTypeAnnotationSetRefList     = 0x1002,
     kDexTypeAnnotationSetItem        = 0x1003,
     kDexTypeClassDataItem            = 0x2000,
     kDexTypeCodeItem                 = 0x2001,
     kDexTypeStringDataItem           = 0x2002,
     kDexTypeDebugInfoItem            = 0x2003,
     kDexTypeAnnotationItem           = 0x2004,
     kDexTypeEncodedArrayItem         = 0x2005,
     kDexTypeAnnotationsDirectoryItem = 0x2006,
 };

5、部分结构

通过DexMapList结构生成的表格,来分别找出其中DexMapItem中的结构
由于class_def_item比较复杂,单独叙述

/*
  * Direct-mapped "string_id_item".
  *
  * Element type of the array that DexFile.pStringIds points to; each
  * entry only records where the string's data lives in the file.
  */
 typedef struct DexStringId {
     u4  stringDataOff;      /* file offset to string_data_item */
 } DexStringId;

 /*
  * Direct-mapped "type_id_item".
  *
  * Element type of the array that DexFile.pTypeIds points to; a type is
  * identified by the string holding its descriptor.
  */
 typedef struct DexTypeId {
     u4  descriptorIdx;      /* index into stringIds list for type descriptor */
 } DexTypeId;

 /*
  * Direct-mapped "field_id_item".
  *
  * Identifies a field by its defining class, its type and its name, each
  * given as an index into another ID list.
  */
 typedef struct DexFieldId {
     u2  classIdx;           /* index into typeIds list for defining class */
     u2  typeIdx;            /* index into typeIds for field type */
     u4  nameIdx;            /* index into stringIds for field name */
 } DexFieldId;

 /*
  * Direct-mapped "method_id_item".
  *
  * Identifies a method by its defining class, its prototype and its
  * name, each given as an index into another ID list.
  */
 typedef struct DexMethodId {
     u2  classIdx;           /* index into typeIds list for defining class */
     u2  protoIdx;           /* index into protoIds for method prototype */
     u4  nameIdx;            /* index into stringIds for method name */
 } DexMethodId;

 /*
  * Direct-mapped "proto_id_item".
  *
  * Describes a method prototype: a shorty descriptor string, the return
  * type, and the location of the parameter type list.
  */
 typedef struct DexProtoId {
     u4  shortyIdx;          /* index into stringIds for shorty descriptor */
     u4  returnTypeIdx;      /* index into typeIds list for return type */
     u4  parametersOff;      /* file offset to type_list for parameter types */
 } DexProtoId;

在DexProtoId结构中,parametersOff是type_list的偏移

/*
  * Direct-mapped "type_item".
  *
  * Element type of DexTypeList.list; refers to a type by index.
  */
 typedef struct DexTypeItem {
     u2  typeIdx;            /* index into typeIds */
 } DexTypeItem;

 /*
  * Direct-mapped "type_list".
  *
  * A count followed by DexTypeItem entries.
  */
 typedef struct DexTypeList {
     u4  size;               /* #of entries in list */
     /*
      * NOTE(review): declared with a single element while `size` holds the
      * entry count, so the entries presumably continue past the end of the
      * struct -- confirm before relying on sizeof(DexTypeList).
      */
     DexTypeItem list[1];    /* entries */
 } DexTypeList;

6、DexClassDef结构

/*
  * Direct-mapped "class_def_item".
  *
  * Describes one class: indices identify the class, its superclass and
  * its source file; file offsets locate its interface list, annotations,
  * class data and static values.
  */
 typedef struct DexClassDef {
     u4  classIdx;           /* index into typeIds for this class */
     u4  accessFlags;
     u4  superclassIdx;      /* index into typeIds for superclass */
     u4  interfacesOff;      /* file offset to DexTypeList */
     u4  sourceFileIdx;      /* index into stringIds for source file name */
     u4  annotationsOff;     /* file offset to annotations_directory_item; 0 if there are no annotations */
     u4  classDataOff;       /* file offset to class_data_item */
     u4  staticValuesOff;    /* file offset to DexEncodedArray */
 } DexClassDef;

annotationsOff是注解目录结构偏移,目前暂不详细说明,如果没有注解,其值为0。

classDataOff是指向class_data_item的偏移,下面介绍其结构
路径为/dalvik/libdex/DexClass.h

/* expanded form of class_data_item. Note: If a particular item is
  * absent (e.g., no static fields), then the corresponding pointer
  * is set to NULL.
  *
  * NOTE(review): the length of each array is presumably the matching
  * xxxSize count in `header` -- confirm against the decoding code. */
 typedef struct DexClassData {
     DexClassDataHeader header;
     DexField*          staticFields;      /* NULL if absent */
     DexField*          instanceFields;    /* NULL if absent */
     DexMethod*         directMethods;     /* NULL if absent */
     DexMethod*         virtualMethods;    /* NULL if absent */
 } DexClassData;


/* expanded form of a class_data_item header
  *
  * NOTE(review): each field presumably counts the entries of the
  * like-named array in DexClassData -- confirm against the decoding code. */
 typedef struct DexClassDataHeader {
     u4 staticFieldsSize;
     u4 instanceFieldsSize;
     u4 directMethodsSize;
     u4 virtualMethodsSize;
 } DexClassDataHeader;

DexField & DexMethod

 /* expanded form of encoded_field
  *
  * Element type of DexClassData.staticFields / instanceFields. */
 typedef struct DexField {
     u4 fieldIdx;    /* index to a field_id_item */
     u4 accessFlags;
 } DexField;

 /* expanded form of encoded_method
  *
  * Element type of DexClassData.directMethods / virtualMethods. */
 typedef struct DexMethod {
     u4 methodIdx;    /* index to a method_id_item */
     u4 accessFlags;
     u4 codeOff;      /* file offset to a code_item */
 } DexMethod;

code_item定义在/dalvik/libdex/DexFile.h中

/*
  * Direct-mapped "code_item".
  *
  * The "catches" table is used when throwing an exception,
  * "debugInfo" is used when displaying an exception stack trace or
  * debugging. An offset of zero indicates that there are no entries.
  *
  * Only the leading fields and the start of the instruction array are
  * declared; the trailing comments inside the struct list the data that
  * follows insns.
  */
 typedef struct DexCode {
     u2  registersSize;
     u2  insSize;
     u2  outsSize;
     u2  triesSize;          /* number of try_item entries that follow insns */
     u4  debugInfoOff;       /* file offset to debug info stream */
     u4  insnsSize;          /* size of the insns array, in u2 units */
     u2  insns[1];           /* declared with one element; insnsSize gives the real length */
     /* followed by optional u2 padding */
     /* followed by try_item[triesSize] */
     /* followed by uleb128 handlersSize */
     /* followed by catch_handler_item[handlersSize] */
 } DexCode;

到目前为止,基本上我们把dex文件组成结构给大家展示了出来。
下面要进行的就是对dex文件实例结合这篇来进行分析
Android学习心得(16) --- Dex文件结构解析(2)

你可能感兴趣的:(Android)