dex文件

Overview

source

  • art/runtime/dex_file.h
  • art/runtime/dex_file-inl.h
  • art/runtime/dex_file_types.h
  • art/runtime/dex_file.cc

数据结构

  • class DexFile
  • struct Header
    (gdb) p /d sizeof('art::DexFile::Header')
    $404 = 112
    
  • struct ClassDef
    (gdb) p /d sizeof('art::DexFile::ClassDef')
    $398 = 32
    
  • struct TypeId
    (gdb) p /d sizeof('art::DexFile::TypeId')
    $399 = 4
    
  • struct StringId
    (gdb) p /d sizeof('art::DexFile::StringId')
    $400 = 4
    
  • struct MethodId
    (gdb) p /d sizeof('art::DexFile::MethodId')
    $402 = 8
    
  • struct FieldId
    (gdb) p /d sizeof('art::DexFile::FieldId')
    $403 = 8
    
  • struct ProtoId
    (gdb) p /d sizeof('art::DexFile::ProtoId')
    $401 = 12
    
  • ClassDataItemIterator
  • struct ClassDataHeader
  • struct ClassDataField
  • struct ClassDataMethod
  • struct CodeItem

Dex文件布局

+------------+
|   Header   |
+------------+
| StringId[] |
+------------+
|  TypeId[]  |
+------------+
| ProtoId[]  |
+------------+
| FieldId[]  |
+------------+
| MethodId[] |
+------------+
| ClassDef[] |
+------------+
|    Data    |
+------------+

实现

这里会以core-oj.jar中的classes.dex为例进行解析.

Header

  • Header定义
    // Raw header_item.
    struct Header {
      uint8_t magic_[8];
      uint32_t checksum_;  // See also location_checksum_
      uint8_t signature_[kSha1DigestSize];
      uint32_t file_size_;  // size of entire file
      uint32_t header_size_;  // offset to start of next section
      uint32_t endian_tag_;
      uint32_t link_size_;  // unused
      uint32_t link_off_;  // unused
      uint32_t map_off_;  // unused
      uint32_t string_ids_size_;  // number of StringIds
      uint32_t string_ids_off_;  // file offset of StringIds array
      uint32_t type_ids_size_;  // number of TypeIds, we don't support more than 65535
      uint32_t type_ids_off_;  // file offset of TypeIds array
      uint32_t proto_ids_size_;  // number of ProtoIds, we don't support more than 65535
      uint32_t proto_ids_off_;  // file offset of ProtoIds array
      uint32_t field_ids_size_;  // number of FieldIds
      uint32_t field_ids_off_;  // file offset of FieldIds array
      uint32_t method_ids_size_;  // number of MethodIds
      uint32_t method_ids_off_;  // file offset of MethodIds array
      uint32_t class_defs_size_;  // number of ClassDefs
      uint32_t class_defs_off_;  // file offset of ClassDef array
      uint32_t data_size_;  // size of data section
      uint32_t data_off_;  // file offset of data section
    
      // Decode the dex magic version
      uint32_t GetVersion() const;
    
     private:
      DISALLOW_COPY_AND_ASSIGN(Header);
    };
    
  • 查看Header相关数据
    $ hexdump -C -s 0 -n 112 classes.dex 
    00000000  64 65 78 0a 30 33 37 00  8e 5e a1 39 cb 37 4f f8  |dex.037..^.9.7O.|
    00000010  a4 14 07 28 8c 0a db b6  cd 30 a8 fc 76 3e 11 65  |...(.....0..v>.e|
    00000020  fc 55 4d 00 70 00 00 00  78 56 34 12 00 00 00 00  |.UM.p...xV4.....|
    00000030  00 00 00 00 2c 55 4d 00  5d 86 00 00 70 00 00 00  |....,UM.]...p...|
    00000040  4d 0f 00 00 e4 19 02 00  e7 1c 00 00 18 57 02 00  |M............W..|
    00000050  d5 31 00 00 ec b1 03 00  b6 8a 00 00 94 40 05 00  |.1...........@..|
    00000060  cd 0d 00 00 44 96 09 00  18 06 42 00 e4 4f 0b 00  |....D.....B..O..|
    00000070
    
  • dex Header使用dexdump2解析如下
    DEX file header:
    magic               : 'dex\n037\0'
    checksum            : 39a15e8e
    signature           : cb37...1165
    file_size           : 5068284
    header_size         : 112
    link_size           : 0
    link_off            : 0 (0x000000)
    string_ids_size     : 34397
    string_ids_off      : 112 (0x000070)
    type_ids_size       : 3917
    type_ids_off        : 137700 (0x0219e4)
    proto_ids_size      : 7399 
    proto_ids_off       : 153368 (0x025718)
    field_ids_size      : 12757
    field_ids_off       : 242156 (0x03b1ec)
    method_ids_size     : 35510
    method_ids_off      : 344212 (0x054094)
    class_defs_size     : 3533 
    class_defs_off      : 628292 (0x099644)
    data_size           : 4326936
    data_off            : 741348 (0x0b4fe4)
    

Demo

下面会以BootClassLoader这个类举例说明上面重要的几个字段的含义.

... ...
Class #200            -
  Class descriptor  : 'Ljava/lang/BootClassLoader;'
  Access flags      : 0x0000 ()
  Superclass        : 'Ljava/lang/ClassLoader;'
... ...

即现在已知BootClassLoader类的ClassDef index是200

获取ClassDef

ClassDef的offset为: 0x099644 + 200 * 32 = 0x9af44

$ hexdump -C -s 0x9af44 -n 32 classes.dex 
0009af44  e9 00 00 00 00 00 00 00  fc 00 00 00 00 00 00 00  |................|
0009af54  d9 0d 00 00 a4 07 2f 00  d8 e0 49 00 00 00 00 00  |....../...I.....|
0009af64

根据struct ClassDef的定义

  • class_idx_ = 0xe9
  • access_flags_ = 0x00
  • superclass_idx_ = 0xfc
  • source_file_idx_ = 0x0dd9
  • class_data_off_ = 0x49e0d8

查看类名

  • class index

    class_idx_ 表示TypeId[]中的索引为0xe9

  • 查看type id

    0x0219e4 + 0xe9 * 4 = 0x21d88
    
    $ hexdump -C -s 0x21d88 -n 4 classes.dex 
    00021d88  18 1b 00 00                                       |....|
    00021d8c
    

    TypeId[]的元素值表示描述符在StringId[]中的索引

  • 查看string id

    0x000070 + 0x1b18 * 4 = 0x6cd0
    
    $ hexdump -C -s 0x6cd0 -n 4 classes.dex 
    00006cd0  57 ae 33 00                                       |W.3.|
    00006cd4 
    

    StringId[]的元素值表示字符串相对文件起始的偏移

  • 查看字符串

    $ hexdump -C -s 0x33ae57 -n 100 classes.dex 
    0033ae57  1b 4c 6a 61 76 61 2f 6c  61 6e 67 2f 42 6f 6f 74  |.Ljava/lang/Boot|
    0033ae67  43 6c 61 73 73 4c 6f 61  64 65 72 3b 00 20 4c 6a  |ClassLoader;. Lj|
    0033ae77  61 76 61 2f 6c 61 6e 67  2f 42 6f 6f 74 73 74 72  |ava/lang/Bootstr|
    0033ae87  61 70 4d 65 74 68 6f 64  45 72 72 6f 72 3b 00 1a  |apMethodError;..|
    0033ae97  4c 6a 61 76 61 2f 6c 61  6e 67 2f 42 79 74 65 24  |Ljava/lang/Byte$|
    0033aea7  42 79 74 65 43 61 63 68  65 3b 00 10 4c 6a 61 76  |ByteCache;..Ljav|
    0033aeb7  61 2f 6c 61                                       |a/la|
    0033aebb
    

    则Class index = 0xe9对应类名为"Ljava/lang/BootClassLoader;"

    同理,superclass_idx_ = 0xfc对应的类名为"Ljava/lang/ClassLoader;"

查看源文件

source_file_idx_ = 0x0dd9

0x70 + 0x0dd9 * 4 = 0x37d4

$ hexdump -C -s 0x37d4 -n 4 classes.dex
000037d4  38 a3 32 00                                       |8.2.|
000037d8

$ hexdump -C -s 0x32a338 -n 40 classes.dex
0032a338  10 43 6c 61 73 73 4c 6f  61 64 65 72 2e 6a 61 76  |.ClassLoader.jav|
0032a348  61 00 1b 43 6c 61 73 73  4e 6f 74 46 6f 75 6e 64  |a..ClassNotFound|
0032a358  45 78 63 65 70 74 69 6f                           |Exceptio|
0032a360

表示当前类位于ClassLoader.java

查看类的定义

class_data_item

  • class_data_item是用无符号LEB128进行编码
  • 解码相关可参考DecodeUnsignedLeb128()
$ hexdump -C -s 0x49e0d8 -n 100 classes.dex 
0049e0d8  01 00 02 07 a2 06 0a d7  0f 81 80 04 f4 91 39 06  |..............9.|
0049e0e8  89 80 08 90 92 39 d9 0f  04 d8 92 39 02 04 f8 92  |.....9.....9....|
0049e0f8  39 01 04 94 93 39 02 04  b8 93 39 01 01 b8 94 39  |9....9....9....9|
0049e108  01 01 d4 94 39 01 04 f0  94 39 01 00 04 00 a3 06  |....9....9......|
0049e118  1a e2 0f 81 80 04 98 95  39 01 81 80 04 b0 95 39  |........9......9|
0049e128  01 81 80 04 c8 95 39 01  81 80 04 e0 95 39 01 00  |......9......9..|
0049e138  02 00 a4 06                                       |....|

ClassDataHeader

  • 参考ClassDataItemIterator::ReadClassDataHeader()的实现
  • ClassDataHeader的几个成员值如下:
    static_fields_size_ = 0x01
    instance_fields_size_ = 0x00
    direct_methods_size_ = 0x02
    virtual_methods_size_ = 0x07
    

ClassDataField

  • ClassDataItemIterator::ReadClassDataField()
  • field_idx_delta_
    field_idx_delta_ = (0xa2 & 0x7f)|(0x06 << 7) = 0x322
      
    0x03b1ec + 0x322 * 8 = 0x3cafc
    
    $ hexdump -C -s 0x3cafc -n 8 classes.dex 
    0003cafc  e9 00 e9 00 39 60 00 00                           |....9`..|
    0003cb04
    
    • class_idx_ = 0xe9

      表示当前field属于BootClassLoader类

    • type_idx_ = 0xe9

      表示当前field是BootClassLoader类型

    • name_idx_ = 0x6039

      0x70 + 0x6039 * 4 = 0x18154
      $ hexdump -C -s 0x18154 -n 8 classes.dex 
      00018154  58 67 3a 00 62 67 3a 00                           |Xg:.bg:.|
      0001815c
      
      $ hexdump -C -s 0x3a6758 -n 40 classes.dex 
      003a6758  08 69 6e 73 74 61 6e 63  65 00 17 69 6e 73 74 61  |.instance..insta|
      003a6768  6e 63 65 46 6f 6c 6c 6f  77 52 65 64 69 72 65 63  |nceFollowRedirec|
      003a6778  74 73 00 0b 69 6e 73 74                           |ts..inst|
      003a6780
      
      当前field名字为"instance"
      
  • field_.access_flags_
    access_flags_ = 0x0a, 表示private static
    

ClassDataMethod

  • ClassDataItemIterator::ReadClassDataMethod()
  • method_idx_delta_
    method_.method_idx_delta_ =  (0xd7&0x7f)|(0x0f<<7) = 0x7d7
      
    由于这是direct methods列表中的第一个method, 这里的method_idx_delta_就是常见到的dex_method_idx (0x7d7 = 2007); 同一列表中后续method的索引需要在前一个method索引的基础上累加delta
      
    0x054094 + 0x7d7 * 8 = 0x57f4c
    
    $ hexdump -C -s 0x57f4c -n 8 classes.dex 
    00057f4c  e9 00 57 10 e8 07 00 00                           |..W.....|
    00057f54
    
    • class_idx_ = 0xe9
      0xe9表示当前method属于BootClassLoader类
      
    • proto_idx_ = 0x1057
      0x025718 + 0x1057 * 12 = 0x31b2c
      $ hexdump -C -s 0x31b2c -n 12 classes.dex 
      00031b2c  18 3e 00 00 58 0e 00 00  00 00 00 00              |.>..X.......|
      00031b38
      
      • shorty_idx_ = 0x3e18
        0x70 + 0x3e18 * 4 = 0xf8d0
        
        $ hexdump -C -s 0xf8d0 -n 4 classes.dex 
        0000f8d0  fe 52 38 00                                       |.R8.|
        0000f8d4
        
        $ hexdump -C -s 0x3852fe -n 40 classes.dex 
        003852fe  01 56 00 02 56 31 00 02  56 32 00 02 56 33 00 02  |.V..V1..V2..V3..|
        0038530e  56 3a 00 03 56 3a 3a 00  03 56 41 49 00 04 56 41  |V:..V::..VAI..VA|
        0038531e  49 49 00 08 56 41 4c 49                           |II..VALI|
        00385326
        
        shorty descriptor是"V"
        
      • return_type_idx_ = 0x0e58
        0x0219e4 + 0x0e58 * 4 = 0x25344
        
        $ hexdump -C -s 0x25344 -n 4 classes.dex 
        00025344  18 3e 00 00                                       |.>..|
        00025348
        
        0x70 + 0x3e18 * 4 = 0xf8d0
        返回类型也是"V"
        
      • pad_ = 0x0000
      • parameters_off_ = 0x00
    • name_idx_ = 0x07e8
      0x000070 + 0x07e8 * 4 = 0x2010
      
      $ hexdump -C -s 0x2010 -n 8 classes.dex 
      00002010  d2 24 32 00 da 24 32 00                           |.$2..$2.|
      00002018
      
      $ hexdump -C -s 0x3224d2 -n 40 classes.dex 
      003224d2  06 3c 69 6e 69 74 3e 00  07 3c 69 6e 69 74 3e 20  |.<init>..<init> |
      003224e2  00 19 3c 69 6e 69 74 69  61 6c 69 7a 65 64 20 66  |..<initialized f|
      ... ...
      
      当前method名字为"<init>"
      
  • access_flags_
    method_.access_flags_ = (0x04<<14)|((0x80&0x7f)<<7)|(0x81&0x7f) = 0x10001
    表示此method是public constructor (ACC_PUBLIC = 0x1, ACC_CONSTRUCTOR = 0x10000)
    
  • code_off_
    • 查看CodeItem
      method_.code_off_ = (0xf4&0x7f)|((0x91&0x7f) <<7)|((0x39&0x7f)<<14) = 0xe48f4
      
      $ hexdump -C -s 0xe48f4 -n 40 classes.dex 
      000e48f4  02 00 01 00 02 00 00 00  ee 58 3d 00 05 00 00 00  |.........X=.....|
      000e4904  12 00 70 20 14 09 01 00  0e 00 00 00 02 00 00 00  |..p ............|
      000e4914  01 00 01 00 f5 58 3d 00                           |.....X=.|
      000e491c
      
      • registers_size_ = 0x02
      • ins_size_ = 0x01
      • outs_size_ = 0x02
      • tries_size_ = 0x00
      • debug_info_off_ = 0x3d58ee
        • debug_info信息解析参考DexFile::DecodeDebugPositionInfo()
          $ hexdump -C -s 0x3d58ee -n 7 classes.dex
          003d58ee  c6 0a 00 07 0e 4b 00                              |.....K.|
          003d58f5
          
          0x0000处dex指令对应的line为(0xc6&0x7f)|(0x0a<<7) = 1350
          0x0004处dex指令对应的line为 1350 + 1 = 1351
          
      • insns_size_in_code_units_ = 0x05
        • 每个code unit是2个byte
        • 每个dex指令可由多个code unit组成
        • oat dump
          200: Ljava/lang/BootClassLoader; (offset=0x0000ea9c) (type_idx=233) (StatusInitialized) (OatClassAllCompiled)
            0: void java.lang.BootClassLoader.<init>() (dex_method_idx=2007)
              DEX CODE:
                0x0000: 1200                      | const/4 v0, #+0
                0x0001: 7020 1409 0100            | invoke-direct {v1, v0}, void java.lang.ClassLoader.<init>(java.lang.ClassLoader) // method@2324
                0x0004: 0e00                      | return-void
              OatMethodOffsets (offset=0x0000eaa0)
          

DexFile

  • DexFile对象

    这里仍以core-oj.jar为例

    $3 = (art::DexFile) {
    _vptr$DexFile = 0x74da3dd1f0 <vtable for art::DexFile+16>, 
    static kDefaultMethodsVersion = 0x25, 
    static kClassDefinitionOrderEnforcedVersion = 0x25, 
    static kDexMagic = 0x74da36f654 <art::DexFile::kDexMagic>, 
    static kNumDexVersions = 0x3, 
    static kDexVersionLen = 0x4, 
    static kDexMagicVersions = {{0x30, 0x33, 0x35, 0x0}, {0x30, 0x33, 0x37, 0x0}, {0x30, 0x33, 0x38, 0x0}}, 
    static kSha1DigestSize = 0x14, 
    static kDexEndianConstant = 0x12345678, 
    static kClassesDex = 0x74da33b0e2, 
    static kDexNoIndex = 0xffffffff, 
    static kDexNoIndex16 = 0xffff, 
    static kMultiDexSeparator = 0x3a, 
    begin_ = 0x74d991a01c, 
    size_ = 0x4d55fc, 
    location_ = {
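      <std::__1::__basic_string_common<true>> = {<No data fields>}, 
      members of std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >: 
      __r_ = {
        <std::__1::__libcpp_compressed_pair_imp<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__rep, std::__1::allocator<char>, 2>> = {
          <std::__1::allocator<char>> = {<No data fields>}, 
          members of std::__1::__libcpp_compressed_pair_imp<std::__1::basic_string<char, std::__1::char_traits<char>, std::__1::allocator<char> >::__rep, std::__1::allocator<char>, 2>: 
          __first_ = {
            {
              __l = {
                __cap_ = 0x21, 
                __size_ = 0x1d, 
                __data_ = 0x74da41c340
              }, 
              __s = {
                {
                  __size_ = 0x21, 
                  __lx = 0x21
                }, 
                __data_ = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1d, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x40, 0xc3, 0x41, 0xda, 0x74, 0x0, 0x0, 0x0}
              }, 
              __r = {
                __words = {0x21, 0x1d, 0x74da41c340}
              }
            }
          }
        }, <No data fields>}, 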
      static npos = 0xffffffffffffffff
    }, 
    location_checksum_ = 0xce236663, 
    mem_map_ = {
      __ptr_ = {
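        <std::__1::__libcpp_compressed_pair_imp<art::MemMap*, std::__1::default_delete<art::MemMap>, 2>> = {
          <std::__1::default_delete<art::MemMap>> = {<No data fields>}, 
          members of std::__1::__libcpp_compressed_pair_imp<art::MemMap*, std::__1::default_delete<art::MemMap>, 2>: 
          __first_ = 0x0
        }, <No data fields>}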
    }, 
    header_ = 0x74d991a01c, 
    string_ids_ = 0x74d991a08c, 
    type_ids_ = 0x74d993ba00, 
    field_ids_ = 0x74d9955208, 
    method_ids_ = 0x74d996e0b0, 
    proto_ids_ = 0x74d993f734, 
    class_defs_ = 0x74d99b3660, 
    method_handles_ = 0x0, 
    num_method_handles_ = 0x0, 
    call_site_ids_ = 0x0, 
    num_call_site_ids_ = 0x0, 
    oat_dex_file_ = 0x74da438060
    }
    
  • 解析

    • vdex文件的maps
      74d991a000-74d9e28000 r--s 00000000 103:0b 1194                          /system/framework/arm64/boot.vdex
      74b70ea000-74b75f8000 r--p 00000000 103:0b 1194                          /system/framework/arm64/boot.vdex
      
    • vdex mmap起始地址是0x74d991a000
    • header_ = 0x74d991a000 + 28 = 0x74d991a01c
    • string_ids_ = 0x74d991a01c + 0x70 = 0x74d991a08c
    • type_ids_ = 0x74d991a01c + 0x0219e4 = 0x74d993ba00
    • 其它依次类推

你可能感兴趣的:(dex文件)