iOS消息传递objc_msgSend底层详解（上）

前言

上一节我们了解了cache_t的结构，取出了缓存中的方法，并且探究了插入方法的每一个步骤。但目前我们对于缓存的机制了解得还不全面：已知insert是把方法插入到缓存中，但从方法的调用到把方法插入缓存之间做了什么呢？具体流程是什么？今天我们继续来分析。

cache工作流程

在objc_cache.mm文件中，有一段注释描述cache的流程：

 * Cache readers (PC-checked by collecting_in_critical())
 * objc_msgSend*
 * cache_getImp      // 获取Imp
 *
 * Cache readers/writers (hold cacheUpdateLock during access; not PC-checked)
 * cache_t::copyCacheNolock    (caller must hold the lock)      // 锁操作
 * cache_t::eraseNolock        (caller must hold the lock)           // 锁操作
 * cache_t::collectNolock      (caller must hold the lock)           // 锁操作
 * cache_t::insert             (acquires lock)      // 插入方法
 * cache_t::destroy            (acquires lock)    // 销毁

可以看到，调用方法后首先会走到objc_msgSend，然后从缓存中读取（查找）对应方法；接下来的锁操作不是我们关注的重点，可以忽略；再往后就是插入，可以推断出：只有在当前缓存中找不到方法时，才会进行插入操作。

接下来我们再从方法堆栈角度来看cache的流程，在insert方法打上断点，然后看堆栈信息：

insert之前的调用信息

可以看到，insert之前调用回推到objc_msgSend_uncached，同样从objc_msgSend相关方法走过来的，因此想要了解cache的完整流程，必须对objc_msgSend有一个全面的了解，接下来进入objc_msgSend源码，看看这个方法到底做了什么。

`objc_msgSend`源码解析

看源码之前，首先我们借用clang来看一下msgSend具体的操作方式（clang的方法之前有介绍过）。创建两个实例方法，分别有参和无参：

方法

调用：

方法调用

打开.cpp文件，看调用实例方法对应的底层代码：

((void (*)(id, SEL))(void *)objc_msgSend)((id)person, sel_registerName("instanceMethod1"));

((void (*)(id, SEL, NSString * _Nonnull))(void *)objc_msgSend)((id)person, sel_registerName("instanceMethod2:"), 
(NSString *)&__NSConstantStringImpl__var_folders_rp_g5qct2rs6wj7n5ps6nyvgzw40000gn_T_main_6a82ec_mi_0);

可以看到，调用instanceMethod1时，向objc_msgSend传入了person（消息接收者）和sel_registerName得到的SEL（方法名）；调用instanceMethod2时，向objc_msgSend传入了person（消息接收者）、sel_registerName得到的SEL（方法名）和NSString类型的参数。传入多个参数的方法的底层代码，大家可以自己试一下。
综上，总结出objc_msgSend机制为：objc_msgSend(消息接收者receiver, 方法名SEL, 参数params（可不传也可多个参数）)

了解objc_msgSend传入的参数之后就要找源码啦，在源码工程中全局搜索objc_msgSend，因为objc_msgSend是用汇编实现的，所以只需要关注.s文件。本篇就使用真机的arm64文件来分析。进入objc-msg-arm64.s文件，找到ENTRY _objc_msgSend，开始一句句的分析源码（虽然是汇编语言，语法都不熟悉，但好在源码注释非常友好，有些步骤对照注释来看会方便理解很多）：

ENTRY _objc_msgSend

objc_msgSend开始

cmp p0, #0          // nil check and tagged pointer check

这里p0是传入的第一个参数 - 消息接收者，cmp是比较指令，用p0和0做比较

#if SUPPORT_TAGGED_POINTERS
    b.le    LNilOrTagged        //  (MSB tagged pointer looks negative)
#else
    b.eq    LReturnZero
#endif

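le表示小于等于，eq表示等于。支持tagged pointer时（SUPPORT_TAGGED_POINTERS），接收者小于等于0就跳转到LNilOrTagged：最高位为1的tagged pointer按有符号数看是负数，所以这个分支同时处理nil和tagged pointer两种情况；不支持tagged pointer时，接收者等于0（为空）就直接跳转到LReturnZero返回空。接收者是普通对象指针时不会走进这两个分支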

ldr p13, [x0]       // p13 = isa

[x0]表示读取消息接收者首地址处的内容，也就是isa，ldr指令将isa存入寄存器p13

GetClassFromIsa_p16 p13, 1, x0  // p16 = class

这里需要先找出定义：

  /* note: auth_address is not required if !needs_auth */
.macro GetClassFromIsa_p16 src, needs_auth, auth_address 

#if SUPPORT_INDEXED_ISA
    // Indexed isa
    mov p16, \src           // optimistically set dst = src
    tbz p16, #ISA_INDEX_IS_NPI_BIT, 1f  // done if not non-pointer isa
    // isa in p16 is indexed
    adrp    x10, _objc_indexed_classes@PAGE
    add x10, x10, _objc_indexed_classes@PAGEOFF
    ubfx    p16, p16, #ISA_INDEX_SHIFT, #ISA_INDEX_BITS  // extract index
    ldr p16, [x10, p16, UXTP #PTRSHIFT] // load class from array
1:

#elif __LP64__
.if \needs_auth == 0 // _cache_getImp takes an authed class already
    mov p16, \src
.else
    // 64-bit packed isa
    ExtractISA p16, \src, \auth_address
.endif
#else

GetClassFromIsa_p16是一个宏，src是传入的isa，needs_auth为1，auth_address是person的首地址，下面SUPPORT_INDEXED_ISA在iOS设备上为0，所以走LP64，传入的第二个参数needs_auth为1，因此走else里面的逻辑

// 64-bit packed isa
ExtractISA p16, \src, \auth_address

再看ExtractISA：

.macro ExtractISA and   $0, $1, #ISA_MASK

and是与操作，ExtractISA就是将$1与上ISA_MASK并赋值给$0，代入参数值：p16 = isa & ISA_MASK，也就是p16 = FCPerson.class，拿到了消息接收者所属的类。

LGetIsaDone:
    // calls imp or objc_msgSend_uncached
    CacheLookup NORMAL, _objc_msgSend, __objc_msgSend_uncached

得到isa和class之后，根据注释理解CacheLookup是检查是否存在缓存，存在则执行imp，不存在就执行objc_msgSend_uncached，找定义：

.macro CacheLookup Mode, Function, MissLabelDynamic, MissLabelConstant

mov x15, x16            // stash the original isa
LLookupStart\Function:
    // p1 = SEL, p16 = isa
#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16_BIG_ADDRS
    ldr p10, [x16, #CACHE]              // p10 = mask|buckets
    lsr p11, p10, #48           // p11 = mask
    and p10, p10, #0xffffffffffff   // p10 = buckets
    and w12, w1, w11            // x12 = _cmd & mask
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16
    ldr p11, [x16, #CACHE]          // p11 = mask|buckets
      #if CONFIG_USE_PREOPT_CACHES
              #if __has_feature(ptrauth_calls)
                tbnz    p11, #0, LLookupPreopt\Function
                and p10, p11, #0x0000ffffffffffff   // p10 = buckets
              #else
                and p10, p11, #0x0000fffffffffffe   // p10 = buckets
                tbnz    p11, #0, LLookupPreopt\Function
              #endif
                eor p12, p1, p1, LSR #7
                and p12, p12, p11, LSR #48      // x12 = (_cmd ^ (_cmd >> 7)) & mask
      #else
        and p10, p11, #0x0000ffffffffffff   // p10 = buckets
        and p12, p1, p11, LSR #48       // x12 = _cmd & mask
      #endif // CONFIG_USE_PREOPT_CACHES
#else
#error Unsupported cache mask storage for ARM64.
#endif

    add p13, p10, p12, LSL #(1+PTRSHIFT)
                        // p13 = buckets + ((_cmd & mask) << (1+PTRSHIFT))

CacheLookup内部看起来做了一系列复杂操作，其实目的就是获取当前cache中的buckets用来检查当前调用的方法是否有缓存：

mov x15, x16：将isa赋值给x15；
iOS真机情况下，CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16，因此走：
ldr p11, [x16, #CACHE]： #CACHE = 16（字节），这里不是右移，而是从x16（类）的首地址偏移16字节（跳过各占8字节的isa和superclass）后读取8字节，也就是cache_t的首个成员，得到p11 = mask|buckets；
接下来是一段条件判断，首先真机情况下CONFIG_USE_PREOPT_CACHES = 1，接下来ptrauth_calls条件，如果使用Apple A12或更高版本A系列处理器的设备（如iPhone XS、iPhone XS Max和iPhone XR或更新的设备）则为1，所以看哪个逻辑根据情况去判断，我们用下面的来分析：
and p10, p11, #0x0000fffffffffffe // p10 = buckets：cache_t与上0x0000fffffffffffe是什么意思？首先打开苹果自带计算器，选择编程模式可以看到结果：

0x0000fffffffffffe

再回到cache_t的定义中可以看到：

// How much the mask is shifted by.
    static constexpr uintptr_t maskShift = 48;

因此高16位存放的是mask，低48位存放的是buckets地址。这里用p11与上0x0000fffffffffffe，就是保留低48位并清掉第0位（第0位是下面tbnz要检查的标记位），得到p10 = buckets

tbnz p11, #0, LLookupPreopt\Function：判断p11（mask|buckets）的第0位是否为0，不为0则跳转到LLookupPreopt，做一些共享缓存的操作，这点我们以后会详细介绍。我们现在暂时不考虑不为0的情况，所以继续向下走：

eor p12, p1, p1, LSR #7

eor是异或操作，p1是SEL，这一句就是p12 = _cmd ^ (_cmd >> 7)。将SEL右移7位的原因需要我们回到cache中的insert方法中，找到插入时的哈希算法：

static inline mask_t cache_hash(SEL sel, mask_t mask) 
{
    uintptr_t value = (uintptr_t)sel;
#if CONFIG_USE_PREOPT_CACHES
    value ^= value >> 7;
#endif
    return (mask_t)(value & mask);
}

可以看到，方法在插入缓存的时候，是先用sel与自身右移7位的结果做异或，再与上mask，最终算出了储存方法的bucket的下标index。所以取的时候，要使用同样的算法获取下标。同理，后面将p11右移48位取出mask，再与p12做与操作：

and p12, p12, p11, LSR #48      // x12 = (_cmd ^ (_cmd >> 7)) & mask

// How much the mask is shifted by.
// static constexpr uintptr_t maskShift = 48;

最终得到的就是p12 = index，也就是得到了方法储存位置的下标。

再接下来进入一个do while循环：

    add p13, p10, p12, LSL #(1+PTRSHIFT)
                        // p13 = buckets + ((_cmd & mask) << (1+PTRSHIFT))

                        // do {
1:  ldp p17, p9, [x13], #-BUCKET_SIZE   //     {imp, sel} = *bucket--
    cmp p9, p1              //     if (sel != _cmd) {
    b.ne    3f              //         scan more
                        //     } else {
2:  CacheHit \Mode              // hit:    call or return imp
                        //     }
3:  cbz p9, \MissLabelDynamic       //     if (sel == 0) goto Miss;
    cmp p13, p10            // } while (bucket >= buckets)
    b.hs    1b

这一段其实完全可以通过注释和储存方法insert中的循环来理解，当前拿到了buckets哈希列表，通过do while循环来遍历其中的bucket，如果找到了则执行CacheHit，如果找不到则执行MissLabelDynamic。来看CacheHit：

// CacheHit: x17 = cached IMP, x10 = address of buckets, x1 = SEL, x16 = isa
.macro CacheHit
.if $0 == NORMAL
    TailCallCachedImp x17, x10, x1, x16 // authenticate and call imp

如果找到了缓存，则会根据x17 = cached IMP, x10 = address of buckets, x1 = SEL, x16 = isa调用TailCallCachedImp：

.macro TailCallCachedImp
    // $0 = cached imp, $1 = address of cached imp, $2 = SEL, $3 = isa
    // x17 = cached IMP, x10 = address of buckets, x1 = SEL, x16 = isa
    eor $0, $0, $3
    br  $0
.endmacro

这里的imp ^ isa对应的就是imp的编码方式：imp在存储时与类做了异或编码，取出时再做一次同样的异或操作即可解码，然后通过br跳转到（也就是调用）最终拿到的imp。

至此，objc_msgSend通过sel查找imp的全部流程已经走完了，汇编源码看起来晦涩难懂，但其实整体取imp的过程，就是在cache中存储方法的逆向操作，对照着注释，cache_t的数据结构和存储方法时的操作流程，还是容易理解的！

iOS消息传递objc_msgSend底层详解（上）

前言

cache工作流程

objc_msgSend源码解析

你可能感兴趣的:(iOS消息传递objc_msgSend底层详解（上）)

`objc_msgSend`源码解析