OC底层原理10 - objc_msgSend 流程之方法快速查找

消息查找

方法的本质

方法的本质就是objc_msgSend消息发送

// In main.mm: create an HQPerson and call two of its instance methods
// using ordinary message-send syntax.
HQPerson* person = [[HQPerson alloc] init];
[person instanceMethod];
[person instanceMethod1];

// The same three statements after clang rewrites the Objective-C source to C++.
// Every message send becomes a call through objc_msgSend, cast to a function
// pointer type matching the method: (receiver, selector) -> return value.
// alloc is sent to the class object returned by objc_getClass("HQPerson"),
// and init is sent to the result of that alloc call.
HQPerson* person = ((HQPerson *(*)(id, SEL))(void *)objc_msgSend)((id)((HQPerson *(*)(id, SEL))(void *)objc_msgSend)((id)objc_getClass("HQPerson"), sel_registerName("alloc")), sel_registerName("init"));
((void (*)(id, SEL))(void *)objc_msgSend)((id)person, sel_registerName("instanceMethod"));
((void (*)(id, SEL))(void *)objc_msgSend)((id)person, sel_registerName("instanceMethod1"));

从上述代码中可以看到,执行对象的实例方法,其实就是通过objc_msgSend向对象发送消息。
既然objc_msgSend接口是向对象发送消息,那我们直接在代码中验证一下。
注意:在代码中直接使用objc_msgSend时,需要将Build Settings中的Enable Strict Checking of objc_msgSend Calls修改成NO,否则会报错。

验证

int main(int argc, const char * argv[]) {
    // Function-pointer shape used to call objc_msgSend for a method that
    // takes no extra arguments and returns void.
    typedef void (*HQVoidMessageSend)(id, SEL);

    @autoreleasepool {
        NSLog(@"Hello, World!");

        HQPerson *receiver = [[HQPerson alloc] init];

        // Path 1: ordinary message-send syntax.
        [receiver instanceMethod];

        // Path 2: send the same selector to the same receiver by calling
        // objc_msgSend directly through a correctly typed function pointer.
        SEL selector = sel_registerName("instanceMethod");
        HQVoidMessageSend send = (HQVoidMessageSend)(void *)objc_msgSend;
        send((id)receiver, selector);
    }
    return 0;
}

//结果
2021-01-27 10:23:03.435353+0800 HQObjc[85983:8737055] Hello, World!
//[person instanceMethod]的打印
2021-01-27 10:23:03.436394+0800 HQObjc[85983:8737055] -[HQPerson instanceMethod]
//objc_msgSend发送消息的打印
2021-01-27 10:23:08.965764+0800 HQObjc[85983:8737055] -[HQPerson instanceMethod]

从结果中可以看到,使用对象直接调用对象方法与objc_msgSend发送消息得到的结果是一致的。

objc_msgSend原理分析

在objc源码中搜索objc_msgSend,发现objc_msgSend是汇编实现的,接下来以arm64的汇编为例进行objc_msgSend流程梳理。

  • 进入_objc_msgSend流程
// Entry point of _objc_msgSend.
// Flow: nil / tagged-pointer check on the receiver -> load isa -> derive the
// class into p16 -> CacheLookup (branches to __objc_msgSend_uncached on miss).
ENTRY _objc_msgSend
// Unwind annotation for this routine, declared as NoFrame
// (presumably "no stack frame is set up"; the UNWIND macro is not shown here).
UNWIND _objc_msgSend, NoFrame

// Compare p0 with 0 to detect a nil receiver or a tagged pointer.
// p0 is the first argument of objc_msgSend, i.e. the message receiver.
cmp p0, #0          // nil check and tagged pointer check
// The branch taken depends on whether tagged pointers are supported in this build.
#if SUPPORT_TAGGED_POINTERS
// Tagged pointers supported: p0 <= 0 (signed compare) goes to LNilOrTagged.
b.le    LNilOrTagged        //  (MSB tagged pointer looks negative)
#else
// Tagged pointers not supported: only a nil receiver (p0 == 0) is special-cased.
b.eq    LReturnZero
#endif
// Reaching here, the receiver is non-nil (and was not treated as a tagged pointer).
// x0 holds the receiver's address; load the word stored at that address into p13.
// That first word of the object is its isa.
ldr p13, [x0]       // p13 = isa
// Derive the class from the isa in p13 and leave it in p16.
GetClassFromIsa_p16 p13, 1, x0  // p16 = class

LGetIsaDone:
    // calls imp or objc_msgSend_uncached
    // Run the CacheLookup macro in NORMAL mode; on a miss it branches to
    // __objc_msgSend_uncached.
    CacheLookup NORMAL, _objc_msgSend, __objc_msgSend_uncached

#if SUPPORT_TAGGED_POINTERS
LNilOrTagged:
    // The flags still come from "cmp p0, #0": equal means the receiver is nil.
    b.eq    LReturnZero     // nil check
    // Non-zero here means a tagged pointer: GetTaggedClass resolves its class
    // (macro not shown in this excerpt; presumably leaves it in p16 -- confirm).
    GetTaggedClass
    // Rejoin the common path at the cache lookup.
    b   LGetIsaDone
// SUPPORT_TAGGED_POINTERS
#endif

LReturnZero:
    // Messaging nil: clear x1 and d0-d3 so every return register reads as zero,
    // then return to the caller.
    // x0 is already zero
    mov x1, #0
    movi    d0, #0
    movi    d1, #0
    movi    d2, #0
    movi    d3, #0
    ret

// End of _objc_msgSend.
END_ENTRY _objc_msgSend
  • GetClassFromIsa_p16:从isa中获取类地址
// GetClassFromIsa_p16: compute the class pointer from an isa value, result in p16.
//   src          - register holding the isa (or an already-resolved class)
//   needs_auth   - 0: src is copied to p16 unchanged; otherwise the class is
//                  extracted from a packed isa (LP64 path)
//   auth_address - forwarded to ExtractISA; unused when needs_auth is 0
// Example use from _objc_msgSend:
//GetClassFromIsa_p16 p13, 1, x0    // p16 = class
.macro GetClassFromIsa_p16 src, needs_auth, auth_address /* note: auth_address is not required if !needs_auth */

#if SUPPORT_INDEXED_ISA
    // Indexed isa
    // Start by copying src into p16.
    mov p16, \src           // optimistically set dst = src
    //#   define ISA_INDEX_IS_NPI_BIT  0
    // Test bit ISA_INDEX_IS_NPI_BIT of p16: if it is 0 the isa is a plain
    // class pointer and p16 is already the answer, so skip to label 1.
    tbz p16, #ISA_INDEX_IS_NPI_BIT, 1f  // done if not non-pointer isa
    // isa in p16 is indexed
    // x10 = base address of the page containing _objc_indexed_classes
    adrp    x10, _objc_indexed_classes@PAGE
    // x10 += offset of _objc_indexed_classes within that page
    add x10, x10, _objc_indexed_classes@PAGEOFF
    // Extract ISA_INDEX_BITS bits of p16 starting at bit ISA_INDEX_SHIFT into
    // p16, zero-extending the upper bits: this is the class index.
    ubfx    p16, p16, #ISA_INDEX_SHIFT, #ISA_INDEX_BITS  // extract index
    ldr p16, [x10, p16, UXTP #PTRSHIFT] // load class from array
1:

#elif __LP64__
.if \needs_auth == 0 // _cache_getImp takes an authed class already
    mov p16, \src
.else
    // 64-bit packed isa
    // ExtractISA pulls the class pointer out of the packed isa into p16
    // (macro not shown here; presumably masks src with ISA_MASK -- confirm).
    ExtractISA p16, \src, \auth_address
.endif
#else
    // 32-bit raw isa
    mov p16, \src

#endif

.endmacro

快速查找流程

  • CacheLookup 缓存查找流程(快速查找)
// CacheLookup: probe the method cache of the class in x16 for the selector in p1.
//   Mode              - passed on to CacheHit (NORMAL / GETIMP / LOOKUP per the
//                       restart-protocol notes below)
//   Function          - suffix used to build the LLookupStart/End/Recover labels
//   MissLabelDynamic  - branch target when the selector is not in the cache
//   MissLabelConstant - not referenced anywhere in this listing
// Example use from _objc_msgSend:
//CacheLookup NORMAL, _objc_msgSend, __objc_msgSend_uncached
.macro CacheLookup Mode, Function, MissLabelDynamic, MissLabelConstant
    //
    // Restart protocol:
    //
    //   As soon as we're past the LLookupStart\Function label we may have
    //   loaded an invalid cache pointer or mask.
    //
    //   When task_restartable_ranges_synchronize() is called,
    //   (or when a signal hits us) before we're past LLookupEnd\Function,
    //   then our PC will be reset to LLookupRecover\Function which forcefully
    //   jumps to the cache-miss codepath which have the following
    //   requirements:
    //
    //   GETIMP:
    //     The cache-miss is just returning NULL (setting x0 to 0)
    //
    //   NORMAL and LOOKUP:
    //   - x0 contains the receiver
    //   - x1 contains the selector
    //   - x16 contains the isa
    //   - other registers are set as per calling conventions
    //

    mov x15, x16            // stash the original isa
LLookupStart\Function:
    // p1 = SEL, p16 = isa
// Layout 1: mask stored in the top 16 bits of the cache word, buckets pointer in
// the low 48 bits. Which platforms select each CACHE_MASK_STORAGE value is
// decided where that macro is defined (not shown in this excerpt).
#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16_BIG_ADDRS
// #define CACHE (2 * __SIZEOF_POINTER__), i.e. 2 * 8 = 16 with 8-byte pointers:
// the cache word sits 16 bytes into the class.
    ldr p10, [x16, #CACHE]   // p10 = mask|buckets
    lsr p11, p10, #48           // p11 = mask
    and p10, p10, #0xffffffffffff   // p10 = buckets
    and w12, w1, w11            // x12 = _cmd & mask
// Layout 2.
// NOTE(review): as listed, this branch only loads p11; p10 (buckets) and p12
// (hash index) are read below but never set here, so the listing looks
// abridged -- check against the runtime source.
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16
    ldr p11, [x16, #CACHE]          // p11 = mask|buckets
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_LOW_4
    ldr p11, [x16, #CACHE]     // p11 = mask|buckets
    and p10, p11, #~0xf         // p10 = buckets
    and p11, p11, #0xf          // p11 = maskShift
    mov p12, #0xffff
    lsr p11, p12, p11           // p11 = mask = 0xffff >> p11
    and p12, p1, p11            // x12 = _cmd & mask
#else
#error Unsupported cache mask storage for ARM64.
#endif

// p10: address of the first element of the buckets array.
// p12: _cmd & mask, the hash index of _cmd within the buckets array.
// p13: address of the bucket at that index: buckets + (index << (1+PTRSHIFT)),
//      i.e. index * 16 bytes when PTRSHIFT is 3.
    add p13, p10, p12, LSL #(1+PTRSHIFT) // p13 = buckets + ((_cmd & mask) << (1+PTRSHIFT))
// First scan: from the hashed bucket backwards down to buckets[0].
// do {
1:  ldp p17, p9, [x13], #-BUCKET_SIZE //     {imp, sel} = *bucket--
    cmp p9, p1              //     if (sel != _cmd) {
    b.ne    3f              //         scan more
//     } else {
2:  CacheHit \Mode              // hit:    call or return imp
//     }
3:  cbz p9, \MissLabelDynamic       //     if (sel == 0) goto Miss;
    cmp p13, p10            // } while (bucket >= buckets)
    b.hs    1b

    // wrap-around:
    //   p10 = first bucket
    //   p11 = mask (and maybe other bits on LP64)
    //   p12 = _cmd & mask
    //
    // A full cache can happen with CACHE_ALLOW_FULL_UTILIZATION.
    // So stop when we circle back to the first probed bucket
    // rather than when hitting the first bucket again.
    //
    // Note that we might probe the initial bucket twice
    // when the first probed slot is the last entry.


    // Second scan: restart from the last bucket (index == mask) and walk
    // backwards towards the bucket probed first.
#if CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16_BIG_ADDRS
    add p13, p10, w11, UXTW #(1+PTRSHIFT) // p13 = buckets + (mask << 1+PTRSHIFT)
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_HIGH_16
    add p13, p10, p11, LSR #(48 - (1+PTRSHIFT)) // p13 = buckets + (mask << 1+PTRSHIFT)
// see comment about maskZeroBits
#elif CACHE_MASK_STORAGE == CACHE_MASK_STORAGE_LOW_4
    add p13, p10, p11, LSL #(1+PTRSHIFT) // p13 = buckets + (mask << 1+PTRSHIFT)
#else
#error Unsupported cache mask storage for ARM64.
#endif
    add p12, p10, p12, LSL #(1+PTRSHIFT) // p12 = first probed bucket

// do {
4:  ldp p17, p9, [x13], #-BUCKET_SIZE //     {imp, sel} = *bucket--
    cmp p9, p1              //     if (sel == _cmd)
    b.eq    2b              //         goto hit
    cmp p9, #0              // } while (sel != 0 &&
    ccmp    p13, p12, #0, ne        //     bucket > first_probed)
    b.hi    4b

    // Falling out of the second scan (empty bucket reached, or back at the
    // first probed bucket) is a miss: branch to the miss label.
LLookupEnd\Function:
LLookupRecover\Function:
    b   \MissLabelDynamic
.endmacro

总结:

  • 在缓存查找流程中,首先通过对象(消息接收者)的isa找到类信息。
  • 从类信息中偏移至cache_t的地址,再通过掩码获取buckets数组的首地址。
  • 第一次遍历buckets数组
    • 通过传入的cmd计算当前的hash索引:index = _cmd & mask
    • 对buckets的首地址进行偏移:bucket = buckets[index] = buckets+index*16,此时得到buckets数组中,index对应的bucket的起始地址。
    • 取出bucket结构体中的SEL和IMP,将SEL与传入的_cmd进行比较。
    • 如果SEL与_cmd相同,跳转至CacheHit,表示缓存命中,返回imp。
    • 如果SEL与_cmd不相同:若该SEL为0(空bucket),说明缓存中没有该方法,直接跳转至缓存未命中流程;否则开始循环查找,将地址向前偏移:bucket = buckets[index-1],取出bucket的SEL和IMP继续进行比较。直到比较完buckets数组中的第一个元素仍然未找到后,跳出循环。
  • 第二次遍历buckets数组
    • 将指针定位到当前buckets数组中的最后一个元素,bucket = buckets[mask] = buckets+mask*16。
    • 取出bucket结构体中的SEL和IMP,将SEL与传入的_cmd进行比较。
    • 如果SEL与_cmd相同,跳转至CacheHit,表示缓存命中,返回imp。
    • 如果SEL与_cmd不相同,则开始循环查找。 将地址向前偏移:bucket = buckets[mask-1],取出bucket的SEL和IMP继续进行比较。直到遇到空bucket(SEL为0),或者回到第一次遍历的起始位置buckets[index]仍然未找到后,跳出循环。
  • 通过两次遍历后都未找到IMP时,则跳转__objc_msgSend_uncached流程,进入慢速查找流程。

慢速查找流程

  • 进入__objc_msgSend_uncached流程
// Entry point of __objc_msgSend_uncached: the cache-miss path reached from
// CacheLookup. It runs the method-table lookup and then jumps to the result.
STATIC_ENTRY __objc_msgSend_uncached
// Unwind annotation for this routine, declared as FrameWithNoSaves
// (the UNWIND macro is not shown here; presumably "has a frame, saves no
// callee-saved registers" -- confirm).
UNWIND __objc_msgSend_uncached, FrameWithNoSaves

// THIS IS NOT A CALLABLE C FUNCTION
// Out-of-band p15 is the class to search
// NOTE(review): the MethodTableLookup listing below passes x16, not p15, as
// the class argument -- confirm which register applies to this runtime version.
// Expand MethodTableLookup, which calls _lookUpImpOrForward and leaves the
// resulting IMP in x17.
MethodTableLookup
// Transfer control to the IMP held in x17 via TailCallFunctionPointer
// (macro not shown; by its name a tail call rather than a call-and-return).
TailCallFunctionPointer x17

END_ENTRY __objc_msgSend_uncached
  • 跳转MethodTableLookup,通过方法列表查找
// MethodTableLookup: call the C/C++ function lookUpImpOrForward from assembly.
// On entry x0 = receiver, x1 = selector, x16 = class. On exit x17 = IMP.
.macro MethodTableLookup
    
// Save registers around the call (SAVE_REGS is not shown in this excerpt).
SAVE_REGS MSGSEND

// lookUpImpOrForward(obj, sel, cls, LOOKUP_INITIALIZE | LOOKUP_RESOLVER)
// receiver and selector already in x0 and x1
// Third argument: the class to search, taken from x16.
mov x2, x16
// Fourth argument: behavior = 3, i.e. LOOKUP_INITIALIZE | LOOKUP_RESOLVER
// according to the prototype comment above.
mov x3, #3
bl  _lookUpImpOrForward

// IMP in x0
// Move the returned IMP to x17 so it is still available after x0 is restored.
mov x17, x0

RESTORE_REGS MSGSEND

.endmacro

总结:

  • 慢速查找主要是方法列表查找,重点就是:_lookUpImpOrForward函数。
  • _lookUpImpOrForward的参数为:x0-消息接收者;x1-消息SEL;x2-类地址;x3-3(表示LOOKUP_INITIALIZE | LOOKUP_RESOLVER)
  • lookUpImpOrForward 已经不再是汇编实现了,而是C/C++实现了。位于objc-runtime-new.mm中。

注:
1、C/C++中调用 汇编 ,去查找汇编时,C/C++调用的方法需要多加一个下划线
2、汇编 中调用 C/C++方法时,去查找C/C++方法,需要将汇编调用的方法去掉一个下划线

  • lookUpImpOrForward方法
// Slow-path method lookup. Called from the assembly as
//   lookUpImpOrForward(obj, sel, cls, LOOKUP_INITIALIZE | LOOKUP_RESOLVER)
//
// Walks the class chain starting at cls: searches each class's own method
// list, then the cache of each superclass, until an IMP is found or the chain
// ends. If nothing is found and LOOKUP_RESOLVER is set, hands over to
// resolveMethod_locked.
//
// Parameters:
//   inst     - the receiver; passed through to the helpers
//   sel      - the selector being looked up
//   cls      - the class at which the search starts
//   behavior - bit flags: LOOKUP_INITIALIZE, LOOKUP_RESOLVER, LOOKUP_NOCACHE
//              and LOOKUP_NIL are the ones consulted here
// Returns the IMP found, or forward_imp when none was found; returns nil
// instead of forward_imp when LOOKUP_NIL is set. On the resolver path the
// value returned is whatever resolveMethod_locked returns.
NEVER_INLINE
IMP lookUpImpOrForward(id inst, SEL sel, Class cls, int behavior)
{
    const IMP forward_imp = (IMP)_objc_msgForward_impcache;
    IMP imp = nil;
    Class curClass;

    runtimeLock.assertUnlocked();

    if (slowpath(!cls->isInitialized())) {
        // The first message sent to a class is often +new or +alloc, or +self
        // which goes through objc_opt_* or various optimized entry points.
        //
        // However, the class isn't realized/initialized yet at this point,
        // and the optimized entry points fall down through objc_msgSend,
        // which ends up here.
        //
        // We really want to avoid caching these, as it can cause IMP caches
        // to be made with a single entry forever.
        //
        // Note that this check is racy as several threads might try to
        // message a given class for the first time at the same time,
        // in which case we might cache anyway.
        behavior |= LOOKUP_NOCACHE;
    }

    // runtimeLock is held during isRealized and isInitialized checking
    // to prevent races against concurrent realization.

    // runtimeLock is held during method search to make
    // method-lookup + cache-fill atomic with respect to method addition.
    // Otherwise, a category could be added but ignored indefinitely because
    // the cache was re-filled with the old value after the cache flush on
    // behalf of the category.

    // Take the runtime lock for the whole search (reasons given above).
    runtimeLock.lock();

    // We don't want people to be able to craft a binary blob that looks like
    // a class but really isn't one and do a CFI attack.
    //
    // To make these harder we want to make sure this is a class that was
    // either built into the binary or legitimately registered through
    // objc_duplicateClass, objc_initializeClassPair or objc_allocateClassPair.
    // Verify that cls is a class known to the runtime.
    checkIsKnownClass(cls);

    // Realize the class, and initialize it if LOOKUP_INITIALIZE is set in
    // behavior. The helper is not shown here; it may return a different Class
    // value, which replaces cls.
    cls = realizeAndInitializeIfNeeded_locked(inst, cls, behavior & LOOKUP_INITIALIZE);
    // runtimeLock may have been dropped but is now locked again
    runtimeLock.assertLocked();
    curClass = cls;

    // The code used to lookup the class's cache again right after
    // we take the lock but for the vast majority of the cases
    // evidence shows this is a miss most of the time, hence a time loss.
    //
    // The only codepath calling into this without having performed some
    // kind of cache lookup is class_getInstanceMethod().
    // unreasonableClassCount() bounds the number of loop iterations; attempts
    // counts down from it.
    for (unsigned attempts = unreasonableClassCount();;) {
        if (curClass->cache.isConstantOptimizedCache(/* strict */true)) {
            // NOTE(review): this branch is empty in this listing, so curClass
            // is not advanced on this path -- presumably platform-specific
            // code was elided; confirm against the runtime source.
        } else {
            // curClass method list.
            // Search curClass's own method list for sel (superclasses are not
            // searched by this call, per the helper's name). The search
            // strategy lives inside the helper and is not visible here.
            Method meth = getMethodNoSuper_nolock(curClass, sel);
            if (meth) {
                // Found: take the method's IMP and go to the cache-fill step.
                imp = meth->imp(false);
                goto done;
            }

            // Not in this class: advance curClass to its superclass. A nil
            // superclass means the whole chain has been searched.
            if (slowpath((curClass = curClass->getSuperclass()) == nil)) {
                // No implementation found, and method resolver didn't help.
                // Use forwarding.
                // forward_imp is (IMP)_objc_msgForward_impcache, see above.
                imp = forward_imp;
                break;
            }
        }

        // Halt if there is a cycle in the superclass chain.
        // The iteration budget is exhausted: abort with a fatal error.
        if (slowpath(--attempts == 0)) {
            _objc_fatal("Memory corruption in class list.");
        }

        // Superclass cache.
        // curClass is now a superclass: probe its method cache for sel. This
        // is not recursion; the enclosing for loop keeps walking up the chain.
        imp = cache_getImp(curClass, sel);
        if (slowpath(imp == forward_imp)) {
            // Found a forward:: entry in a superclass.
            // Stop searching, but don't cache yet; call method
            // resolver for this class first.
            break;
        }
        // Hit in the superclass cache: go to the cache-fill step.
        if (fastpath(imp)) {
            // Found the method in a superclass. Cache it in this class.
            goto done;
        }
    }

    // No implementation found. Try method resolver once.
    // Reached only through break, i.e. imp == forward_imp.
    if (slowpath(behavior & LOOKUP_RESOLVER)) {
        // Clear LOOKUP_RESOLVER (known to be set here) before passing behavior
        // on, so the resolver is not attempted a second time.
        behavior ^= LOOKUP_RESOLVER;
        // NOTE(review): returns without unlocking here; presumably
        // resolveMethod_locked releases runtimeLock -- confirm.
        return resolveMethod_locked(inst, sel, cls, behavior);
    }

 done:
    // imp is set here (a real IMP, or forward_imp when the resolver was not
    // requested). Unless LOOKUP_NOCACHE is set, record it via
    // log_and_fill_cache; the first argument is cls, the class the search
    // started from, not curClass.
    if (fastpath((behavior & LOOKUP_NOCACHE) == 0)) {
        log_and_fill_cache(cls, imp, sel, inst, curClass);
    }
 done_unlock:
    runtimeLock.unlock();
    // With LOOKUP_NIL, callers get nil instead of the forwarding IMP.
    if (slowpath((behavior & LOOKUP_NIL) && imp == forward_imp)) {
        return nil;
    }
    return imp;
}
  • 父类的缓存查找
// _cache_getImp: look up a selector in one class's method cache only.
// Called from lookUpImpOrForward as cache_getImp(curClass, sel), so p0 is the
// class and p1 the selector. Both miss paths return through p0.
STATIC_ENTRY _cache_getImp

// needs_auth is 0, so the class in p0 is taken as-is into p16.
GetClassFromIsa_p16 p0, 0
// Probe the cache in GETIMP mode; a miss branches to LGetImpMissDynamic.
CacheLookup GETIMP, _cache_getImp, LGetImpMissDynamic, LGetImpMissConstant

LGetImpMissDynamic:
// Cache miss: return 0 (nil).
mov p0, #0
ret

LGetImpMissConstant:
// Alternate miss path: return the value held in p2.
// NOTE(review): nothing in the CacheLookup listing branches here -- confirm
// when this label is used.
mov p0, p2
ret

END_ENTRY _cache_getImp
  • 当父类都为nil了,仍然未找到SEL时,进入__objc_msgForward_impcache流程,由源码可见,__objc_msgForward_impcache流程中,其核心是__objc_forward_handler
// __objc_msgForward_impcache: the IMP that lookUpImpOrForward uses as
// forward_imp. It only branches to __objc_msgForward.
STATIC_ENTRY __objc_msgForward_impcache

// No stret specialization.
b   __objc_msgForward

END_ENTRY __objc_msgForward_impcache

    
// __objc_msgForward: load the function pointer stored in the variable
// __objc_forward_handler and transfer control to it.
ENTRY __objc_msgForward

// x17 = base address of the page containing __objc_forward_handler
adrp    x17, __objc_forward_handler@PAGE
// p17 = the pointer value stored in __objc_forward_handler
ldr p17, [x17, __objc_forward_handler@PAGEOFF]
// Jump to the handler through TailCallFunctionPointer (macro not shown here).
TailCallFunctionPointer x17
    
END_ENTRY __objc_msgForward
  • __objc_forward_handler探索,此处就是未找到函数实现的最后的终点,报告未找到方法的错误。
// Default forward handler: the last stop when no implementation was found for
// a message. It reports the unrecognized selector through _objc_fatal and
// never returns (declared noreturn).
//   self - the receiver of the unhandled message
//   sel  - the selector that was not recognized
// The message is prefixed with '+' when the receiver's class is a metaclass
// (a class method was sent) and with '-' otherwise.
__attribute__((noreturn, cold)) void
objc_defaultForwardHandler(id self, SEL sel)
{
    _objc_fatal("%c[%s %s]: unrecognized selector sent to instance %p "
                "(no message forward handler is installed)", 
                class_isMetaClass(object_getClass(self)) ? '+' : '-', 
                object_getClassName(self), sel_getName(sel), self);
}

// Forward-handler slot, initialised to the default handler above. It is defined
// after the function so that the initializer refers to an already-declared name.
// The assembly symbol __objc_forward_handler (C name plus a leading underscore)
// is what __objc_msgForward loads and jumps to.
void *_objc_forward_handler = (void*)objc_defaultForwardHandler;

总结

  • 快速查找流程

    • 【步骤1】当通过objc_msgSend发送消息时,进入汇编,使用汇编对当前类cache(缓存)开始查找。
    • 【步骤2】若在cache(缓存)中找到了SEL,则缓存命中(CacheHit),直接调用或返回其对应的IMP;该方法本来就在cache中,无需再次存储。
  • 慢速查找流程

    • 【步骤3】若在cache(缓存)中未找到SEL,则进入当前类的方法列表查找,
      • 方法列表查找的主要函数lookUpImpOrForward(obj, sel, cls, LOOKUP_INITIALIZE | LOOKUP_RESOLVER)流程。
      • lookUpImpOrForward(obj, sel, cls, LOOKUP_INITIALIZE | LOOKUP_RESOLVER)使用C/C++实现。
    • 【步骤4】在lookUpImpOrForward函数中,查找步骤
      • 【步骤4.1】查找当前类方法列表,当前类的方法列表保存在类的class_data_bits_t bits属性中。通过cls->data()读取class_rw_t
      • 【步骤4.2】若在当前类方法列表中查找到SEL,则将其对应的IMP返回,并检查当前类cache中是否有存储该方法,若未存储则需要将其存储至当前类的cache中。
      • 【步骤4.3】若在当前类方法列表中未查找到SEL,则调用父类cache_getImp方法,进行父类的缓存查找。注意:此时父类不能为空
      • 【步骤4.4】若在父类缓存列表中找到SEL,则将其对应的IMP返回,并在未设置LOOKUP_NOCACHE时将其存储至cache中。注意:此处写入的是消息接收者所属类(cls)的cache,而不是父类的cache(log_and_fill_cache的第一个参数是cls,而不是curClass)。
      • 【步骤4.5】若在父类缓存列表中未找到SEL,则将当前类设置为父类,并重复【步骤4.1】查找当前类(已经修改成父类)方法列表。循环直至父类为空后终止。
      • 【步骤4.6】此时,直到父类为空仍未找到SEL,则会进行一次动态方法解析
      • 【步骤4.7】若动态方法解析仍未找到SEL,则会进行消息转发,即打印没有实现函数,运行程序,崩溃时报的错误提示。
  • 实例方法的查找链:类->父类->根类->nil

  • 类方法的查找链:元类->根元类->根类->nil

你可能感兴趣的:(OC底层原理10 - objc_msgSend 流程之方法快速查找)