Runtime源码版本:objc4-723, ARM64平台
weak/__weak是OC开发中常用的关键字,其有当指向的对象被销毁时置nil并且不会改变引用计数特性。
示例:
__weak NSObject *instanceObjc = [NSObject new];
iPhone7Plus运行的汇编如下
0x104f7287c <+0>: sub sp, sp, #0x40 ; =0x40
0x104f72880 <+4>: stp x29, x30, [sp, #0x30]
0x104f72884 <+8>: add x29, sp, #0x30 ; =0x30
0x104f72888 <+12>: adrp x8, 2
0x104f7288c <+16>: add x8, x8, #0xc58 ; =0xc58
0x104f72890 <+20>: adrp x9, 2
0x104f72894 <+24>: add x9, x9, #0xc60 ; =0xc60
0x104f72898 <+28>: stur w0, [x29, #-0x4]
0x104f7289c <+32>: stur x1, [x29, #-0x10]
0x104f728a0 <+36>: ldr x9, [x9]
0x104f728a4 <+40>: ldr x1, [x8]
0x104f728a8 <+44>: mov x0, x9
0x104f728ac <+48>: bl 0x104f72bcc ; symbol stub for: objc_msgSend
0x104f728b0 <+52>: add x8, sp, #0x18 ; =0x18
0x104f728b4 <+56>: mov x1, x0
0x104f728b8 <+60>: str x0, [sp, #0x10]
0x104f728bc <+64>: mov x0, x8
0x104f728c0 <+68>: bl 0x104f72bc0 ; symbol stub for: objc_initWeak
0x104f728c4 <+72>: ldr x1, [sp, #0x10]
0x104f728c8 <+76>: str x0, [sp, #0x8]
0x104f728cc <+80>: mov x0, x1
0x104f728d0 <+84>: bl 0x104f72be4 ; symbol stub for: objc_release
0x104f728d4 <+88>: add x8, sp, #0x18 ; =0x18
-> 0x104f728d8 <+92>: mov x0, x8
0x104f728dc <+96>: bl 0x104f72bb4 ; symbol stub for: objc_destroyWeak
0x104f728e0 <+100>: mov w10, #0x0
0x104f728e4 <+104>: mov x0, x10
0x104f728e8 <+108>: ldp x29, x30, [sp, #0x30]
0x104f728ec <+112>: add sp, sp, #0x40 ; =0x40
0x104f728f0 <+116>: ret
可根据汇编代码bl跳转指令可知声明__weak后其内部实现过程中调用了objc_initWeak,而对象消毁后调用了objc_destroyWeak。
objc_initWeak、objc_destroyWeak代码如下,最终会都调到storeWeak函数。objc_destroyWeak函数会向storeWeak传入一个nil,也就是当所指向的对象被销毁后主动置为nil。事实上通过查看NSObject.mm文件得知关于weak的所有实现最终都会调用到storeWeak函数。所以weak的实现实际就是storeWeak的实现。
id objc_initWeak(id *location, id newObj) // location为二维指针,在上面的例子中指向instanceObjc
{
if (!newObj) {
*location = nil;
return nil;
}
return storeWeak
(location, (objc_object*)newObj);
}
void objc_destroyWeak(id *location)
{
(void)storeWeak
(location, nil);
}
storeWeak函数定义如下,HaveOld、HaveNew分别代表弱引用指针(此处指示例中instanceObjc)有无旧值、是否有新的对象分配给弱引用指针(此处指示例中的[NSObject new]),这两个值编译阶段由Clang指定。
template
static id storeWeak(id *location, objc_object *newObj)
在看storeWeak函数逻辑之前需了解如下几个类型的定义
相关类型
template
class StripedMap {
enum { CacheLineSize = 64 };
enum { StripeCount = 64 };
struct PaddedT {
T value alignas(CacheLineSize);
};
PaddedT array[StripeCount];
static unsigned int indexForPointer(const void *p) {
uintptr_t addr = reinterpret_cast(p);
return ((addr >> 4) ^ (addr >> 9)) % StripeCount;
}
public:
T& operator[] (const void *p) {
return array[indexForPointer(p)].value;
}
// .... 无关代码省略
};
struct SideTable {
spinlock_t slock; // 线程锁
RefcountMap refcnts; // 引用计数(weak实现未用到)
weak_table_t weak_table; // 弱引用表(重点)
SideTable() {
memset(&weak_table, 0, sizeof(weak_table));
}
// .... 无关代码省略
};
struct weak_table_t {
weak_entry_t *weak_entries; //存储 弱引用指针及其所指向的对象(重点)
size_t num_entries;
uintptr_t mask; // 用于对weak_entries取值时下标的限定(不越界)
uintptr_t max_hash_displacement;
};
// 用于包装指针(主要防止反编绎工具直接看到对象地址)
template
class DisguisedPtr {
uintptr_t value;
static uintptr_t disguise(T* ptr) {
return -(uintptr_t)ptr; // 对指针地址取负数
}
static T* undisguise(uintptr_t val) {
return (T*)-val;
}
public:
DisguisedPtr() { }
DisguisedPtr(T* ptr)
: value(disguise(ptr)) { }
// .... 无关代码省略
};
// 类型别名
typedef DisguisedPtr weak_referrer_t;
struct weak_entry_t {
DisguisedPtr referent; // 弱引用指向的对象
union {
struct {
weak_referrer_t *referrers; // 弱引用指针数组
uintptr_t out_of_line_ness : 2;
uintptr_t num_refs : PTR_MINUS_2; // PTR_MINUS_2 == 62 on ARM64,弱引用个数(referrers数组个数)
uintptr_t mask; // 用于对referrers取值时下标的限定(不越界)
uintptr_t max_hash_displacement;
};
struct {
// out_of_line_ness field is low bits of inline_referrers[1]
weak_referrer_t inline_referrers[WEAK_INLINE_COUNT]; // WEAK_INLINE_COUNT == 4
};
};
bool out_of_line() {
return (out_of_line_ness == REFERRERS_OUT_OF_LINE); // REFERRERS_OUT_OF_LINE == 2
}
// .... 无关代码省略
weak_entry_t(objc_object *newReferent, objc_object **newReferrer)
: referent(newReferent)
{
inline_referrers[0] = newReferrer;
for (int i = 1; i < WEAK_INLINE_COUNT; i++) {
inline_referrers[i] = nil;
}
}
};
总体结构图如下:
代码逻辑
template
static id
storeWeak(id *location, objc_object *newObj)
{
assert(haveOld || haveNew);
if (!haveNew) assert(newObj == nil);
Class previouslyInitializedClass = nil;
id oldObj;
SideTable *oldTable;
SideTable *newTable;
// Acquire locks for old and new values.
// Order by lock address to prevent lock ordering problems.
// Retry if the old value changes underneath us.
retry:
if (haveOld) {
oldObj = *location;
// 从SideTables取出旧值对应SideTable
oldTable = &SideTables()[oldObj];
} else {
oldTable = nil;
}
if (haveNew) {
// 从SideTables取出新值对应SideTable
newTable = &SideTables()[newObj];
} else {
newTable = nil;
}
// 线程锁
SideTable::lockTwo(oldTable, newTable);
// 如有旧值(*location有旧值),之前*location已经赋值给oldObj了这里要是不一致说明有新的线程进来了,跳转到开始
if (haveOld && *location != oldObj) {
SideTable::unlockTwo(oldTable, newTable);
goto retry;
}
// 如新的对象的类对象没有被初始化或者已经被下面代码初始化但实其它线程传递了新的newObj,在这里进行初始化并从跳转到开始
if (haveNew && newObj) {
Class cls = newObj->getIsa();
if (cls != previouslyInitializedClass &&
!((objc_class *)cls)->isInitialized())
{
SideTable::unlockTwo(oldTable, newTable);
_class_initialize(_class_getNonMetaClass(cls, (id)newObj));
previouslyInitializedClass = cls;
goto retry;
}
}
// 解除旧的弱引用关系
if (haveOld) {
weak_unregister_no_lock(&oldTable->weak_table, oldObj, location);
}
if (haveNew) {
newObj = (objc_object *)
// 注册新的弱引用关系
weak_register_no_lock(&newTable->weak_table, (id)newObj, location,
crashIfDeallocating);
// 给对象打上弱引用标记,用于对象被销毁时清除弱引用
if (newObj && !newObj->isTaggedPointer()) {
newObj->setWeaklyReferenced_nolock();
}
// 这个时候才真正让弱引用指针指向newObj
*location = (id)newObj;
}
else {
// No new value. The storage is not changed.
}
// 线程锁
SideTable::unlockTwo(oldTable, newTable);
return (id)newObj;
}
以上代码比较好理解,简单来说就是:从SideTables取出新、旧值所对应的SideTable,在旧值对应的SideTable中解除旧的弱引用关系,在新值对应的SideTable中添加新的弱引用。这里不好理解的就是SideTables,其对应的代码如下。
alignas(TStripedMap) static uint8_t
SideTableBuf[sizeof(StripedMap)];
static StripedMap& SideTables() {
return *reinterpret_cast*>(SideTableBuf);
}
首先这里定义一个静态类型的数组SideTableBuf并进行了内存对齐,其大小是StripedMap
回到上面的storeWeak函数,再看解除弱引用关系的函数调用weak_unregister_no_lock与添加新的弱引用关系调用的函数weak_register_no_lock其第一个参数正是从SideTables取到的SideTable结构体实例的成员变量--weak_table。所以接下来,弱引用的实现转到了对weak_table_t的操作。先回顾一下weak_table_t的结构。
struct weak_table_t {
weak_entry_t *weak_entries; //数组,存储弱引用指针及其所指向的对象(重点)
size_t num_entries; // 当前weak_entries中有值的个数
uintptr_t mask; // 用于对weak_entries取值时下标的限定(不越界)
uintptr_t max_hash_displacement; // 记录从weak_entries找到空(nil)的entry时计算索引(index)最多的次数
};
struct weak_entry_t {
DisguisedPtr referent; // 弱引用指向的对象([NSObect new])
union { // 联合体,两个结构体大小(8*4字节)一致共用同一块内存,可以通过以下变量名访这32字节的内存
struct {
weak_referrer_t *referrers; // 弱引用指针数组
uintptr_t out_of_line_ness : 2; // 指定位域的结构体成员变量,加上下面的刚好64位,8字节
uintptr_t num_refs : PTR_MINUS_2; // PTR_MINUS_2 == 62 on ARM64,弱引用个数(referrers数组中有值的个数)
uintptr_t mask; // 用于对referrers取值时下标的限定(不越界)
uintptr_t max_hash_displacement; // 记录从referrers找到空(nil)的weak_referrer_t时计算索引(index)最多的次数
};
struct {
// out_of_line_ness field is low bits of inline_referrers[1]
weak_referrer_t inline_referrers[WEAK_INLINE_COUNT]; // WEAK_INLINE_COUNT == 4
};
};
};
weak_table_t结构体中主要是一个指向weak_entry_t数组的指针,其它的成员变量辅助数组的增删改查。每一个weak_entry_t结构体又包含两个关键的成员,指向对象的referent,指向弱引用指针数组的referrers。因为每个对象可以对应多个弱引用指针,所以需要用referrers数组结构来保存。接下来解除弱引用关系的函数weak_unregister_no_lock。
void
weak_unregister_no_lock(weak_table_t *weak_table, id referent_id,
id *referrer_id)
{
objc_object *referent = (objc_object *)referent_id;
objc_object **referrer = (objc_object **)referrer_id;
weak_entry_t *entry;
if (!referent) return;
// 根据referent参数也就是示例中的[NSObject new]对象来查找相应就entry
if ((entry = weak_entry_for_referent(weak_table, referent))) {
// 从entry中移除弱引用指针referrer也就是示例中的&instanceObjc
remove_referrer(entry, referrer);
bool empty = true;
if (entry->out_of_line() && entry->num_refs != 0) {
empty = false; // entry.referent所指向的对象超过4个弱引用
}
else {
entry.referent所指向的对象少于4个弱引用,但不为空
for (size_t i = 0; i < WEAK_INLINE_COUNT; i++) {
if (entry->inline_referrers[i]) {
empty = false;
break;
}
}
}
// 如果entry没有包含弱引用指针则从weak_table中移除
if (empty) {
weak_entry_remove(weak_table, entry);
}
}
}
解除弱引用关于的逻辑很直接,在weak_table中查找包含新对象的entry,然后从entry移除弱引用指针。移除后查看一下entry是否空了,是空的entry就从weak_table中移除。再来看weak_register_no_lock添加弱引用关于的实现。
id
weak_register_no_lock(weak_table_t *weak_table, id referent_id,
id *referrer_id, bool crashIfDeallocating)
{
objc_object *referent = (objc_object *)referent_id;
objc_object **referrer = (objc_object **)referrer_id;
// 提前判空或taggedPointer,taggedPointer已不算对象
if (!referent || referent->isTaggedPointer()) return referent_id;
// ensure that the referenced object is viable
bool deallocating;
if (!referent->ISA()->hasCustomRR()) {
// 如果没有实现了自定义的Retain/Release方法,查询当前是否正在dealloc
deallocating = referent->rootIsDeallocating();
}
else {
// 如果实现了自定义的Retain/Release方法,则通过SEL_allowsWeakReference询问是否允许弱引用
BOOL (*allowsWeakReference)(objc_object *, SEL) =
(BOOL(*)(objc_object *, SEL))
object_getMethodImplementation((id)referent,
SEL_allowsWeakReference);
if ((IMP)allowsWeakReference == _objc_msgForward) {
return nil;
}
deallocating =
! (*allowsWeakReference)(referent, SEL_allowsWeakReference);
}
// 当前正在dealloc则报错
if (deallocating) {
if (crashIfDeallocating) {
_objc_fatal("Cannot form weak reference to instance (%p) of "
"class %s. It is possible that this object was "
"over-released, or is in the process of deallocation.",
(void*)referent, object_getClassName((id)referent));
} else {
return nil;
}
}
// now remember it and where it is being stored
weak_entry_t *entry;
// 根据referent参数也就是示例中的[NSObject new]对象来查找相应就entry
if ((entry = weak_entry_for_referent(weak_table, referent))) {
// 添加弱引用到entry
append_referrer(entry, referrer);
}
else {
// 如果找不到说明当前weak_table中还没有此对象的弱引用,新建一个entry
weak_entry_t new_entry(referent, referrer);
// 查看当前是否需要增加weak_table的entry数,如需要则增加
weak_grow_maybe(weak_table);
// 插入entry到weak_table
weak_entry_insert(weak_table, &new_entry);
}
return referent_id;
}
如果忽略判断当前是否正在dealloc的代码,weak_register_no_lock的逻辑也相当的简单。查找对应entry,如当前有则直接在entry里新增弱引用指针。如果weak_table里没有则新建entry,查看当前是否需要增加weak_table的entry数,如需要则增加,最后插入新建军的entry。
通过新增和移除弱引用两个函数可以看出主体逻辑都非常简单粗爆,最终都是对entries与entry内referrers的操作。先从entry的查找开始,培析具体的实现逻辑。
static weak_entry_t *
weak_entry_for_referent(weak_table_t *weak_table, objc_object *referent)
{
assert(referent);
weak_entry_t *weak_entries = weak_table->weak_entries;
if (!weak_entries) return nil;
// 对象地址的哈希化,和mask按位与是为了保存begin的值不会越界(和取余的效果一样)
size_t begin = hash_pointer(referent) & weak_table->mask;
size_t index = begin;
size_t hash_displacement = 0;
while (weak_table->weak_entries[index].referent != referent) {
index = (index+1) & weak_table->mask;
// 遍历完都找不到对应的entry时报错
if (index == begin) bad_weak_table(weak_table->weak_entries);
hash_displacement++;
// 处理哈希碰撞,大于加入时最大碰撞次数还找不到则说明entries里真的没有对应entry,避免过多的遍历。
if (hash_displacement > weak_table->max_hash_displacement) {
return nil;
}
}
return &weak_table->weak_entries[index];
}
hash_pointer是对referent地址求哈希值,实际是几个位运算看不出啥规律。哈希碰撞用于解决hash_point对不同对象按位与上mask时算出同个值的问题,理解这个问题需要再看增加entry的函数
static void weak_entry_insert(weak_table_t *weak_table, weak_entry_t *new_entry)
{
weak_entry_t *weak_entries = weak_table->weak_entries;
assert(weak_entries != nil);
// 此处逻辑同上
size_t begin = hash_pointer(new_entry->referent) & (weak_table->mask);
size_t index = begin;
size_t hash_displacement = 0;
// 算出的index处不为空则说明发生了碰撞(不同对象算出了同样的值),往后查找空的entry并记录往后了多少次
while (weak_entries[index].referent != nil) {
index = (index+1) & weak_table->mask;
if (index == begin) bad_weak_table(weak_entries);
hash_displacement++;
}
// 找到了空位,赋值
weak_entries[index] = *new_entry;
// 有效entry计数加1
weak_table->num_entries++;
// 记录最大碰撞次数
if (hash_displacement > weak_table->max_hash_displacement) {
weak_table->max_hash_displacement = hash_displacement;
}
}
insert时会记录最大碰撞次数,方便查找时作为一个限定避免过多无意义的查找。其实这里查找与新增完全可以从头到尾去遍历数组,但这样做显然没有计算hash值效率高。并且weak_table还会对entries进行扩容,使得碰撞的次数控制在合理的范围内。看下weak_register_no_lock内新增entry时的扩容函数。
static void weak_resize(weak_table_t *weak_table, size_t new_size)
{
size_t old_size = TABLE_SIZE(weak_table);
weak_entry_t *old_entries = weak_table->weak_entries;
weak_entry_t *new_entries = (weak_entry_t *)
calloc(new_size, sizeof(weak_entry_t));
// 新的mask值,entry总数-1
weak_table->mask = new_size - 1;
weak_table->weak_entries = new_entries;
// 重新计算哈希碰撞
weak_table->max_hash_displacement = 0;
// 调用insert时重新计算
weak_table->num_entries = 0; // restored by weak_entry_insert below
// 把旧的entry复制到新的entry
if (old_entries) {
weak_entry_t *entry;
weak_entry_t *end = old_entries + old_size;
for (entry = old_entries; entry < end; entry++) {
if (entry->referent) {
weak_entry_insert(weak_table, entry);
}
}
free(old_entries);
}
}
// Grow the given zone's table of weak references if it is full.
static void weak_grow_maybe(weak_table_t *weak_table)
{
size_t old_size = TABLE_SIZE(weak_table);
// 有效的entry大于全部的3/4,扩容到原来的两倍
if (weak_table->num_entries >= old_size * 3 / 4) {
weak_resize(weak_table, old_size ? old_size*2 : 64);
}
}
从上面扩容的代码可以看出扩容的操作不仅是重新申请内存还需要把原来的复制到新申请到的内存上,也相当消耗性能。以上是增删改查中的「查」、「增」逻辑,「删」的逻辑比较简单这里就不解析了,需要注意的是删的操作会有与扩容相反的操作--减小entries数量。
接下来再来看下entry的「改」,也就是在entries中找到对应entry时调用的append_referrer函数。
static void append_referrer(weak_entry_t *entry, objc_object **new_referrer)
{
// 查看out_of_lines标志位,查看当前entry是否分配了额外的数组
if (! entry->out_of_line()) {
// 如果没有值偿试找空位放入弱引用指针
for (size_t i = 0; i < WEAK_INLINE_COUNT; i++) {
if (entry->inline_referrers[i] == nil) {
entry->inline_referrers[i] = new_referrer;
return;
}
}
// 没有找到空位则申请额外的内存
weak_referrer_t *new_referrers = (weak_referrer_t *)
calloc(WEAK_INLINE_COUNT, sizeof(weak_referrer_t));
// 直接放在entry内的弱引用指针加入新分配的额外数组内
for (size_t i = 0; i < WEAK_INLINE_COUNT; i++) {
new_referrers[i] = entry->inline_referrers[i];
}
// 分配了额外的数组后在entry内设置相关标志位
entry->referrers = new_referrers;
entry->num_refs = WEAK_INLINE_COUNT;
// 用于标记entry是否超过4个弱引用指针,REFERRERS_OUT_OF_LINE==2
entry->out_of_line_ness = REFERRERS_OUT_OF_LINE;
entry->mask = WEAK_INLINE_COUNT-1;
// 哈希碰撞计数
entry->max_hash_displacement = 0;
}
assert(entry->out_of_line());
// 扩容
if (entry->num_refs >= TABLE_SIZE(entry) * 3/4) {
return grow_refs_and_insert(entry, new_referrer);
}
size_t begin = w_hash_pointer(new_referrer) & (entry->mask);
size_t index = begin;
size_t hash_displacement = 0;
while (entry->referrers[index] != nil) {
hash_displacement++;
index = (index+1) & entry->mask;
if (index == begin) bad_weak_table(entry);
}
if (hash_displacement > entry->max_hash_displacement) {
entry->max_hash_displacement = hash_displacement;
}
weak_referrer_t &ref = entry->referrers[index];
ref = new_referrer;
entry->num_refs++;
}
append_referrer实际就是新增referrers的逻辑与新增entry的逻辑基本相似,不同的是referrers内有一个联合体,当弱引用指针数少于4个时会直接保存在联合体的4*8字节内。如若大于4个则会另外申请内存保存并把返回的内存首地址给到联合体的首个8字节也就是weak_referrer_t变量,联合体内另外的内存空间则增删改查的辅助数据。这里用联合体的第2个8字节的低两位(out_of_line_ness)作为标志来判断当前弱引用保存的位置。但是当指针数少于4个直接保持在联合体内会覆盖out_of_line_ness标志位,那还怎么标识呢?原因在于,newReferrer(弱引用指针)赋值给weak_referrer_t(实际是DisguisedPtr
当对象被销毁时会根据对象的弱引用标志位来判断是否需要调用weak_clear_no_lock从weak_table中移除对应entry。如果理解了上面的代码解析,相信这部分也会顺理成章。
总节
weak的实现实际就是对自定义哈希表的管理,只不过这个哈希表支持一对多的系。Runtime所有的弱引用都由StripedMap内的64个全局SideTable进行管理。用一个简单的哈希算法根据对象地址找一个SideTable。SideTable又包含一个专门用来管理弱引用的weak_table。每个weak_table又包含若干weak_entry_t。每个对象的弱引用都用一个weak_entry_t来管理,weak_entry_t内就是对象(referent)与弱引用指针(referentes)。weak_table和weak_entry合作完成一对多的印射关系。