005-类结构探索(一)

前言

前面我们分析了对象&类&元类的层次结构,以及他们之间的关系。
显然,类模版cls是我们整个OC面向对象设计里面的核心数据结构。它保存了对象创建所需的信息(大小,成员,成员访问路径等),也保存了方法/属性/协议/成员列表/缓存等共享内容。
让我们一起通过LLDB和源码,一步步探索、认识类结构的底层实现。

objc_class

objc4-818.2的源码

// Legacy declaration of objc_class. Only the isa member is declared
// unconditionally; every other field is compiled in only when __OBJC2__ is
// not defined, and the whole struct is marked OBJC2_UNAVAILABLE.
struct objc_class {
    Class _Nonnull isa  OBJC_ISA_AVAILABILITY;

// The fields below exist only in the pre-ObjC2 layout.
#if !__OBJC2__
    Class _Nullable super_class                              OBJC2_UNAVAILABLE;
    const char * _Nonnull name                               OBJC2_UNAVAILABLE;
    long version                                             OBJC2_UNAVAILABLE;
    long info                                                OBJC2_UNAVAILABLE;
    long instance_size                                       OBJC2_UNAVAILABLE;
    struct objc_ivar_list * _Nullable ivars                  OBJC2_UNAVAILABLE;
    struct objc_method_list * _Nullable * _Nullable methodLists                    OBJC2_UNAVAILABLE;
    struct objc_cache * _Nonnull cache                       OBJC2_UNAVAILABLE;
    struct objc_protocol_list * _Nullable protocols          OBJC2_UNAVAILABLE;
#endif

} OBJC2_UNAVAILABLE;

上面是objc2之前的版本(非objc2)中objc_class的定义,显然不是我们研究的重点。
在objc-runtime-new.h中,找到了objc2的定义如下:

// ObjC2 declaration of objc_class (abridged). It derives from objc_object,
// which supplies the isa member, and adds superclass, cache and bits.
// Copying and moving are explicitly disabled.
struct objc_class : objc_object {
  objc_class(const objc_class&) = delete;
  objc_class(objc_class&&) = delete;
  void operator=(const objc_class&) = delete;
  void operator=(objc_class&&) = delete;
    // Class ISA;
    Class superclass;
    cache_t cache;             // formerly cache pointer and vtable
    class_data_bits_t bits;    // class_rw_t * plus custom rr/alloc flags
    /*
     * A large number of method definitions, plus handling for different
     * platforms and CPU architectures, are omitted here; see the objc4
     * source for the full declaration.
     */
};

新版的objc_class定义继承自objc_object,因此也继承了isa指针成员。
当前结构体有四个成员:isa、superclass、cache、bits。
isa指向元类
superclass指向父类
cache是缓存,跟运行时相关,我们后面单独一个篇章来探索这个缓存机制。
那么bits可能就是存储方法列表,属性列表等信息的地方。

class_data_bits_t

// Wrapper around a single uintptr_t word. The word carries the class_rw_t
// pointer (extracted with FAST_DATA_MASK) together with flag bits such as
// the Swift markers read and written by the accessors below.
struct class_data_bits_t {
    // objc_class is a friend, so it may use the private members of this struct.
    friend objc_class;

    // Values are the FAST_ flags above.
    uintptr_t bits;
private:
    // Returns true when any bit selected by `bit` is set in `bits`.
    bool getBit(uintptr_t bit) const
    {
        return bits & bit;
    }

    // Atomically set the bits in `set` and clear the bits in `clear`.
    // set and clear must not overlap.
    void setAndClearBits(uintptr_t set, uintptr_t clear)
    {
        ASSERT((set & clear) == 0);
        uintptr_t newBits, oldBits = LoadExclusive(&bits);
        // Recompute from the latest observed value until the store succeeds.
        do {
            newBits = (oldBits | set) & ~clear;
        } while (slowpath(!StoreReleaseExclusive(&bits, &oldBits, newBits)));
    }

    // Atomically ORs `set` into `bits` (relaxed ordering).
    void setBits(uintptr_t set) {
        __c11_atomic_fetch_or((_Atomic(uintptr_t) *)&bits, set, __ATOMIC_RELAXED);
    }

    // Atomically clears the bits in `clear` from `bits` (relaxed ordering).
    void clearBits(uintptr_t clear) {
        __c11_atomic_fetch_and((_Atomic(uintptr_t) *)&bits, ~clear, __ATOMIC_RELAXED);
    }

public:
   // Returns the class_rw_t pointer stored in the FAST_DATA_MASK portion of bits.
    class_rw_t* data() const {
        return (class_rw_t *)(bits & FAST_DATA_MASK);
    }
    // Replaces the data pointer while keeping the bits outside FAST_DATA_MASK.
    void setData(class_rw_t *newData)
    {
        ASSERT(!data()  ||  (newData->flags & (RW_REALIZING | RW_FUTURE)));
        // Set during realization or construction only. No locking needed.
        // Use a store-release fence because there may be concurrent
        // readers of data and data's contents.
        uintptr_t newBits = (bits & ~FAST_DATA_MASK) | (uintptr_t)newData;
        atomic_thread_fence(memory_order_release);
        bits = newBits;
    }

    // Get the class's ro data, even in the presence of concurrent realization.
    // fixme this isn't really safe without a compiler barrier at least
    // and probably a memory barrier when realizeClass changes the data field
    const class_ro_t *safe_ro() const {
        class_rw_t *maybe_rw = data();
        if (maybe_rw->flags & RW_REALIZED) {
            // maybe_rw is rw
            return maybe_rw->ro();
        } else {
            // maybe_rw is actually ro
            return (class_ro_t *)maybe_rw;
        }
    }

#if SUPPORT_INDEXED_ISA
    // Stores the class array index in the class_rw_t; Idx must be non-zero.
    void setClassArrayIndex(unsigned Idx) {
        // 0 is unused as then we can rely on zero-initialisation from calloc.
        ASSERT(Idx > 0);
        data()->index = Idx;
    }
#else
    // No-op when indexed isa is not supported.
    void setClassArrayIndex(__unused unsigned Idx) {
    }
#endif

    // Returns the stored class array index, or 0 without SUPPORT_INDEXED_ISA.
    unsigned classArrayIndex() {
#if SUPPORT_INDEXED_ISA
        return data()->index;
#else
        return 0;
#endif
    }

    // True when either the stable or the legacy Swift bit is set.
    bool isAnySwift() {
        return isSwiftStable() || isSwiftLegacy();
    }

    bool isSwiftStable() {
        return getBit(FAST_IS_SWIFT_STABLE);
    }
    // Sets the stable Swift bit and clears the legacy one in one atomic update.
    void setIsSwiftStable() {
        setAndClearBits(FAST_IS_SWIFT_STABLE, FAST_IS_SWIFT_LEGACY);
    }

    bool isSwiftLegacy() {
        return getBit(FAST_IS_SWIFT_LEGACY);
    }
    // Sets the legacy Swift bit and clears the stable one in one atomic update.
    void setIsSwiftLegacy() {
        setAndClearBits(FAST_IS_SWIFT_LEGACY, FAST_IS_SWIFT_STABLE);
    }

    // fixme remove this once the Swift runtime uses the stable bits
    bool isSwiftStable_ButAllowLegacyForNow() {
        return isAnySwift();
    }

    _objc_swiftMetadataInitializer swiftMetadataInitializer() {
        // This function is called on un-realized classes without
        // holding any locks.
        // Beware of races with other realizers.
        return safe_ro()->swiftMetadataInitializer();
    }
};

class_data_bits_t结构体的成员非常简单,只有一个指针成员uintptr_t bits。
既然只有一个指针成员,为什么还要进行一次结构体包装?

经过class_data_bits_t结构体包装,扩展了一系列的数据的访问方法。
bits指针不仅存储了class_rw_t结构体的指针地址(可以通过data()函数获得,也是我们类数据的主要存储结构),通过扩展方法还可以对bits的一些位进行设置,来存储一些额外信息,比如:hasCustomRR、一些swift信息。这里看起来类似nonpointer_isa,对指针的空余位进行了利用,来存储更多的信息,体现了苹果对内存的优化。

class_data_bits_t更主要的作用是通过bits成员存储了class_rw_t *指针。

class_rw_t

// Per-class read-write data. Besides the flag word and the subclass/sibling
// links, it holds ro_or_rw_ext: one atomic word that is either a
// `const class_ro_t *` or a `class_rw_ext_t *`, distinguished by the tag bit
// that objc::PointerUnion keeps in the lowest bit of the value.
struct class_rw_t {
    // Be warned that Symbolication knows the layout of this structure.
    uint32_t flags;
    uint16_t witness;
#if SUPPORT_INDEXED_ISA
    uint16_t index;
#endif

    // Raw tagged word; always read and written through ro_or_rw_ext_t.
    explicit_atomic<uintptr_t> ro_or_rw_ext;

    Class firstSubclass;
    Class nextSiblingClass;

private:
    // Tagged union over the two pointer types the word may hold.
    using ro_or_rw_ext_t = objc::PointerUnion<const class_ro_t, class_rw_ext_t, PTRAUTH_STR("class_ro_t"), PTRAUTH_STR("class_rw_ext_t")>;

    // Snapshot of the current word, wrapped as a PointerUnion.
    const ro_or_rw_ext_t get_ro_or_rwe() const {
        return ro_or_rw_ext_t{ro_or_rw_ext};
    }

    // Store a plain class_ro_t pointer (relaxed ordering).
    void set_ro_or_rwe(const class_ro_t *ro) {
        ro_or_rw_ext_t{ro, &ro_or_rw_ext}.storeAt(ro_or_rw_ext, memory_order_relaxed);
    }

    // Store a class_rw_ext_t pointer after pointing its ro member at `ro`.
    void set_ro_or_rwe(class_rw_ext_t *rwe, const class_ro_t *ro) {
        // the release barrier is so that the class_rw_ext_t::ro initialization
        // is visible to lockless readers
        rwe->ro = ro;
        ro_or_rw_ext_t{rwe, &ro_or_rw_ext}.storeAt(ro_or_rw_ext, memory_order_release);
    }

    class_rw_ext_t *extAlloc(const class_ro_t *ro, bool deep = false);

public:
    // Atomically ORs `set` into flags (relaxed ordering).
    void setFlags(uint32_t set)
    {
        __c11_atomic_fetch_or((_Atomic(uint32_t) *)&flags, set, __ATOMIC_RELAXED);
    }
    // Atomically clears the bits in `clear` from flags (relaxed ordering).
    void clearFlags(uint32_t clear)
    {
        __c11_atomic_fetch_and((_Atomic(uint32_t) *)&flags, ~clear, __ATOMIC_RELAXED);
    }

    // set and clear must not overlap
    void changeFlags(uint32_t set, uint32_t clear)
    {
        ASSERT((set & clear) == 0);

        uint32_t oldf, newf;
        // Retry the compare-and-swap until no concurrent writer interferes.
        do {
            oldf = flags;
            newf = (oldf | set) & ~clear;
        } while (!OSAtomicCompareAndSwap32Barrier(oldf, newf, (volatile int32_t *)&flags));
    }

    // Returns the class_rw_ext_t if one is installed; otherwise dyn_cast
    // yields a value-initialized (null) pointer.
    class_rw_ext_t *ext() const {
        return get_ro_or_rwe().dyn_cast<class_rw_ext_t *>(&ro_or_rw_ext);
    }

    // Returns the existing class_rw_ext_t, allocating one via extAlloc when
    // the word still holds a bare class_ro_t pointer.
    class_rw_ext_t *extAllocIfNeeded() {
        auto v = get_ro_or_rwe();
        if (fastpath(v.is<class_rw_ext_t *>())) {
            return v.get<class_rw_ext_t *>(&ro_or_rw_ext);
        } else {
            return extAlloc(v.get<const class_ro_t *>(&ro_or_rw_ext));
        }
    }

    class_rw_ext_t *deepCopy(const class_ro_t *ro) {
        return extAlloc(ro, true);
    }

    // Returns the class_ro_t, reading it through the extension when present.
    const class_ro_t *ro() const {
        auto v = get_ro_or_rwe();
        if (slowpath(v.is<class_rw_ext_t *>())) {
            return v.get<class_rw_ext_t *>(&ro_or_rw_ext)->ro;
        }
        return v.get<const class_ro_t *>(&ro_or_rw_ext);
    }

    // Replaces the class_ro_t, either inside the extension or in the word itself.
    void set_ro(const class_ro_t *ro) {
        auto v = get_ro_or_rwe();
        if (v.is<class_rw_ext_t *>()) {
            v.get<class_rw_ext_t *>(&ro_or_rw_ext)->ro = ro;
        } else {
            set_ro_or_rwe(ro);
        }
    }

    // Method lists: taken from the extension when present, otherwise built
    // from the class_ro_t base methods.
    const method_array_t methods() const {
        auto v = get_ro_or_rwe();
        if (v.is<class_rw_ext_t *>()) {
            return v.get<class_rw_ext_t *>(&ro_or_rw_ext)->methods;
        } else {
            return method_array_t{v.get<const class_ro_t *>(&ro_or_rw_ext)->baseMethods()};
        }
    }

    // Property lists: same selection rule as methods().
    const property_array_t properties() const {
        auto v = get_ro_or_rwe();
        if (v.is<class_rw_ext_t *>()) {
            return v.get<class_rw_ext_t *>(&ro_or_rw_ext)->properties;
        } else {
            return property_array_t{v.get<const class_ro_t *>(&ro_or_rw_ext)->baseProperties};
        }
    }

    // Protocol lists: same selection rule as methods().
    const protocol_array_t protocols() const {
        auto v = get_ro_or_rwe();
        if (v.is<class_rw_ext_t *>()) {
            return v.get<class_rw_ext_t *>(&ro_or_rw_ext)->protocols;
        } else {
            return protocol_array_t{v.get<const class_ro_t *>(&ro_or_rw_ext)->baseProtocols};
        }
    }
};

class_rw_t结构体有以下几个成员:

  • uint32_t flags;
    标记位,通过掩码运算,来存取一些bool信息,包括:
    allowsPreoptCaches、allowsPreoptInlinedSels、instancesHaveAssociatedObjects、isInitializing、isInitialized、isRealized、isMetaClass、isMetaClassMaybeUnrealized
  • uint16_t witness;
    如果一个类被实现,witness记录可以find的类的range
    objc::dataSegmentsRanges.find((uintptr_t)cls, index)
    witness = index
  • explicit_atomic<uintptr_t> ro_or_rw_ext;
    进行原子性包装的指针
  • Class firstSubclass;
    第一个子类
  • Class nextSiblingClass;
    兄弟类
    cls->data()->nextSiblingClass = _firstRealizedClass;
    subcls->data()->nextSiblingClass = supercls->data()->firstSubclass;

单独看这几个成员,我们仍然没有找到方法、属性、协议等信息的影子。但是我们发现class_rw_t有methods()、properties()、protocols()三个方法,它们通过ro_or_rw_ext指针来获取方法、属性、协议等信息。

ro_or_rw_ext

这个指针成员的实现也非常巧妙,它的存取都要经过一次PointerUnion的转换。ro_or_rw_ext可以是class_ro_t*,也可以是class_rw_ext_t*,通过指针的最低一位(tag位)进行区分:最低位为0表示class_ro_t*,为1表示class_rw_ext_t*。

private:
    // Tagged union over the two pointer types ro_or_rw_ext may hold:
    // `const class_ro_t *` or `class_rw_ext_t *`.
    using ro_or_rw_ext_t = objc::PointerUnion<const class_ro_t, class_rw_ext_t, PTRAUTH_STR("class_ro_t"), PTRAUTH_STR("class_rw_ext_t")>;

    // Snapshot of the current word, wrapped as a PointerUnion.
    const ro_or_rw_ext_t get_ro_or_rwe() const {
        return ro_or_rw_ext_t{ro_or_rw_ext};
    }

    // Store a plain class_ro_t pointer (relaxed ordering).
    void set_ro_or_rwe(const class_ro_t *ro) {
        ro_or_rw_ext_t{ro, &ro_or_rw_ext}.storeAt(ro_or_rw_ext, memory_order_relaxed);
    }

    // Store a class_rw_ext_t pointer after pointing its ro member at `ro`.
    void set_ro_or_rwe(class_rw_ext_t *rwe, const class_ro_t *ro) {
        // the release barrier is so that the class_rw_ext_t::ro initialization
        // is visible to lockless readers
        rwe->ro = ro;
        ro_or_rw_ext_t{rwe, &ro_or_rw_ext}.storeAt(ro_or_rw_ext, memory_order_release);
    }

    class_rw_ext_t *extAlloc(const class_ro_t *ro, bool deep = false);
****************************************************************
class PointerUnion {
    uintptr_t _value;

    static_assert(alignof(T1) >= 2, "alignment requirement");
    static_assert(alignof(T2) >= 2, "alignment requirement");

    struct IsPT1 {
      static const uintptr_t Num = 0;
    };
    struct IsPT2 {
      static const uintptr_t Num = 1;
    };
    template  struct UNION_DOESNT_CONTAIN_TYPE {};

    uintptr_t getPointer() const {
        return _value & ~1;
    }
    uintptr_t getTag() const {
        return _value & 1;
    }

public:
    explicit PointerUnion(const std::atomic &raw)
    : _value(raw.load(std::memory_order_relaxed))
    { }
    PointerUnion(T1 *t, const void *address) {
        _value = (uintptr_t)Auth1::sign(t, address);
    }
    PointerUnion(T2 *t, const void *address) {
        _value = (uintptr_t)Auth2::sign(t, address) | 1;
    }

    void storeAt(std::atomic &raw, std::memory_order order) const {
        raw.store(_value, order);
    }

    template 
    bool is() const {
        using Ty = typename PointerUnionTypeSelector>>::Return;
        return getTag() == Ty::Num;
    }

    template  T get(const void *address) const {
        ASSERT(is() && "Invalid accessor called");
        using AuthT = typename PointerUnionTypeSelector>>::Return;

        return AuthT::auth((T)getPointer(), address);
    }

    template  T dyn_cast(const void *address) const {
      if (is())
        return get(address);
      return T();
    }
};

class_rw_ext_t

// Extension record that class_rw_t::ro_or_rw_ext may point to. It carries
// the class_ro_t pointer plus the method, property and protocol arrays that
// class_rw_t::methods()/properties()/protocols() return when it is present.
struct class_rw_ext_t {
    DECLARE_AUTHED_PTR_TEMPLATE(class_ro_t)
    // Assigned by class_rw_t::set_ro_or_rwe() and class_rw_t::set_ro().
    class_ro_t_authed_ptr ro;
    method_array_t methods;
    property_array_t properties;
    protocol_array_t protocols;
    // NOTE(review): presumably a cached demangled class name - confirm against the runtime source.
    char *demangledName;
    uint32_t version;
};

class_rw_ext_t中保存了methods、properties、protocols,以及一个指向class_ro_t的ro成员。那么class_ro_t又是什么呢?在下面class_ro_t的定义里,千呼万唤始出来,终于看到了baseMethodList、baseProtocols、ivars、weakIvarLayout、baseProperties这些内容。

class_ro_t

// Read-only class data: instance layout information, the class name, and
// the base method / protocol / ivar / property lists.
struct class_ro_t {
    uint32_t flags;
    uint32_t instanceStart;
    uint32_t instanceSize;
#ifdef __LP64__
    uint32_t reserved;
#endif

    // Which member is meaningful depends on RO_META; see getNonMetaclass()
    // and getIvarLayout() below.
    union {
        const uint8_t * ivarLayout;
        Class nonMetaclass;
    };

    // Atomic C-string pointer; read with acquire ordering by getName().
    explicit_atomic<const char *> name;
    // With ptrauth, this is signed if it points to a small list, but
    // may be unsigned if it points to a big list.
    void *baseMethodList;
    protocol_list_t * baseProtocols;
    const ivar_list_t * ivars;

    const uint8_t * weakIvarLayout;
    property_list_t *baseProperties;

    // This field exists only when RO_HAS_SWIFT_INITIALIZER is set.
    _objc_swiftMetadataInitializer __ptrauth_objc_method_list_imp _swiftMetadataInitializer_NEVER_USE[0];

    // Returns the trailing Swift metadata initializer, or nil when the
    // RO_HAS_SWIFT_INITIALIZER flag is not set.
    _objc_swiftMetadataInitializer swiftMetadataInitializer() const {
        if (flags & RO_HAS_SWIFT_INITIALIZER) {
            return _swiftMetadataInitializer_NEVER_USE[0];
        } else {
            return nil;
        }
    }

    const char *getName() const {
        return name.load(std::memory_order_acquire);
    }

    static const uint16_t methodListPointerDiscriminator = 0xC310;
#if 0 // FIXME: enable this when we get a non-empty definition of __ptrauth_objc_method_list_pointer from ptrauth.h.
        static_assert(std::is_same<
                      void * __ptrauth_objc_method_list_pointer *,
                      void * __ptrauth(ptrauth_key_method_list_pointer, 1, methodListPointerDiscriminator) *>::value,
                      "Method list pointer signing discriminator must match ptrauth.h");
#endif

    // Returns the base method list, stripping or authenticating the stored
    // pointer as needed when pointer authentication is enabled.
    method_list_t *baseMethods() const {
#if __has_feature(ptrauth_calls)
        method_list_t *ptr = ptrauth_strip((method_list_t *)baseMethodList, ptrauth_key_method_list_pointer);
        if (ptr == nullptr)
            return nullptr;

        // Don't auth if the class_ro and the method list are both in the shared cache.
        // This is secure since they'll be read-only, and this allows the shared cache
        // to cut down on the number of signed pointers it has.
        bool roInSharedCache = objc::inSharedCache((uintptr_t)this);
        bool listInSharedCache = objc::inSharedCache((uintptr_t)ptr);
        if (roInSharedCache && listInSharedCache)
            return ptr;

        // Auth all other small lists.
        if (ptr->isSmallList())
            ptr = ptrauth_auth_data((method_list_t *)baseMethodList,
                                    ptrauth_key_method_list_pointer,
                                    ptrauth_blend_discriminator(&baseMethodList,
                                                                methodListPointerDiscriminator));
        return ptr;
#else
        return (method_list_t *)baseMethodList;
#endif
    }

    // Discriminator used to sign baseMethodList: the field's own address
    // blended with methodListPointerDiscriminator.
    uintptr_t baseMethodListPtrauthData() const {
        return ptrauth_blend_discriminator(&baseMethodList,
                                           methodListPointerDiscriminator);
    }

    // Returns a copy of this class_ro_t made with memdup, including the
    // trailing Swift initializer slot when present. The method list pointer
    // is re-signed for the copy's address when ptrauth is enabled.
    class_ro_t *duplicate() const {
        bool hasSwiftInitializer = flags & RO_HAS_SWIFT_INITIALIZER;

        size_t size = sizeof(*this);
        if (hasSwiftInitializer)
            size += sizeof(_swiftMetadataInitializer_NEVER_USE[0]);

        class_ro_t *ro = (class_ro_t *)memdup(this, size);

        if (hasSwiftInitializer)
            ro->_swiftMetadataInitializer_NEVER_USE[0] = this->_swiftMetadataInitializer_NEVER_USE[0];

#if __has_feature(ptrauth_calls)
        // Re-sign the method list pointer if it was signed.
        // NOTE: It is possible for a signed pointer to have a signature
        // that is all zeroes. This is indistinguishable from a raw pointer.
        // This code will treat such a pointer as signed and re-sign it. A
        // false positive is safe: method list pointers are either authed or
        // stripped, so if baseMethods() doesn't expect it to be signed, it
        // will ignore the signature.
        void *strippedBaseMethodList = ptrauth_strip(baseMethodList, ptrauth_key_method_list_pointer);
        void *signedBaseMethodList = ptrauth_sign_unauthenticated(strippedBaseMethodList,
                                                                  ptrauth_key_method_list_pointer,
                                                                  baseMethodListPtrauthData());
        if (baseMethodList == signedBaseMethodList) {
            ro->baseMethodList = ptrauth_auth_and_resign(baseMethodList,
                                                         ptrauth_key_method_list_pointer,
                                                         baseMethodListPtrauthData(),
                                                         ptrauth_key_method_list_pointer,
                                                         ro->baseMethodListPtrauthData());
        } else {
            // Special case: a class_ro_t in the shared cache pointing to a
            // method list in the shared cache will not have a signed pointer,
            // but the duplicate will be expected to have a signed pointer since
            // it's not in the shared cache. Detect that and sign it.
            bool roInSharedCache = objc::inSharedCache((uintptr_t)this);
            bool listInSharedCache = objc::inSharedCache((uintptr_t)strippedBaseMethodList);
            if (roInSharedCache && listInSharedCache)
                ro->baseMethodList = ptrauth_sign_unauthenticated(strippedBaseMethodList,
                                                                  ptrauth_key_method_list_pointer,
                                                                  ro->baseMethodListPtrauthData());
        }
#endif

        return ro;
    }

    // Only valid when RO_META is set; asserts otherwise.
    Class getNonMetaclass() const {
        ASSERT(flags & RO_META);
        return nonMetaclass;
    }

    // When RO_META is set the union holds nonMetaclass, so no ivar layout
    // is reported.
    const uint8_t *getIvarLayout() const {
        if (flags & RO_META)
            return nullptr;
        return ivarLayout;
    }
};

总结:

写到这里,篇幅有些长了。我们发现objc2中类的数据结构相比之前的版本更为复杂,层级也比较深。

  • objc_class嵌套了class_data_bits_t类型的结构体bits
  • 结构体bits只有一个uintptr_t类型的成员bits,但作为c++结构体,它扩展了一些存取方法,在成员bits有限的bit位中,存储了指向class_rw_t结构体的指针,以及一些其他的信息,比如:hasCustomRR、swift相关标记。
  • class_rw_t包含了flags、witness、firstSubclass、nextSiblingClass等成员,以及最重要的一个联合指针ro_or_rw_ext。
  • ro_or_rw_ext可以是class_ro_t*类型的指针,也可以是class_rw_ext_t*类型的指针。
  • class_rw_ext_t包含ro、方法列表,属性列表、协议列表等
  • class_ro_t不仅包含方法列表、属性列表、协议列表,还有一些其他的类相关内容。

那么class_ro_t和class_rw_ext_t有什么区别?相对以前的版本,为什么苹果给类的数据结构设计了这么多的层级呢?rw、rw_ext、ro的作用分别是什么?且听下回分解吧。

你可能感兴趣的:(005-类结构探索(一))