地表最强Category和Load、Initialize方法总结

先上结论

类A的Category会在编译期编译成category_t结构体，确定了类对象、方法、协议、属性等信息；结构体本身存放在Mach-O文件__DATA段的__objc_const节中，指向它的指针存放在__DATA段的__objc_catlist和__objc_nlcatlist节中。在运行期，分类的方法列表会被合并到A类方法列表的头部（原类方法向尾部后移），且分类方法的合并顺序与编译顺序相反，即最后编译的分类的方法会位于合并后列表的最前面。
Load方法在main函数执行之前（dyld加载镜像时）调用，调用方式是通过函数指针直接调用，而不是通过objc_msgSend进行消息发送，因此类和分类中的Load方法都会被调用，互不覆盖。调用规则为：先主类，按照编译顺序，并根据继承关系先父类后子类依次调用；再分类，按照编译顺序依次调用。Load方法如有实现，只会被调用一次；没有实现，则不会调用。
Initialize方法在类第一次收到消息（即第一次经由objc_msgSend触发方法查找）时调用，并且每个类只会触发一次；但由于它是通过消息发送的方式调用的，当子类未实现该方法而第一次收到消息时，会沿继承链查找到父类的实现，从而再次调用父类的Initialize方法。

工具

xcrun、clang
xcrun -sdk iphoneos clang -arch arm64 -rewrite-objc GWThread+Test.m
xcrun用于指定编译的sdk，arch参数指定架构，-rewrite-objc参数把objc代码改写成c++。若出现找不到sdk的错误，请检查开发者目录。
objc源码
https://opensource.apple.com/tarballs/
下载最新版本，阅读源码的意义在于理解底层原理，并且学习优秀的设计，可以不纠结具体的api

证明

证明一

我们随意在一个iOS工程新建一个Category类，这里以GWThread+Test为例，实现一个load接口，然后利用xcrun和clang输出C++代码
xcrun -sdk iphoneos clang -arch arm64 -rewrite-objc GWThread+Test.m
具体输出的代码如下：

// Layout of a category as emitted by clang's -rewrite-objc output.
// One instance is generated per category and filled in with a positional
// initializer, so the field order below is significant.
struct _category_t {
    const char *name; // C string; the generated initializer stores the class name here
    struct _class_t *cls; // class the category extends
    const struct _method_list_t *instance_methods; // instance methods added by the category (0 if none)
    const struct _method_list_t *class_methods; // class methods added by the category (0 if none)
    const struct _protocol_list_t *protocols; // protocols adopted by the category (0 if none)
    const struct _prop_list_t *properties; // properties declared by the category (0 if none)
};
// Class-method list of the GWThread(Test) category, placed in the
// __DATA,__objc_const section. The struct is anonymous: clang emits the
// _method_list_t tag commented out (the article's author could not tell why).
// The rewriter's output is not guaranteed to be fully accurate, so it is used
// here only to follow the order in which things are set up.
static struct /*_method_list_t*/ {
    unsigned int entsize;  // sizeof(struct _objc_method)
    unsigned int method_count; // number of entries in method_list
    struct _objc_method method_list[1]; // sized for the single method of this category
} _OBJC_$_CATEGORY_CLASS_METHODS_GWThread_$_Test __attribute__ ((used, section ("__DATA,__objc_const"))) = {
    sizeof(_objc_method),
    1,
    // The only entry: selector "load", its type string, and the implementation function.
    {{(struct objc_selector *)"load", "v16@0:8", (void *)_C_GWThread_Test_load}}
};
// The category object itself, initialized positionally in _category_t field
// order and placed in the __DATA,__objc_const section.
static struct _category_t _OBJC_$_CATEGORY_GWThread_$_Test __attribute__ ((used, section ("__DATA,__objc_const"))) = 
{
    "GWThread", // name
    0,  //&OBJC_CLASS_$_GWThread, -- cls is left null here and assigned later by the setup function
    0, // instance_methods: none
    (const struct _method_list_t *)&_OBJC_$_CATEGORY_CLASS_METHODS_GWThread_$_Test, // class_methods: the list holding +load
    0, // protocols: none
    0, // properties: none
};
// Declaration of the class object that the category extends (defined elsewhere).
extern "C" __declspec(dllimport) struct _class_t OBJC_CLASS_$_GWThread;
// Setup hook: binds the category to its class by storing the class object's
// address in the category's cls field (which the static initializer left as 0).
static void OBJC_CATEGORY_SETUP_$_GWThread_$_Test(void ) {
    _OBJC_$_CATEGORY_GWThread_$_Test.cls = &OBJC_CLASS_$_GWThread;
}
// Array of category setup hooks, allocated in the ".objc_inithooks$B" section.
// Its single element is the address of the setup function for GWThread(Test).
#pragma section(".objc_inithooks$B", long, read, write)
__declspec(allocate(".objc_inithooks$B")) static void *OBJC_CATEGORY_SETUP[] = {
    (void *)&OBJC_CATEGORY_SETUP_$_GWThread_$_Test,
};
// Category list of this translation unit: an array of pointers to the
// category objects, emitted into the __objc_catlist section of the Mach-O
// __DATA segment (marked used / no_dead_strip so it is not discarded).
static struct _category_t *L_OBJC_LABEL_CATEGORY_$ [1] __attribute__((used, section ("__DATA, __objc_catlist,regular,no_dead_strip")))= {
    &_OBJC_$_CATEGORY_GWThread_$_Test,
};
// Non-lazy category list: pointers to the same category objects.
// NOTE(review): unlike the category list, the rewriter output carries no
// section attribute here; presumably the real compiler places this array in
// __DATA,__objc_nlcatlist -- confirm against actual compiler output.
static struct _category_t *_OBJC_LABEL_NONLAZY_CATEGORY_$[] = {
        &_OBJC_$_CATEGORY_GWThread_$_Test,
};

如上所述，在编译期会确定分类的结构、方法实现、以及对应的主类等信息

我们再看源码：

// Attach method lists and properties and protocols from categories to a class.
// Assumes the categories in cats are all loaded and sorted by load order, 
// oldest categories first.
//
// cls:          class (or metaclass) receiving the category contents.
// cats:         categories to attach; a null pointer is a no-op.
// flush_caches: when true and at least one method list was attached, the
//               caches of cls are flushed.
static void 
attachCategories(Class cls, category_list *cats, bool flush_caches)
{
    if (!cats) return;
    if (PrintReplacedMethods) printReplacements(cls, cats);

    bool isMeta = cls->isMetaClass();

    // fixme rearrange to remove these intermediate allocations
    // 1. Allocate temporary arrays with one slot per category for the
    //    method, property and protocol lists.
    method_list_t **mlists = (method_list_t **)
        malloc(cats->count * sizeof(*mlists));
    property_list_t **proplists = (property_list_t **)
        malloc(cats->count * sizeof(*proplists));
    protocol_list_t **protolists = (protocol_list_t **)
        malloc(cats->count * sizeof(*protolists));

    // Count backwards through cats to get newest categories first
    int mcount = 0;
    int propcount = 0;
    int protocount = 0;
    int i = cats->count;
    bool fromBundle = NO;
    // 2. Walk the categories from last to first and collect each one's
    //    non-null method, property and protocol list into the temporary
    //    arrays, so the newest category ends up at index 0.
    while (i--) {
        auto& entry = cats->list[i];

        method_list_t *mlist = entry.cat->methodsForMeta(isMeta);
        if (mlist) {
            mlists[mcount++] = mlist;
            fromBundle |= entry.hi->isBundle();
        }

        property_list_t *proplist = 
            entry.cat->propertiesForMeta(isMeta, entry.hi);
        if (proplist) {
            proplists[propcount++] = proplist;
        }

        protocol_list_t *protolist = entry.cat->protocols;
        if (protolist) {
            protolists[protocount++] = protolist;
        }
    }

    auto rw = cls->data();

    prepareMethodLists(cls, mlists, mcount, NO, fromBundle);
    // 3. Attach the collected method lists to the class's rw data.
    rw->methods.attachLists(mlists, mcount);
    free(mlists);
    if (flush_caches  &&  mcount > 0) flushCaches(cls);
    // 4. Attach the collected property lists to the class's rw data.
    rw->properties.attachLists(proplists, propcount);
    free(proplists);
    // 5. Attach the collected protocol lists to the class's rw data.
    rw->protocols.attachLists(protolists, protocount);
    free(protolists);
}

// Prepends addedCount list pointers from addedLists to the lists already
// stored in this container, so the added lists come before the existing ones.
// Does nothing when addedCount is 0.
void attachLists(List* const * addedLists, uint32_t addedCount) {
        if (addedCount == 0) return;
        // Case 1: an array of lists already exists.
        if (hasArray()) {
            // many lists -> many lists
            // Number of lists currently stored.
            uint32_t oldCount = array()->count;
            // Number of lists after the merge.
            uint32_t newCount = oldCount + addedCount;
            // Resize the existing array so it can hold all newCount entries.
            setArray((array_t *)realloc(array(), array_t::byteSize(newCount)));
            // Record the new number of lists.
            array()->count = newCount;
            // Shift the existing entries towards the tail by addedCount slots.
            // Source and destination may overlap, hence memmove.
            memmove(array()->lists + addedCount, array()->lists, 
                    oldCount * sizeof(array()->lists[0]));
            // Copy the added entries into the slots freed at the head.
            // addedLists is a separate buffer, so memcpy is sufficient.
            memcpy(array()->lists, addedLists, 
                   addedCount * sizeof(array()->lists[0]));
        }
        // Case 2: nothing stored yet and exactly one list is added, so it is
        // stored directly without allocating an array.
        else if (!list  &&  addedCount == 1) {
            // 0 lists -> 1 list
            list = addedLists[0];
        } 
        // Case 3: no array yet (at most one directly stored list): allocate
        // an array and put the added lists in front of the old one.
        else {
            // 1 list -> many lists
            List* oldList = list;
            uint32_t oldCount = oldList ? 1 : 0;
            uint32_t newCount = oldCount + addedCount;
            setArray((array_t *)malloc(array_t::byteSize(newCount)));
            array()->count = newCount;
            // Keep the previously stored single list: it goes to index
            // addedCount, right after the slots the memcpy below fills.
            if (oldList) array()->lists[addedCount] = oldList;
            memcpy(array()->lists, addedLists, 
                   addedCount * sizeof(array()->lists[0]));
        }
    }

在objc-runtime-new.mm文件中重点看attachCategories和attachLists方法，具体流程见上面代码中的注释。这两个方法的作用就是把category的方法、属性、协议合并到主类中，需要注意合并之后的内存分布，如下图
[图片上传失败...(image-d436aa-1536073437496)]
假设Category2和Category1分别为GWThread+Test2和GWThread+Test1，且Test2的编译晚于Test1。当程序运行起来后，Test2中的方法排在method_list的最前面，Test1次之，主类的方法排在最后。因此，如果GWThread类和它的分类中有同名方法，调用时会优先查找到Test2中的方法。

证明二

在GWThread+Test2和GWThread+Test1复写load方法，并在load处setBreakPoint。启动程序后，利用bt得到调用栈帧

frame #0: 0x00000001086523cc 线程保活`+[GWThread(self=GWThread, _cmd="load") load] at GWThread+Test.m:16
frame #1: 0x0000000108f575c0 libobjc.A.dylib`call_load_methods + 236
frame #2: 0x0000000108f5840a libobjc.A.dylib`load_images + 70
frame #3: 0x000000010865ecb7 dyld_sim`dyld::notifySingle(dyld_image_states, ImageLoader const*, ImageLoader::InitializerTimingList*) + 311
frame #4: 0x000000010866af88 dyld_sim`ImageLoader::recursiveInitialization(ImageLoader::LinkContext const&, unsigned int, char const*, ImageLoader::InitializerTimingList&, ImageLoader::UninitedUpwards&) + 322
frame #5: 0x000000010866a15e dyld_sim`ImageLoader::processInitializers(ImageLoader::LinkContext const&, unsigned int, ImageLoader::InitializerTimingList&, ImageLoader::UninitedUpwards&) + 134
frame #6: 0x000000010866a1f2 dyld_sim`ImageLoader::runInitializers(ImageLoader::LinkContext const&, ImageLoader::InitializerTimingList&) + 74
frame #7: 0x000000010865f052 dyld_sim`dyld::initializeMainExecutable() + 196
frame #8: 0x0000000108662b6b dyld_sim`dyld::_main(macho_header const*, unsigned long, int, char const**, char const**, char const**, unsigned long*) + 4408
frame #9: 0x000000010865e3db dyld_sim`start_sim + 136
frame #10: 0x0000000116fc0ded dyld`dyld::useSimulatorDyld(int, macho_header const*, char const*, int, char const**, char const**, char const**, unsigned long*, unsigned long*) + 2200
frame #11: 0x0000000116fbe7a3 dyld`dyld::_main(macho_header const*, unsigned long, int, char const**, char const**, char const**, unsigned long*) + 436
frame #12: 0x0000000116fba3d4 dyld`dyldbootstrap::start(macho_header const*, int, char const**, long, macho_header const*, unsigned long*) + 453
frame #13: 0x0000000116fba1d2 dyld`_dyld_start + 54

可见先后调用了load_images和call_load_methods方法，我们看一下源码

// Called for an image `mh`: collects the image's +load methods and then
// invokes them. Images without any +load method are skipped before any lock
// is taken.
void
load_images(const char *path __unused, const struct mach_header *mh)
{
    if (hasLoadMethods((const headerType *)mh)) {
        // Held for the remainder of the call, including the +load invocations.
        recursive_mutex_locker_t loadLock(loadMethodLock);

        {
            // Discovery runs under runtimeLock; the lock is released when
            // this scope ends.
            rwlock_writer_t runtimeWriteLock(runtimeLock);
            prepare_load_methods((const headerType *)mh);
        }

        // +load methods run without runtimeLock (re-entrant).
        call_load_methods();
    }
}

// Schedules the +load methods of the image `mhdr`: first every class in the
// image's non-lazy class list (each preceded by its not-yet-scheduled
// superclasses), then every category in the non-lazy category list, in list
// order. The caller must hold runtimeLock for writing.
void prepare_load_methods(const headerType *mhdr)
{
    size_t count, i;

    runtimeLock.assertWriting();

    // Classes first; schedule_class_load handles superclass-first ordering.
    classref_t *classlist = 
        _getObjc2NonlazyClassList(mhdr, &count);
    for (i = 0; i < count; i++) {
        schedule_class_load(remapClass(classlist[i]));
    }

    // Categories second, so they are scheduled after all classes of the image.
    category_t **categorylist = _getObjc2NonlazyCategoryList(mhdr, &count);
    for (i = 0; i < count; i++) {
        
        category_t *cat = categorylist[i];
        Class cls = remapClass(cat->cls);
        if (!cls) continue;  // category for ignored weak-linked class
        realizeClass(cls);
        assert(cls->ISA()->isRealized());
        add_category_to_loadable_list(cat);
    }
}
// Schedules +load for cls, first recursing into every superclass that has
// not been scheduled yet. A class already flagged RW_LOADED is skipped, so
// each class is added at most once.
// cls must already be connected.
static void schedule_class_load(Class cls)
{
    if (cls) {
        assert(cls->isRealized());  // _read_images should realize

        bool alreadyScheduled = (cls->data()->flags & RW_LOADED) != 0;
        if (!alreadyScheduled) {
            // The superclass goes onto the list before the class itself.
            schedule_class_load(cls->superclass);

            add_class_to_loadable_list(cls);
            cls->setInfo(RW_LOADED); 
        }
    }
}

// Appends cat and its +load implementation to the loadable_categories list.
// Categories without a +load method are ignored. loadMethodLock must be held.
void add_category_to_loadable_list(Category cat)
{
    loadMethodLock.assertLocked();

    IMP loadImp = _category_getLoadMethod(cat);
    if (!loadImp) {
        // Nothing to schedule for a category that has no +load method.
        return;
    }

    if (PrintLoading) {
        _objc_inform("LOAD: category '%s(%s)' scheduled for +load", 
                     _category_getClassName(cat), _category_getName(cat));
    }

    // Grow the list (2x + 16) once every allocated slot is in use.
    bool listIsFull = (loadable_categories_used == loadable_categories_allocated);
    if (listIsFull) {
        loadable_categories_allocated = loadable_categories_allocated*2 + 16;
        size_t newByteSize =
            loadable_categories_allocated * sizeof(struct loadable_category);
        loadable_categories =
            (struct loadable_category *)realloc(loadable_categories, newByteSize);
    }

    struct loadable_category *slot = &loadable_categories[loadable_categories_used];
    slot->cat = cat;
    slot->method = loadImp;
    loadable_categories_used++;
}

// Calls the pending +load method of every class in loadable_classes, in list
// order, then frees the list. The global list is detached (reset to empty)
// before any method runs, so the loop works on its own private copy.
static void call_class_loads(void)
{
    int i;
    
    // Detach current loadable list.
    struct loadable_class *classes = loadable_classes;
    int used = loadable_classes_used;
    loadable_classes = nil;
    loadable_classes_allocated = 0;
    loadable_classes_used = 0;
    
    // Call all +loads for the detached list.
    for (i = 0; i < used; i++) {
        Class cls = classes[i].cls;
        load_method_t load_method = (load_method_t)classes[i].method;
        if (!cls) continue; 

        if (PrintLoading) {
            _objc_inform("LOAD: +[%s load]\n", cls->nameForLogging());
        }
        // Direct call through the stored function pointer.
        (*load_method)(cls, SEL_load);
    }
    
    // Destroy the detached list.
    if (classes) free(classes);
}

// Calls the pending +load method of every category in loadable_categories
// whose class is loadable, in list order. Categories whose class is not yet
// loadable stay queued, followed by any categories that were added to the
// global list while the +load methods were running.
// Returns true if such new categories were added during this call.
static bool call_category_loads(void)
{
    int i, shift;
    bool new_categories_added = NO;
    
    // Detach current loadable list.
    struct loadable_category *cats = loadable_categories;
    int used = loadable_categories_used;
    int allocated = loadable_categories_allocated;
    loadable_categories = nil;
    loadable_categories_allocated = 0;
    loadable_categories_used = 0;

    // Call all +loads for the detached list.
    for (i = 0; i < used; i++) {
        Category cat = cats[i].cat;
        load_method_t load_method = (load_method_t)cats[i].method;
        Class cls;
        if (!cat) continue;

        cls = _category_getClass(cat);
        if (cls  &&  cls->isLoadable()) {
            if (PrintLoading) {
                _objc_inform("LOAD: +[%s(%s) load]\n", 
                             cls->nameForLogging(), 
                             _category_getName(cat));
            }
            // Direct call through the stored function pointer.
            (*load_method)(cls, SEL_load);
            // Mark the entry as done so the compaction below drops it.
            cats[i].cat = nil;
        }
    }

    // Compact detached list (order-preserving)
    shift = 0;
    for (i = 0; i < used; i++) {
        if (cats[i].cat) {
            cats[i-shift] = cats[i];
        } else {
            shift++;
        }
    }
    used -= shift;

    // Copy any new +load candidates from the new list to the detached list.
    new_categories_added = (loadable_categories_used > 0);
    for (i = 0; i < loadable_categories_used; i++) {
        if (used == allocated) {
            allocated = allocated*2 + 16;
            cats = (struct loadable_category *)
                realloc(cats, allocated *
                                  sizeof(struct loadable_category));
        }
        cats[used++] = loadable_categories[i];
    }

    // Destroy the new list.
    if (loadable_categories) free(loadable_categories);

    // Reattach the (now augmented) detached list. 
    // But if there's nothing left to load, destroy the list.
    if (used) {
        loadable_categories = cats;
        loadable_categories_used = used;
        loadable_categories_allocated = allocated;
    } else {
        if (cats) free(cats);
        loadable_categories = nil;
        loadable_categories_used = 0;
        loadable_categories_allocated = 0;
    }

    if (PrintLoading) {
        if (loadable_categories_used != 0) {
            _objc_inform("LOAD: %d categories still waiting for +load\n",
                         loadable_categories_used);
        }
    }

    return new_categories_added;
}

从源码得知在prepare_load_methods方法中，对主类按照编译顺序，先父类后主类递归地加入到loadable_list中，并且通过类中的flags和RW_LOADED作位运算保证该类只会加入一次；主类加入完毕后，再按照编译顺序依次把分类加入到list中。

在call_load_methods中，会按照上述loadable_list中的次序，通过函数指针的方式找到load方法的地址依次调用，如果没有实现load方法，则不调用，并且Category的load方法不会覆盖主类。

证明三

按照证明二类似的办法，我们得到栈帧调用记录

thread #1, queue = 'com.apple.main-thread', stop reason = breakpoint 4.1
    frame #0: 0x000000010576559e 线程保活`+[GWThread initialize](self=GWThread, _cmd="initialize") at GWThread.m:25
  * frame #1: 0x000000010606ab04 libobjc.A.dylib`CALLING_SOME_+initialize_METHOD + 19
    frame #2: 0x000000010606ae9e libobjc.A.dylib`_class_initialize + 276
    frame #3: 0x000000010606adda libobjc.A.dylib`_class_initialize + 80
    frame #4: 0x0000000106071824 libobjc.A.dylib`lookUpImpOrForward + 226
    frame #5: 0x0000000106081414 libobjc.A.dylib`_objc_msgSend_uncached + 68

在源码中找到_class_initialize方法

// Excerpt of the routine that triggers +initialize for cls (must not be a
// metaclass). The "……" lines mark code that the article leaves out.
// Visible flow: initialize the superclass first if it is not initialized yet,
// then, under classInitLock, claim the right to initialize cls, and finally
// send +initialize via callInitialize.
void _class_initialize(Class cls)
{
    assert(!cls->isMetaClass());

    Class supercls;
    bool reallyInitialize = NO;

    // Make sure super is done initializing BEFORE beginning to initialize cls.
    // See note about deadlock above.
    supercls = cls->superclass;
    if (supercls  &&  !supercls->isInitialized()) {
        _class_initialize(supercls);
    }
    
    // Try to atomically set CLS_INITIALIZING.
    // reallyInitialize becomes YES only when cls is neither initialized nor
    // currently being initialized.
    {
        monitor_locker_t lock(classInitLock);
        if (!cls->isInitialized() && !cls->isInitializing()) {
            cls->setInitializing();
            reallyInitialize = YES;
        }
    }
    ……
    callInitialize(cls);
    ……
 }
 
// Sends the +initialize message to cls through objc_msgSend, i.e. with a
// normal method lookup rather than a direct function-pointer call.
void callInitialize(Class cls)
{
    typedef void (*initialize_send_t)(Class, SEL);
    initialize_send_t send = (initialize_send_t)objc_msgSend;
    send(cls, SEL_initialize);
    // NOTE(review): the empty asm statement presumably keeps the call above
    // from becoming a tail call -- confirm against the runtime source.
    asm("");
}

当类第一次收到消息时，会调用runtime中的lookUpImpOrForward函数，进而判断是否需要调用_class_initialize函数。
可见这里同样是先父类后子类地递归调用_class_initialize方法，并且通过reallyInitialize布尔变量控制每个类只触发一次，最后通过消息发送（objc_msgSend）的形式调用类的initialize方法。所以当子类没有实现这个方法而父类实现了时，消息会沿继承链找到父类的实现，于是父类的initialize方法会被调用两次，其他情况依次类推。

需要改进之处

上文说到，编译期Category和Class会在__DATA段的某处，笔者利用ASLR和Mach-O的偏移可以在程序运行期证明Class的存在，然而不能证明Category的存在，原因可能在于Category在合并完方法后内存会被释放，所以期待有人可以利用底层汇编知识证明category_t的存在。

以上所有证明都可以用iOS程序验证，谢谢大家！

地表最强Category和Load、Initialize方法总结

先上结论

工具

证明

证明一

证明二

证明三

需要改进之处

你可能感兴趣的:(地表最强Category和Load、Initialize方法总结)