先上结论
- 类A的Category会在编译期被编译成category_t结构体,其中确定了所属的类、方法、协议、属性等信息,并存放在Mach-O文件__DATA段的__objc_catlist和__objc_nlcatlist节(section)中;在运行期,分类的方法列表会被合并到A类方法列表的头部(原类的方法列表向尾部后移),且各分类的合并顺序与编译顺序相反,即最后编译的分类的方法排在合并后列表的最前面
- load方法在程序启动时(main函数执行之前)调用,调用方式是直接通过函数指针调用,而不是消息发送(objc_msgSend);类和分类中的load方法都会被调用。调用规则为:先主类,按照编译顺序,并保证父类先于子类;再分类,按照编译顺序依次调用。load方法如有实现,只会被调用一次;没有实现,则不会调用。
- initialize方法在类第一次收到消息(即第一次经由objc_msgSend进行方法查找)时调用,并且每个类只会触发一次;但它是以消息发送的方式调用的,所以当子类第一次收到消息且自身未实现该方法时,会沿继承链找到并调用父类的initialize方法
工具
xcrun、clang
xcrun -sdk iphoneos clang -arch arm64 -rewrite-objc GWThread+Test.m
xcrun用于指定编译的sdk,arch参数指定架构,-rewrite-objc参数把objc代码改写成c++。若出现找不到sdk的错误,请检查开发者目录。
objc源码
https://opensource.apple.com/tarballs/
下载最新版本,阅读源码的意义在于理解底层原理,并且学习优秀的设计,可以不纠结具体的api
证明
证明一
我们随意在一个iOS工程新建一个Category类,这里以GWThread+Test为例,实现一个load接口,然后利用xcrun和clang输出C++代码
xcrun -sdk iphoneos clang -arch arm64 -rewrite-objc GWThread+Test.m
具体输出的代码如下:
// Category descriptor struct as emitted by clang -rewrite-objc.
struct _category_t {
const char *name;
struct _class_t *cls; // class this category extends
const struct _method_list_t *instance_methods; // instance-method list, or 0 when absent
const struct _method_list_t *class_methods; // class-method list, or 0 when absent
const struct _protocol_list_t *protocols;
const struct _prop_list_t *properties;
};
// Method-list struct holding the category's class methods. The _method_list_t
// tag is commented out in the rewriter output, leaving the struct anonymous.
// The generated code is not guaranteed to be exact in every detail, so only
// its overall structure and ordering are relied on here.
static struct /*_method_list_t*/ {
unsigned int entsize; // sizeof(struct _objc_method)
unsigned int method_count;
struct _objc_method method_list[1];
} _OBJC_$_CATEGORY_CLASS_METHODS_GWThread_$_Test __attribute__ ((used, section ("__DATA,__objc_const"))) = {
sizeof(_objc_method),
1,
// Single entry for "load"; presumably selector, type encoding and
// implementation pointer, in that order.
{{(struct objc_selector *)"load", "v16@0:8", (void *)_C_GWThread_Test_load}}
};
// Initialization of the category struct; initializers follow the field order
// of struct _category_t.
static struct _category_t _OBJC_$_CATEGORY_GWThread_$_Test __attribute__ ((used, section ("__DATA,__objc_const"))) =
{
"GWThread", // name
0, // cls: &OBJC_CLASS_$_GWThread, assigned by OBJC_CATEGORY_SETUP_$_GWThread_$_Test
0, // instance_methods: none
(const struct _method_list_t *)&_OBJC_$_CATEGORY_CLASS_METHODS_GWThread_$_Test, // class_methods
0, // protocols: none
0, // properties: none
};
// Reference to the main class this category belongs to (defined elsewhere).
extern "C" __declspec(dllimport) struct _class_t OBJC_CLASS_$_GWThread;
// Setup function: binds the category to its main class by filling in the
// cls field that was left as 0 in the static initializer.
static void OBJC_CATEGORY_SETUP_$_GWThread_$_Test(void ) {
_OBJC_$_CATEGORY_GWThread_$_Test.cls = &OBJC_CLASS_$_GWThread;
}
// OBJC_CATEGORY_SETUP array: holds the setup function's address and is
// allocated in the ".objc_inithooks$B" section declared by the pragma.
#pragma section(".objc_inithooks$B", long, read, write)
__declspec(allocate(".objc_inithooks$B")) static void *OBJC_CATEGORY_SETUP[] = {
(void *)&OBJC_CATEGORY_SETUP_$_GWThread_$_Test,
};
// L_OBJC_LABEL_CATEGORY_$ array: one pointer to the category struct, placed
// in the __objc_catlist section of the Mach-O __DATA segment.
static struct _category_t *L_OBJC_LABEL_CATEGORY_$ [1] __attribute__((used, section ("__DATA, __objc_catlist,regular,no_dead_strip")))= {
&_OBJC_$_CATEGORY_GWThread_$_Test,
};
// _OBJC_LABEL_NONLAZY_CATEGORY_$ array: also points at the category struct.
// Note that this rewriter output attaches no section attribute to it.
static struct _category_t *_OBJC_LABEL_NONLAZY_CATEGORY_$[] = {
&_OBJC_$_CATEGORY_GWThread_$_Test,
};
如上所述,在编译期会确定分类的结构、方法实现、以及对应的主类等信息
我们再看源码:
// Attach method lists and properties and protocols from categories to a class.
// Assumes the categories in cats are all loaded and sorted by load order,
// oldest categories first.
//
// cls          - class (or metaclass) that receives the category contents
// cats         - categories to attach; a null list makes this a no-op
// flush_caches - when true and at least one method list was attached,
//                flushCaches(cls) is called afterwards
static void
attachCategories(Class cls, category_list *cats, bool flush_caches)
{
    if (!cats) return;
    if (PrintReplacedMethods) printReplacements(cls, cats);

    bool isMeta = cls->isMetaClass();

    // fixme rearrange to remove these intermediate allocations
    // 1. Scratch arrays with room for one list pointer per category.
    method_list_t **mlists = (method_list_t **)
        malloc(cats->count * sizeof(*mlists));
    property_list_t **proplists = (property_list_t **)
        malloc(cats->count * sizeof(*proplists));
    protocol_list_t **protolists = (protocol_list_t **)
        malloc(cats->count * sizeof(*protolists));

    // Count backwards through cats to get newest categories first
    int mcount = 0;
    int propcount = 0;
    int protocount = 0;
    int i = cats->count;
    bool fromBundle = NO;

    // 2. Walk the categories in reverse order and collect every non-null
    //    method, property and protocol list into the scratch arrays.
    while (i--) {
        auto& entry = cats->list[i];

        method_list_t *mlist = entry.cat->methodsForMeta(isMeta);
        if (mlist) {
            mlists[mcount++] = mlist;
            fromBundle |= entry.hi->isBundle();
        }

        property_list_t *proplist =
            entry.cat->propertiesForMeta(isMeta, entry.hi);
        if (proplist) {
            proplists[propcount++] = proplist;
        }

        protocol_list_t *protolist = entry.cat->protocols;
        if (protolist) {
            protolists[protocount++] = protolist;
        }
    }

    auto rw = cls->data();

    prepareMethodLists(cls, mlists, mcount, NO, fromBundle);
    // 3. Attach the collected method lists to the class's rw data.
    rw->methods.attachLists(mlists, mcount);
    free(mlists);
    if (flush_caches && mcount > 0) flushCaches(cls);

    // 4. Attach the collected property lists to the class's rw data.
    rw->properties.attachLists(proplists, propcount);
    free(proplists);

    // 5. Attach the collected protocol lists to the class's rw data.
    rw->protocols.attachLists(protolists, protocount);
    free(protolists);
}
// Prepends addedCount list pointers from addedLists to the lists already
// stored here, so the added lists end up in front of the existing ones.
// Does nothing when addedCount is 0.
void attachLists(List* const * addedLists, uint32_t addedCount) {
if (addedCount == 0) return;
// Case 1: an array of lists already exists.
if (hasArray()) {
// many lists -> many lists
// Number of lists currently stored (lists, not individual methods).
uint32_t oldCount = array()->count;
// Number of lists after the merge.
uint32_t newCount = oldCount + addedCount;
// Resize the array to hold newCount list pointers.
setArray((array_t *)realloc(array(), array_t::byteSize(newCount)));
// Record the new list count.
array()->count = newCount;
// Shift the existing list pointers back by addedCount slots; memmove is
// used because source and destination ranges can overlap.
memmove(array()->lists + addedCount, array()->lists,
oldCount * sizeof(array()->lists[0]));
// Copy the added list pointers into the freed slots at the front; the
// source is a separate buffer, so memcpy is used.
memcpy(array()->lists, addedLists,
addedCount * sizeof(array()->lists[0]));
}
// Case 2: nothing stored yet and exactly one list is added; store it directly.
else if (!list && addedCount == 1) {
// 0 lists -> 1 list
list = addedLists[0];
}
// Case 3: no array yet; create one holding the added lists plus the
// previous single list, if there was one.
else {
// 1 list -> many lists
List* oldList = list;
uint32_t oldCount = oldList ? 1 : 0;
uint32_t newCount = oldCount + addedCount;
setArray((array_t *)malloc(array_t::byteSize(newCount)));
array()->count = newCount;
// Put the previous single list at index addedCount, i.e. directly after
// the slots [0, addedCount) that the memcpy below fills with the added lists.
if (oldList) array()->lists[addedCount] = oldList;
memcpy(array()->lists, addedLists,
addedCount * sizeof(array()->lists[0]));
}
}
在objc-runtime-new.mm文件中重点看attachCategories和attachLists方法,具体流程见上面代码中的注释。这两个方法就是把category的方法、属性、协议合并到主类中,需要注意一下内存分布,如下图
[图片上传失败...(image-d436aa-1536073437496)]
假设Category2和Category1为GWThread+Test2和GWThread+Test1,且Test2的编译晚于Test1,当程序运行起来后,Test2中的方法排在method_list的前面,Test1次之,主类的方法最后。因此,如果GWThread类和它的分类中有同名的方法,在调用时,Test2中的方法会被优先查找到。
证明二
在GWThread+Test2和GWThread+Test1复写load方法,并在load处setBreakPoint。启动程序后,利用bt得到调用栈帧
frame #0: 0x00000001086523cc 线程保活`+[GWThread(self=GWThread, _cmd="load") load] at GWThread+Test.m:16
frame #1: 0x0000000108f575c0 libobjc.A.dylib`call_load_methods + 236
frame #2: 0x0000000108f5840a libobjc.A.dylib`load_images + 70
frame #3: 0x000000010865ecb7 dyld_sim`dyld::notifySingle(dyld_image_states, ImageLoader const*, ImageLoader::InitializerTimingList*) + 311
frame #4: 0x000000010866af88 dyld_sim`ImageLoader::recursiveInitialization(ImageLoader::LinkContext const&, unsigned int, char const*, ImageLoader::InitializerTimingList&, ImageLoader::UninitedUpwards&) + 322
frame #5: 0x000000010866a15e dyld_sim`ImageLoader::processInitializers(ImageLoader::LinkContext const&, unsigned int, ImageLoader::InitializerTimingList&, ImageLoader::UninitedUpwards&) + 134
frame #6: 0x000000010866a1f2 dyld_sim`ImageLoader::runInitializers(ImageLoader::LinkContext const&, ImageLoader::InitializerTimingList&) + 74
frame #7: 0x000000010865f052 dyld_sim`dyld::initializeMainExecutable() + 196
frame #8: 0x0000000108662b6b dyld_sim`dyld::_main(macho_header const*, unsigned long, int, char const**, char const**, char const**, unsigned long*) + 4408
frame #9: 0x000000010865e3db dyld_sim`start_sim + 136
frame #10: 0x0000000116fc0ded dyld`dyld::useSimulatorDyld(int, macho_header const*, char const*, int, char const**, char const**, char const**, unsigned long*, unsigned long*) + 2200
frame #11: 0x0000000116fbe7a3 dyld`dyld::_main(macho_header const*, unsigned long, int, char const**, char const**, char const**, unsigned long*) + 436
frame #12: 0x0000000116fba3d4 dyld`dyldbootstrap::start(macho_header const*, int, char const**, long, macho_header const*, unsigned long*) + 453
frame #13: 0x0000000116fba1d2 dyld`_dyld_start + 54
可见先后调用了load_images和call_load_methods方法,我们看一下源码
// Collects the +load methods of the image described by mh and then calls them.
// Returns immediately when hasLoadMethods() reports none for this image.
void
load_images(const char *path __unused, const struct mach_header *mh)
{
// Return without taking locks if there are no +load methods here.
if (!hasLoadMethods((const headerType *)mh)) return;
// Held for the rest of the function, covering both discovery and the calls.
recursive_mutex_locker_t lock(loadMethodLock);
// Discover load methods
{
// runtimeLock is taken only inside this inner scope.
rwlock_writer_t lock2(runtimeLock);
prepare_load_methods((const headerType *)mh);
}
// Call +load methods (without runtimeLock - re-entrant)
call_load_methods();
}
// Schedules +load for the image described by mhdr: first every class in the
// non-lazy class list (via schedule_class_load), then every category in the
// non-lazy category list (via add_category_to_loadable_list).
// Asserts that runtimeLock is held for writing.
void prepare_load_methods(const headerType *mhdr)
{
    size_t count, i;

    runtimeLock.assertWriting();

    // Classes first, in list order.
    classref_t *classlist =
        _getObjc2NonlazyClassList(mhdr, &count);
    for (i = 0; i < count; i++) {
        schedule_class_load(remapClass(classlist[i]));
    }

    // Then categories, in list order.
    category_t **categorylist = _getObjc2NonlazyCategoryList(mhdr, &count);
    for (i = 0; i < count; i++) {
        category_t *cat = categorylist[i];
        Class cls = remapClass(cat->cls);
        if (!cls) continue;  // category for ignored weak-linked class
        // The category's class is realized before the category is queued.
        realizeClass(cls);
        assert(cls->ISA()->isRealized());
        add_category_to_loadable_list(cat);
    }
}
// Queue cls for +load, first queueing any superclass that has not been
// queued yet. cls must already be connected; a nil cls is ignored.
static void schedule_class_load(Class cls)
{
    if (cls) {
        assert(cls->isRealized());  // _read_images should realize

        // RW_LOADED marks classes that were already queued.
        const bool alreadyScheduled = (cls->data()->flags & RW_LOADED) != 0;
        if (!alreadyScheduled) {
            // Superclass goes onto the list before the class itself.
            schedule_class_load(cls->superclass);

            add_class_to_loadable_list(cls);
            cls->setInfo(RW_LOADED);
        }
    }
}
// Appends cat and its +load implementation to the loadable_categories table.
// Categories without a +load method are skipped. Asserts that loadMethodLock
// is held.
void add_category_to_loadable_list(Category cat)
{
    loadMethodLock.assertLocked();

    IMP loadImp = _category_getLoadMethod(cat);
    if (loadImp) {
        if (PrintLoading) {
            _objc_inform("LOAD: category '%s(%s)' scheduled for +load",
                         _category_getClassName(cat), _category_getName(cat));
        }

        // Grow the table when every allocated slot is in use.
        const bool tableIsFull =
            (loadable_categories_used == loadable_categories_allocated);
        if (tableIsFull) {
            loadable_categories_allocated = 16 + 2*loadable_categories_allocated;
            loadable_categories = (struct loadable_category *)
                realloc(loadable_categories,
                        loadable_categories_allocated *
                        sizeof(struct loadable_category));
        }

        // Fill the next free slot, then count it as used.
        struct loadable_category *slot =
            &loadable_categories[loadable_categories_used];
        slot->cat = cat;
        slot->method = loadImp;
        loadable_categories_used += 1;
    }
}
// Calls every pending class +load method, in list order.
// The global list is detached first, so entries added while the +load
// methods run go into a fresh list instead of the one being walked.
static void call_class_loads(void)
{
    int i;

    // Detach current loadable list.
    struct loadable_class *classes = loadable_classes;
    int used = loadable_classes_used;
    loadable_classes = nil;
    loadable_classes_allocated = 0;
    loadable_classes_used = 0;

    // Call all +loads for the detached list.
    for (i = 0; i < used; i++) {
        Class cls = classes[i].cls;
        load_method_t load_method = (load_method_t)classes[i].method;
        if (!cls) continue;

        if (PrintLoading) {
            _objc_inform("LOAD: +[%s load]\n", cls->nameForLogging());
        }
        // Direct call through the stored function pointer.
        (*load_method)(cls, SEL_load);
    }

    // Destroy the detached list.
    if (classes) free(classes);
}
// Calls the pending category +load methods whose class is loadable, in list
// order. Categories that could not be called yet stay queued, followed by any
// categories that were queued while the +load methods ran.
// Returns true when new categories were queued during this call.
static bool call_category_loads(void)
{
    int i, shift;
    bool new_categories_added = NO;

    // Detach current loadable list.
    struct loadable_category *cats = loadable_categories;
    int used = loadable_categories_used;
    int allocated = loadable_categories_allocated;
    loadable_categories = nil;
    loadable_categories_allocated = 0;
    loadable_categories_used = 0;

    // Call all +loads for the detached list.
    for (i = 0; i < used; i++) {
        Category cat = cats[i].cat;
        load_method_t load_method = (load_method_t)cats[i].method;
        Class cls;
        if (!cat) continue;

        cls = _category_getClass(cat);
        if (cls && cls->isLoadable()) {
            if (PrintLoading) {
                _objc_inform("LOAD: +[%s(%s) load]\n",
                             cls->nameForLogging(),
                             _category_getName(cat));
            }
            // Direct call through the stored function pointer.
            (*load_method)(cls, SEL_load);
            // Mark the entry as done so the compaction below drops it.
            cats[i].cat = nil;
        }
    }

    // Compact detached list (order-preserving)
    shift = 0;
    for (i = 0; i < used; i++) {
        if (cats[i].cat) {
            cats[i-shift] = cats[i];
        } else {
            shift++;
        }
    }
    used -= shift;

    // Copy any new +load candidates from the new list to the detached list.
    new_categories_added = (loadable_categories_used > 0);
    for (i = 0; i < loadable_categories_used; i++) {
        if (used == allocated) {
            allocated = allocated*2 + 16;
            cats = (struct loadable_category *)
                realloc(cats, allocated *
                        sizeof(struct loadable_category));
        }
        cats[used++] = loadable_categories[i];
    }

    // Destroy the new list.
    if (loadable_categories) free(loadable_categories);

    // Reattach the (now augmented) detached list.
    // But if there's nothing left to load, destroy the list.
    if (used) {
        loadable_categories = cats;
        loadable_categories_used = used;
        loadable_categories_allocated = allocated;
    } else {
        if (cats) free(cats);
        loadable_categories = nil;
        loadable_categories_used = 0;
        loadable_categories_allocated = 0;
    }

    if (PrintLoading) {
        if (loadable_categories_used != 0) {
            _objc_inform("LOAD: %d categories still waiting for +load\n",
                         loadable_categories_used);
        }
    }

    return new_categories_added;
}
从源码得知,在prepare_load_methods函数中,先按照编译顺序遍历主类,并以先父类后子类的顺序递归地加入到loadable_classes列表中,同时通过类的flags中的RW_LOADED标志位保证每个类只会加入一次;主类加入完毕后,再按照编译顺序依次把分类加入到loadable_categories列表中。
在call_load_methods中,会按照上述loadable_list中的次序,通过函数指针的方式找到load方法的地址依次调用,如果没有实现load方法,则不调用,并且Category的load方法不会覆盖主类。
证明三
按照证明二类似的办法,我们得到栈帧调用记录
thread #1, queue = 'com.apple.main-thread', stop reason = breakpoint 4.1
frame #0: 0x000000010576559e 线程保活`+[GWThread initialize](self=GWThread, _cmd="initialize") at GWThread.m:25
* frame #1: 0x000000010606ab04 libobjc.A.dylib`CALLING_SOME_+initialize_METHOD + 19
frame #2: 0x000000010606ae9e libobjc.A.dylib`_class_initialize + 276
frame #3: 0x000000010606adda libobjc.A.dylib`_class_initialize + 80
frame #4: 0x0000000106071824 libobjc.A.dylib`lookUpImpOrForward + 226
frame #5: 0x0000000106081414 libobjc.A.dylib`_objc_msgSend_uncached + 68
在源码中找到_class_initialize函数
// Excerpt: runs +initialize handling for cls (must not be a metaclass).
// The superclass is handled first; the "……" lines mark code omitted from
// this excerpt.
void _class_initialize(Class cls)
{
assert(!cls->isMetaClass());
Class supercls;
// Set to YES only when this call is the one that moves cls into the
// initializing state.
bool reallyInitialize = NO;
// Make sure super is done initializing BEFORE beginning to initialize cls.
// See note about deadlock above.
supercls = cls->superclass;
if (supercls && !supercls->isInitialized()) {
_class_initialize(supercls);
}
// Try to atomically set CLS_INITIALIZING.
{
// The check and setInitializing() happen under classInitLock.
monitor_locker_t lock(classInitLock);
if (!cls->isInitialized() && !cls->isInitializing()) {
cls->setInitializing();
reallyInitialize = YES;
}
}
……
callInitialize(cls);
……
}
// Sends the initialize message to cls through objc_msgSend, so the
// implementation is found by normal method lookup.
void callInitialize(Class cls)
{
((void(*)(Class, SEL))objc_msgSend)(cls, SEL_initialize);
// NOTE(review): the empty asm presumably keeps the call above from becoming
// a tail call, so this frame stays visible in backtraces — confirm.
asm("");
}
当类对象第一次收到消息时,会进入runtime中的lookUpImpOrForward函数,进而判断是否需要调用_class_initialize函数。
可见_class_initialize同样是按先父类后子类的顺序递归调用的,并且通过reallyInitialize布尔变量保证每个类只触发一次,最后以消息发送(objc_msgSend)的形式调用类的initialize方法。所以当子类没有实现这个方法而父类实现了时,父类的initialize方法会被调用两次(一次因父类自身,一次因子类沿继承链查找到父类的实现),其他情况依次类推。
需要改进之处
上文说到,编译期Category和Class会在__DATA段的某处,笔者利用ASLR和Mach-O的偏移可以在程序运行期证明Class的存在,然而不能证明Category的存在,原因可能在于Category在合并完方法后内存会被释放,所以期待有人可以利用底层汇编知识证明category_t的存在。
以上所有证明都可以用iOS程序验证,谢谢大家!