本文使用的 runtime 版本为 objc4-706。
Runtime 的入口,是可以在 objc-os.mm
找到的函数 _objc_init
,它被 libSystem 调用,进行初始化和加载 image,所谓 image,在 macOS 和 iOS 上来说,就是 Mach-O 文件。
/*
 * _objc_init
 * Runtime bootstrap entry point (the surrounding article states that it is
 * called by libSystem). Runs the runtime's initializers once, then registers
 * the image callbacks.
 */
void _objc_init(void)
{
// One-shot guard: any call after the first returns immediately.
static bool initialized = false;
if (initialized) return;
initialized = true;
// fixme defer initialization until an objc-using image is found?
// Initializers are invoked in this fixed order.
environ_init();
tls_init();
static_init();
lock_init();
exception_init();
// Hand three callbacks to _dyld_objc_notify_register: map_2_images,
// load_images and unmap_image. Image information is delivered to the
// runtime through these callbacks.
_dyld_objc_notify_register(&map_2_images, load_images, unmap_image);
}
在上面 _objc_init
的代码中,除去初始化函数的调用,最后一行是对 _dyld_objc_notify_register
的调用,这个函数接收三个函数当作回调,传递 image 信息让其自行处理。
其中 map_2_images
函数就是 image 中的 Objective-C 类的信息进入 runtime 发生的地方。
/*
 * map_2_images
 * Callback registered by _objc_init. Acquires runtimeLock and forwards the
 * image count, paths and Mach-O headers unchanged to map_images_nolock.
 */
void
map_2_images(unsigned count, const char * const paths[],
const struct mach_header * const mhdrs[])
{
// Scoped writer lock on runtimeLock.
// NOTE(review): presumably released when `lock` goes out of scope - confirm
// against the rwlock_writer_t definition.
rwlock_writer_t lock(runtimeLock);
return map_images_nolock(count, paths, mhdrs);
}
map_2_images
函数在加锁后就转向了 map_images_nolock
函数。
/*
 * map_images_nolock
 * Excerpt: the `...` lines stand for code omitted by the article.
 * According to the article, the omitted code inspects every incoming image,
 * records those that carry runtime metadata in hList and counts them in
 * hCount. The name suggests the caller already holds the runtime lock
 * (map_2_images takes it before calling this function).
 */
void
map_images_nolock(unsigned mhCount, const char * const mhPaths[],
const struct mach_header * const mhdrs[])
{
...
// Only read class data when at least one header was recorded.
if (hCount > 0) {
_read_images(hList, hCount, totalClasses, unoptimizedTotalClasses);
}
...
}
在 map_images_nolock
函数中,它检查传入的每个 image,如果 image 有 runtime 需要的信息,就将它记录在 hList
中,并将 hCount
加一,最终判断 hCount
来决定是否继续读取 image 中的数据,即调用 _read_images
函数。
/*
 * _read_images
 * Excerpt: the `...` lines stand for code omitted by the article, including
 * the declarations of hi, hIndex, i, count, resolvedFutureClasses and
 * resolvedFutureClassCount.
 * Shows the two class-related passes over the headers in hList:
 *   1. run readClass() on every class of every header that needs it;
 *   2. run realizeClass() on every class in each header's non-lazy list.
 */
void _read_images(header_info **hList, uint32_t hCount, int totalClasses, int unoptimizedTotalClasses)
{
...
// EACH_HEADER expands to the three clauses of a for statement. It walks
// hIndex over hList, stores the current entry in hi, and stops at hCount
// or at the first null entry.
#define EACH_HEADER \
hIndex = 0; \
hIndex < hCount && (hi = hList[hIndex]); \
hIndex++
...
// Discover classes. Fix up unresolved future classes. Mark bundle classes.
for (EACH_HEADER) {
if (! mustReadClasses(hi)) {
// Image is sufficiently optimized that we need not call readClass()
continue;
}
bool headerIsBundle = hi->isBundle();
bool headerIsPreoptimized = hi->isPreoptimized();
// Fetch this header's class list; count receives the number of entries.
classref_t *classlist = _getObjc2ClassList(hi, &count);
for (i = 0; i < count; i++) {
Class cls = (Class)classlist[i];
Class newCls = readClass(cls, headerIsBundle, headerIsPreoptimized);
if (newCls != cls && newCls) {
// Class was moved but not deleted. Currently this occurs
// only when the new class resolved a future class.
// Non-lazily realize the class below
// Grow the array by one slot and append newCls.
// NOTE(review): the realloc result is stored without a null check.
resolvedFutureClasses = (Class *)
realloc(resolvedFutureClasses,
(resolvedFutureClassCount+1) * sizeof(Class));
resolvedFutureClasses[resolvedFutureClassCount++] = newCls;
}
}
}
...
// Realize non-lazy classes (for +load methods and static instances)
for (EACH_HEADER) {
classref_t *classlist =
_getObjc2NonlazyClassList(hi, &count);
for (i = 0; i < count; i++) {
// Entries that remapClass() maps to null are skipped.
Class cls = remapClass(classlist[i]);
if (!cls) continue;
realizeClass(cls);
}
}
...
#undef EACH_HEADER
}
_read_images
函数中有两段关于类加载的部分。第一部分将每个 image 里的类信息都经过 readClass
函数处理一遍,将类的名字和类本身存到一个表里,这样就可以使用 objc_getClass
函数通过名字来获取类了。第二部分让 image 里的非懒加载(non-lazy)类经 realizeClass
函数处理一遍,先回顾一下类的定义。
/*
 * class_ro_t
 * Class metadata record. Per the surrounding article, this is what a class's
 * data() actually points to while the class is still in its on-image,
 * unrealized form. class_rw_t refers to it through a pointer-to-const.
 * NOTE(review): "ro" presumably stands for read-only - confirm.
 */
struct class_ro_t {
uint32_t flags;
uint32_t instanceStart;
uint32_t instanceSize;
// Extra 32-bit field that exists only in LP64 builds.
#ifdef __LP64__
uint32_t reserved;
#endif
const uint8_t * ivarLayout;
const char * name;
method_list_t * baseMethodList;
protocol_list_t * baseProtocols;
const ivar_list_t * ivars;
const uint8_t * weakIvarLayout;
property_list_t *baseProperties;
// Accessor that returns baseMethodList unchanged.
method_list_t *baseMethods() const {
return baseMethodList;
}
};
/*
 * class_rw_t
 * Per-class data allocated by realizeClass() for a normal class. Keeps a
 * pointer to the class's class_ro_t plus its own method, property and
 * protocol arrays and the links used for the subclass lists.
 * Excerpt: the `...` line stands for members omitted by the article.
 */
struct class_rw_t {
uint32_t flags;
uint32_t version;
// The class_ro_t this class was realized from.
const class_ro_t *ro;
method_array_t methods;
property_array_t properties;
protocol_array_t protocols;
Class firstSubclass;
Class nextSiblingClass;
char *demangledName;
...
};
/*
 * objc_class
 * In-memory representation of a class; derives from objc_object.
 * Excerpt: the `...` line stands for members omitted by the article.
 */
struct objc_class : objc_object {
// Class ISA;
Class superclass;
cache_t cache; // formerly cache pointer and vtable
class_data_bits_t bits; // class_rw_t * plus custom rr/alloc flags
// Typed as class_rw_t *, but realizeClass() reinterprets the result as a
// class_ro_t * before the class has been realized.
class_rw_t *data() {
return bits.data();
}
...
};
可以看到定义中,objc_class
的 data
方法返回类型是 class_rw_t *
,但是在 image 中的类信息,data
返回的实际上是 class_ro_t *
,这就需要 realizeClass
函数进行一些适当的操作,把这些数据整理成正确的结构。
/*
 * realizeClass
 * Converts a class into its realized form: gives it a class_rw_t that points
 * at its class_ro_t, recursively realizes the superclass and metaclass,
 * fixes up ivar layout, size and C++ flags, links the class into the
 * subclass lists, and finally calls methodizeClass().
 * Excerpt: the `...` lines stand for code omitted by the article, including
 * the declarations of ro, rw, supercls, metacls and isMeta.
 * Returns cls.
 */
static Class realizeClass(Class cls)
{
...
// Read data() as a class_ro_t first; which case applies is decided by the
// RO_FUTURE flag below.
ro = (const class_ro_t *)cls->data();
if (ro->flags & RO_FUTURE) {
// This was a future class. rw data is already allocated.
rw = cls->data();
ro = cls->data()->ro;
cls->changeInfo(RW_REALIZED|RW_REALIZING, RW_FUTURE);
} else {
// Normal class. Allocate writeable class data.
// Zero-filled storage for one class_rw_t, then attach ro to it.
rw = (class_rw_t *)calloc(sizeof(class_rw_t), 1);
rw->ro = ro;
rw->flags = RW_REALIZED|RW_REALIZING;
cls->setData(rw);
}
...
// Recursively realize the (remapped) superclass and metaclass.
supercls = realizeClass(remapClass(cls->superclass));
metacls = realizeClass(remapClass(cls->ISA()));
// Update superclass and metaclass in case of remapping
cls->superclass = supercls;
cls->initClassIsa(metacls);
// Reconcile instance variable offsets / layout.
// This may reallocate class_ro_t, updating our ro variable.
if (supercls && !isMeta) reconcileInstanceVariables(cls, supercls, ro);
// Set fastInstanceSize if it wasn't set already.
cls->setInstanceSize(ro->instanceSize);
// Copy some flags from ro to rw
if (ro->flags & RO_HAS_CXX_STRUCTORS) {
cls->setHasCxxDtor();
if (! (ro->flags & RO_HAS_CXX_DTOR_ONLY)) {
cls->setHasCxxCtor();
}
}
// Connect this class to its superclass's subclass lists
// A class without a superclass is registered as a root class instead.
if (supercls) {
addSubclass(supercls, cls);
} else {
addRootClass(cls);
}
// Attach categories
methodizeClass(cls);
return cls;
}
realizeClass
函数会给类创建 class_rw_t *
的数据,并将 ro
里的一些东西拷贝过来,顺带给父类和元类也 realizeClass
一下。最后使用 methodizeClass
函数将 ro
中方法、属性这些信息也拷贝到类的 class_rw_t *
中来。
总结
总的来说,类需要先从 Mach-O 文件中 map 过来,再进行 realize。但其中牵涉太多细节,这篇虎头蛇尾的文章就算一个随意的记录了。:-(
参考
iOS 程序 main 函数之前发生了什么 · sunnyxx的技术博客
Mach-O Executables · objc.io