dyld
dyld(the dynamic link editor)
是苹果的动态链接器,是苹果操作系统一个重要组成部分,在系统内核做好程序准备工作之后,交由dyld
负责余下的工作。而且它是开源的,任何人可以通过苹果官网下载它的源码来阅读理解它的运作方式,了解系统加载动态库的细节。
我们可以在这里下载源码
dyld流程
新建一个application,我们在ViewController中添加+ (void)load
方法,断掉调试,bt
打印出堆栈信息,最后一行显示
frame #12: 0x0000000105dd0025 dyld`_dyld_start + 37
我们在dyld
源码中全局搜索找到入口
我们通过注释来进行分析
找到
# call dyldbootstrap::start(app_mh, argc, argv, dyld_mh, &startGlue)
在dyldbootstrap
命名空间中找到start
方法
uintptr_t start(const dyld3::MachOLoaded* appsMachHeader, int argc, const char* argv[],
const dyld3::MachOLoaded* dyldsMachHeader, uintptr_t* startGlue)
{
// Emit kdebug tracepoint to indicate dyld bootstrap has started
dyld3::kdebug_trace_dyld_marker(DBG_DYLD_TIMING_BOOTSTRAP_START, 0, 0, 0, 0);
// if kernel had to slide dyld, we need to fix up load sensitive locations
// we have to do this before using any global variables
//dyld重定位
rebaseDyld(dyldsMachHeader);
// kernel sets up env pointer to be just past end of agv array
const char** envp = &argv[argc+1];
// kernel sets up apple pointer to be just past end of envp array
const char** apple = envp;
while(*apple != NULL) { ++apple; }
++apple;
// set up random value for stack canary
//栈溢出保护
__guard_setup(apple);
#if DYLD_INITIALIZER_SUPPORT
// run all C++ initializers inside dyld
runDyldInitializers(argc, argv, envp, apple);
#endif
// now that we are done bootstrapping dyld, call dyld's main
uintptr_t appsSlide = appsMachHeader->getSlide();
return dyld::_main((macho_header*)appsMachHeader, appsSlide, argc, argv, envp, apple, startGlue);
}
初始化dyld
后,调用dyld::_main()
dyld::_main()
代码有600+行,我们一步步分析
1. 环境变量配置
///从环境中获取主可执行文件的cdHash
uint8_t mainExecutableCDHashBuffer[20];
const uint8_t* mainExecutableCDHash = nullptr;
if ( hexToBytes(_simple_getenv(apple, "executable_cdhash"), 40, mainExecutableCDHashBuffer) )
mainExecutableCDHash = mainExecutableCDHashBuffer;
// 获取主程序的macho_header
sMainExecutableMachHeader = mainExecutableMH;
// 获取主程序的slide值
sMainExecutableSlide = mainExecutableSlide;
// 设置上下文信息
setContext(mainExecutableMH, argc, argv, envp, apple);
// Pickup the pointer to the exec path.
//获取主程序路径
sExecPath = _simple_getenv(apple, "executable_path");
//进程的头环境配置
configureProcessRestrictions(mainExecutableMH, envp);
//检测环境变量
checkEnvironmentVariables(envp);
defaultUninitializedFallbackPaths(envp);
// 获取当前运行环境架构的信息
getHostInfo(mainExecutableMH, mainExecutableSlide);
2.共享缓存
检查是否开启了共享缓存,以及共享缓存是否映射到共享区域,例如UIKit
、CoreFoundation
等
// load shared cache
// 检测共享缓存是否开启,在iOS中必须开启
checkSharedRegionDisable((dyld3::MachOLoaded*)mainExecutableMH, mainExecutableSlide);
if ( gLinkContext.sharedRegionMode != ImageLoader::kDontUseSharedRegion ) {
#if TARGET_OS_SIMULATOR
if ( sSharedCacheOverrideDir)
///检测共享缓存是否映射到了共享区域
mapSharedCache();
#else
mapSharedCache();
#endif
static void mapSharedCache()
{
dyld3::SharedCacheOptions opts;
opts.cacheDirOverride = sSharedCacheOverrideDir;
opts.forcePrivate = (gLinkContext.sharedRegionMode == ImageLoader::kUsePrivateSharedRegion);
#if __x86_64__ && !TARGET_OS_SIMULATOR
opts.useHaswell = sHaswell;
#else
opts.useHaswell = false;
#endif
opts.verbose = gLinkContext.verboseMapping;
// 加载dyld缓存
loadDyldCache(opts, &sSharedCacheLoadInfo);
// update global state
if ( sSharedCacheLoadInfo.loadAddress != nullptr ) {
gLinkContext.dyldCache = sSharedCacheLoadInfo.loadAddress;
dyld::gProcessInfo->processDetachedFromSharedRegion = opts.forcePrivate;
dyld::gProcessInfo->sharedCacheSlide = sSharedCacheLoadInfo.slide;
dyld::gProcessInfo->sharedCacheBaseAddress = (unsigned long)sSharedCacheLoadInfo.loadAddress;
sSharedCacheLoadInfo.loadAddress->getUUID(dyld::gProcessInfo->sharedCacheUUID);
dyld3::kdebug_trace_dyld_image(DBG_DYLD_UUID_SHARED_CACHE_A, sSharedCacheLoadInfo.path, (const uuid_t *)&dyld::gProcessInfo->sharedCacheUUID[0], {0,0}, {{ 0, 0 }}, (const mach_header *)sSharedCacheLoadInfo.loadAddress);
}
}
bool loadDyldCache(const SharedCacheOptions& options, SharedCacheLoadInfo* results)
{
results->loadAddress = 0;
results->slide = 0;
results->errorMessage = nullptr;
#if TARGET_OS_SIMULATOR
// simulator only supports mmap()ing cache privately into process
//模拟器只支持mmap()将缓存私有地放入进程中
return mapCachePrivate(options, results);
#else
if ( options.forcePrivate ) {
// mmap cache into this process only
//只缓存到这个进程中
return mapCachePrivate(options, results);
}
else {
// fast path: when cache is already mapped into shared region
//快速路径:当缓存已经映射到共享区域时
bool hasError = false;
if ( reuseExistingCache(options, results) ) {
hasError = (results->errorMessage != nullptr);
} else {
// slow path: this is first process to load cache
//慢路径:这是第一个加载缓存的进程
hasError = mapCacheSystemWide(options, results);
}
return hasError;
}
#endif
}
3. 主程序的初始化
/// 实例化主程序
/// 加载可执行文件,并生成一个ImageLoader
sMainExecutable = instantiateFromLoadedImage(mainExecutableMH, mainExecutableSlide, sExecPath);
gLinkContext.mainExecutable = sMainExecutable;
gLinkContext.mainExecutableCodeSigned = hasCodeSignatureLoadCommand(mainExecutableMH);
4. 插入动态库
//加载所有DYLD_INSERT_LIBRARIES指定的库
if ( sEnv.DYLD_INSERT_LIBRARIES != NULL ) {
for (const char* const* lib = sEnv.DYLD_INSERT_LIBRARIES; *lib != NULL; ++lib)
loadInsertedDylib(*lib);
}
// record count of inserted libraries so that a flat search will look at
//记录插入的库的数量,以便进行统一搜索
// inserted libraries, then main, then others.
//插入库,然后是main,然后是其他
sInsertedDylibCount = sAllImages.size()-1;
5. link 主程序
// 链接主程序
link(sMainExecutable, sEnv.DYLD_BIND_AT_LAUNCH, true, ImageLoader::RPathChain(NULL, NULL), -1);
6. link 动态库
// link any inserted libraries
// do this after linking main executable so that any dylibs pulled in by inserted
// dylibs (e.g. libSystem) will not be in front of dylibs the program uses
// 链接插入动态库
// 链接主可执行文件猴执行此操作,这样插入的dylibs (e.g. libSystem)不会再程序使用的dylib前面
if ( sInsertedDylibCount > 0 ) {
for(unsigned int i=0; i < sInsertedDylibCount; ++i) {
ImageLoader* image = sAllImages[i+1];
link(image, sEnv.DYLD_BIND_AT_LAUNCH, true, ImageLoader::RPathChain(NULL, NULL), -1);
image->setNeverUnloadRecursive();
}
if ( gLinkContext.allowInterposing ) {
// only INSERTED libraries can interpose 只有插入的库可以插入
// register interposing info after all inserted libraries are bound so chaining works
//绑定所有插入的库猴,注入插入信息,以便链接工作
for(unsigned int i=0; i < sInsertedDylibCount; ++i) {
ImageLoader* image = sAllImages[i+1];
// 驻车符号插入
image->registerInterposing(gLinkContext);
}
}
}
7. 弱引用绑定
// Bind and notify for the main executable now that interposing has been registered
//现在已经注册了插入操作,为主可执行文件绑定和通知
uint64_t bindMainExecutableStartTime = mach_absolute_time();
sMainExecutable->recursiveBindWithAccounting(gLinkContext, sEnv.DYLD_BIND_AT_LAUNCH, true);
uint64_t bindMainExecutableEndTime = mach_absolute_time();
ImageLoaderMachO::fgTotalBindTime += bindMainExecutableEndTime - bindMainExecutableStartTime;
gLinkContext.notifyBatch(dyld_image_state_bound, false);
// Bind and notify for the inserted images now interposing has been registered
if ( sInsertedDylibCount > 0 ) {
for(unsigned int i=0; i < sInsertedDylibCount; ++i) {
ImageLoader* image = sAllImages[i+1];
image->recursiveBind(gLinkContext, sEnv.DYLD_BIND_AT_LAUNCH, true);
}
}
// do weak binding only after all inserted images linked
//弱符号绑定
sMainExecutable->weakBind(gLinkContext);
gLinkContext.linkingMainExecutable = false;
8. 执行初始化方法
#if SUPPORT_OLD_CRT_INITIALIZATION
// Old way is to run initializers via a callback from crt1.o
//旧的方法是通过crt1.o的回调来运行初始化器
if ( ! gRunInitializersOldWay )
initializeMainExecutable();
#else
// run all initializers
//运行所有初始化项
initializeMainExecutable();
#endif
void initializeMainExecutable()
{
// record that we've reached this step
gLinkContext.startedInitializingMainExecutable = true;
// run initialzers for any inserted dylibs
ImageLoader::InitializerTimingList initializerTimes[allImagesCount()];
initializerTimes[0].count = 0;
const size_t rootCount = sImageRoots.size();
if ( rootCount > 1 ) {
for(size_t i=1; i < rootCount; ++i) {
sImageRoots[i]->runInitializers(gLinkContext, initializerTimes[0]);
}
}
// run initializers for main executable and everything it brings up
sMainExecutable->runInitializers(gLinkContext, initializerTimes[0]);
// register cxa_atexit() handler to run static terminators in all loaded images when this process exits
if ( gLibSystemHelpers != NULL )
(*gLibSystemHelpers->cxa_atexit)(&runAllStaticTerminators, NULL, NULL);
// dump info if requested
if ( sEnv.DYLD_PRINT_STATISTICS )
ImageLoader::printStatistics((unsigned int)allImagesCount(), initializerTimes[0]);
if ( sEnv.DYLD_PRINT_STATISTICS_DETAILS )
ImageLoaderMachO::printStatisticsDetails((unsigned int)allImagesCount(), initializerTimes[0]);
}
全局搜索runInitializers(const
void ImageLoader::runInitializers(const LinkContext& context, InitializerTimingList& timingInfo)
{
uint64_t t1 = mach_absolute_time();
mach_port_t thisThread = mach_thread_self();
ImageLoader::UninitedUpwards up;
up.count = 1;
up.imagesAndPaths[0] = { this, this->getPath() };
processInitializers(context, thisThread, timingInfo, up);
context.notifyBatch(dyld_image_state_initialized, false);
mach_port_deallocate(mach_task_self(), thisThread);
uint64_t t2 = mach_absolute_time();
fgTotalInitTime += (t2 - t1);
}
查看processInitializers
void ImageLoader::processInitializers(const LinkContext& context, mach_port_t thisThread,
InitializerTimingList& timingInfo, ImageLoader::UninitedUpwards& images)
{
uint32_t maxImageCount = context.imageCount()+2;
ImageLoader::UninitedUpwards upsBuffer[maxImageCount];
ImageLoader::UninitedUpwards& ups = upsBuffer[0];
ups.count = 0;
// Calling recursive init on all images in images list, building a new list of
// uninitialized upward dependencies.
for (uintptr_t i=0; i < images.count; ++i) {
images.imagesAndPaths[i].first->recursiveInitialization(context, thisThread, images.imagesAndPaths[i].second, timingInfo, ups);
}
// If any upward dependencies remain, init them.
if ( ups.count > 0 )
processInitializers(context, thisThread, timingInfo, ups);
}
void ImageLoader::recursiveInitialization(const LinkContext& context, mach_port_t this_thread, const char* pathToInitialize,
InitializerTimingList& timingInfo, UninitedUpwards& uninitUps)
{
recursive_lock lock_info(this_thread);
recursiveSpinLock(lock_info);//递归加锁
if ( fState < dyld_image_state_dependents_initialized-1 ) {
uint8_t oldState = fState;
// break cycles 结束递归循环
fState = dyld_image_state_dependents_initialized-1;
try {
// initialize lower level libraries first
...
// record termination order
if ( this->needsTermination() )
context.terminationRecorder(this);
// let objc know we are about to initialize this image
// 让objc知道我们将要初始化此镜像
uint64_t t1 = mach_absolute_time();
fState = dyld_image_state_dependents_initialized;
oldState = fState;
context.notifySingle(dyld_image_state_dependents_initialized, this, &timingInfo);
// initialize this image
// 初始化此镜像
bool hasInitializers = this->doInitialization(context);
// let anyone know we finished initializing this image
///让所有人知道我们已经完成了这个映像的初始化
fState = dyld_image_state_initialized;
oldState = fState;
context.notifySingle(dyld_image_state_initialized, this, NULL);
...
}
...
}
recursiveSpinUnLock();
}
我们先来看notifySingle
static void notifySingle(dyld_image_states state, const ImageLoader* image, ImageLoader::InitializerTimingList* timingInfo)
{
//dyld::log("notifySingle(state=%d, image=%s)\n", state, image->getPath());
std::vector* handlers = stateToHandlers(state, sSingleHandlers);
if ( handlers != NULL ) {
dyld_image_info info;
info.imageLoadAddress = image->machHeader();
info.imageFilePath = image->getRealPath();
info.imageFileModDate = image->lastModified();
for (std::vector::iterator it = handlers->begin(); it != handlers->end(); ++it) {
const char* result = (*it)(state, 1, &info);
if ( (result != NULL) && (state == dyld_image_state_mapped) ) {
//fprintf(stderr, " image rejected by handler=%p\n", *it);
// make copy of thrown string so that later catch clauses can free it
const char* str = strdup(result);
throw str;
}
}
}
// 是否被映射进来
if ( state == dyld_image_state_mapped ) {
// Save load addr + UUID for images from outside the shared cache
if ( !image->inSharedCache() ) {
dyld_uuid_info info;
if ( image->getUUID(info.imageUUID) ) {
info.imageLoadAddress = image->machHeader();//头部信息处理
addNonSharedCacheImageUUID(info);
}
}
}
if ( (state == dyld_image_state_dependents_initialized) && (sNotifyObjCInit != NULL) && image->notifyObjC() ) {
uint64_t t0 = mach_absolute_time();
dyld3::ScopedTimer timer(DBG_DYLD_TIMING_OBJC_INIT, (uint64_t)image->machHeader(), 0, 0);
(*sNotifyObjCInit)(image->getRealPath(), image->machHeader());
uint64_t t1 = mach_absolute_time();
uint64_t t2 = mach_absolute_time();
uint64_t timeInObjC = t1-t0;
uint64_t emptyTime = (t2-t1)*100;
if ( (timeInObjC > emptyTime) && (timingInfo != NULL) ) {
timingInfo->addTime(image->getShortName(), timeInObjC);
}
}
// mach message csdlc about dynamically unloaded images
//关于动态未加载镜像的mach消息csdlc
if ( image->addFuncNotified() && (state == dyld_image_state_terminated) ) {
notifyKernel(*image, false);
const struct mach_header* loadAddress[] = { image->machHeader() };
const char* loadPath[] = { image->getPath() };
notifyMonitoringDyld(true, 1, loadAddress, loadPath);
}
}
最后我们找到了(*sNotifyObjCInit)
,全局搜索,在registerObjCNotifiers
中找到了对sNotifyObjCInit
的赋值
void registerObjCNotifiers(_dyld_objc_notify_mapped mapped, _dyld_objc_notify_init init, _dyld_objc_notify_unmapped unmapped)
{
// record functions to call
sNotifyObjCMapped = mapped;
sNotifyObjCInit = init;
sNotifyObjCUnmapped = unmapped;
...
}
void _dyld_objc_notify_register(_dyld_objc_notify_mapped mapped,
_dyld_objc_notify_init init,
_dyld_objc_notify_unmapped unmapped)
{
dyld::registerObjCNotifiers(mapped, init, unmapped);
}
这个时候我们就要去objc4-781
源码中找到_dyld_objc_notify_register
void _objc_init(void)
{
static bool initialized = false;
if (initialized) return;
initialized = true;
// fixme defer initialization until an objc-using image is found?
environ_init();
tls_init();
static_init();
runtime_init();
exception_init();
cache_init();
_imp_implementationWithBlock_init();
_dyld_objc_notify_register(&map_images, load_images, unmap_image);
#if __OBJC2__
didCallDyldNotifyRegister = true;
#endif
}
在这里我们看到是把load_images
赋给sNotifyObjCInit
,而load_images
会调用所有的+load
方法,所以notifySingle
是个回调函数
doInitialization 函数
bool ImageLoaderMachO::doInitialization(const LinkContext& context)
{
CRSetCrashLogMessage2(this->getPath());
// mach-o has -init and static initializers
// mach-o有-init和静态初始化器
doImageInit(context);
doModInitFunctions(context);
CRSetCrashLogMessage2(NULL);
return (fHasDashInit || fHasInitializers);
}
-
doImageInit
for循环加载所有方法调用,libSystem
的初始化必须先执行
void ImageLoaderMachO::doImageInit(const LinkContext& context)
{
if ( fHasDashInit ) {
const uint32_t cmd_count = ((macho_header*)fMachOData)->ncmds;
const struct load_command* const cmds = (struct load_command*)&fMachOData[sizeof(macho_header)];
const struct load_command* cmd = cmds;
for (uint32_t i = 0; i < cmd_count; ++i) {//方法的调用
switch (cmd->cmd) {
case LC_ROUTINES_COMMAND:
// Mach-O地址平移,得到一个函数方法
Initializer func = (Initializer)(((struct macho_routines_command*)cmd)->init_address + fSlide);
#if __has_feature(ptrauth_calls)
func = (Initializer)__builtin_ptrauth_sign_unauthenticated((void*)func, ptrauth_key_asia, 0);
#endif
// verify initializers are in image//验证初始化程序在镜像中
if ( ! this->containsAddress(stripPointer((void*)func)) ) {
dyld::throwf("initializer function %p not in mapped image for %s\n", func, this->getPath());
}
if ( ! dyld::gProcessInfo->libSystemInitialized ) {
// libSystem initializer must run first
//libSystem必须先初始化,优先级最高
dyld::throwf("-init function in image (%s) that does not link with libSystem.dylib\n", this->getPath());
}
if ( context.verboseInit )
dyld::log("dyld: calling -init function %p in %s\n", func, this->getPath());
{
dyld3::ScopedTimer(DBG_DYLD_TIMING_STATIC_INITIALIZER, (uint64_t)fMachOData, (uint64_t)func, 0);
func(context.argc, context.argv, context.envp, context.apple, &context.programVars);
}
break;
}
cmd = (const struct load_command*)(((char*)cmd)+cmd->cmdsize);
}
}
}
-
doModInitFunctions
调用所有的cxx方法
我们验证一下在c++方法中加上断点
我们在_objc_init 方法中加上断点,可以看到
结合上面的分析,从初始化_objc_init注册的_dyld_objc_notify_register的参数2,即load_images,到sNotifySingle --> sNotifyObjCInie=参数2 到sNotifyObjcInit()调用,形成了一个闭环
所以可以简单的理解为sNotifySingle这里是添加通知即addObserver,_objc_init中调用_dyld_objc_notify_register相当于发送通知,即push,而sNotifyObjcInit相当于通知的处理函数,即selector
_dyld_start
--> dyldbootstrap::start
--> dyld::_main
--> dyld::initializeMainExecutable
--> ImageLoader::runInitializers
--> ImageLoader::processInitializers
--> ImageLoader::recursiveInitialization
--> doInitialization
-->libSystem_initializer
(libSystem.B.dylib) --> _os_object_init
(libdispatch.dylib) --> _objc_init
(libobjc.A.dylib)
9.找到主程序入口main
,从Load Command
读取LC_MAIN
入口,如果没有,就读取LC_UNIXTHREAD
// find entry point for main executable
// 找到main入口并执行
result = (uintptr_t)sMainExecutable->getEntryFromLC_MAIN();
if ( result != 0 ) {
// main executable uses LC_MAIN, we need to use helper in libdyld to call into main()
// main可执行文件使用LC_MAIN,我们需要使用libdyld中的helper函数来调用main()
if ( (gLibSystemHelpers != NULL) && (gLibSystemHelpers->version >= 9) )
*startGlue = (uintptr_t)gLibSystemHelpers->startGlueToCallExit;
else
halt("libdyld.dylib support not present for LC_MAIN");
}
else {
// main executable uses LC_UNIXTHREAD, dyld needs to let "start" in program set up for main()
//主可执行文件使用LC_UNIXTHREAD, dyld需要让“start”在程序设置为main()
result = (uintptr_t)sMainExecutable->getEntryFromLC_UNIXTHREAD();
*startGlue = 0;
}