一、dyld简介
dyld(the dynamic link editor,动态链接器)是苹果操作系统的一个重要组成部分。在iOS/macOS系统中,仅有很少量的进程只需要内核就能完成加载,基本上所有的进程都是动态链接的,所以Mach-O镜像文件中会有很多对外部库和符号的引用。但是这些引用并不能直接使用,在启动时还必须根据这些引用把实际内容填补进来,这个填补工作就是由动态链接器dyld来完成的,也就是符号绑定。
下载源码
二、dyld流程分析
dyld的主要作用是加载Mach-O镜像文件,链接外部库和符号绑定。所以想要查看其内部方法的执行顺序,需要在main函数执行之前去分析。
那就在+load方法内添加断点,查看调用栈信息。
1、_dyld_start分析
通过栈信息发现最早执行的函数就是_dyld_start,进入_dyld_start查看汇编执行顺序
在dyld源码中全局搜索_dyld_start方法, 该方法做了底层环境区分,arm64的源码如下
// arm64 entry point of dyld: aligns the stack, builds a terminating frame,
// calls dyldbootstrap::start(), then jumps to the address that call returns.
#if __arm64__
.text
.align 2
.globl __dyld_start
__dyld_start:
mov x28, sp // keep the incoming (unaligned) stack pointer in x28
and sp, x28, #~15 // force 16-byte alignment of stack
mov x0, #0
mov x1, #0
stp x1, x0, [sp, #-16]! // make aligned terminating frame
mov fp, sp // set up fp to point to terminating frame
sub sp, sp, #16 // make room for local variables
#if __LP64__
ldr x0, [x28] // get app's mh into x0
ldr x1, [x28, #8] // get argc into x1 (kernel passes 32-bit int argc as 64-bits on stack to keep alignment)
add x2, x28, #16 // get argv into x2
#else
ldr w0, [x28] // get app's mh into w0 (4-byte slot)
ldr w1, [x28, #4] // get argc into w1 (4-byte slot right after the mh)
add w2, w28, #8 // get argv into w2
#endif
adrp x3,___dso_handle@page
add x3,x3,___dso_handle@pageoff // x3 = dyld's mh (address of ___dso_handle)
mov x4,sp // x4 = &startGlue (the local slot reserved above)
// call dyldbootstrap::start(app_mh, argc, argv, dyld_mh, &startGlue)
bl __ZN13dyldbootstrap5startEPKN5dyld311MachOLoadedEiPPKcS3_Pm
mov x16,x0 // save entry point address in x16
// reload the startGlue slot whose address was passed in x4
#if __LP64__
ldr x1, [sp]
#else
ldr w1, [sp]
#endif
cmp x1, #0
b.ne Lnew // non-zero startGlue selects the LC_MAIN path below
// LC_UNIXTHREAD way, clean up stack and jump to result
#if __LP64__
add sp, x28, #8 // restore unaligned stack pointer without app mh
#else
add sp, x28, #4 // restore unaligned stack pointer without app mh
#endif
#if __arm64e__
braaz x16 // jump to the program's entry point
#else
br x16 // jump to the program's entry point
#endif
// LC_MAIN case, set up stack for call to main()
Lnew: mov lr, x1 // simulate return address into _start in libdyld.dylib
#if __LP64__
ldr x0, [x28, #8] // main param1 = argc
add x1, x28, #16 // main param2 = argv
add x2, x1, x0, lsl #3 // x2 = argv + argc * 8
add x2, x2, #8 // main param3 = &env[0]
mov x3, x2
// walk the env array 8 bytes at a time until the NULL entry has been read
Lapple: ldr x4, [x3]
add x3, x3, #8
#else
ldr w0, [x28, #4] // main param1 = argc
add x1, x28, #8 // main param2 = argv
add x2, x1, x0, lsl #2 // x2 = argv + argc * 4
add x2, x2, #4 // main param3 = &env[0]
mov x3, x2
// walk the env array 4 bytes at a time until the NULL entry has been read
Lapple: ldr w4, [x3]
add x3, x3, #4
#endif
cmp x4, #0
b.ne Lapple // main param4 = apple
// x3 now points just past env's NULL terminator; jump to the address saved in x16
#if __arm64e__
braaz x16
#else
br x16
#endif
#endif // __arm64__
2、dyldbootstrap::start分析
_dyld_start汇编内调用dyldbootstrap::start,根据dyldbootstrap找到方法start的实现
//
// This is code to bootstrap dyld. This work is normally done for a program by dyld and crt.
// In dyld we have to do this manually.
//
// appsMachHeader:  mach header of the main executable; used to compute its slide
//                  and forwarded to dyld::_main.
// argc, argv:      program arguments; envp and apple are derived from argv below.
// dyldsMachHeader: dyld's own mach header, handed to rebaseDyld().
// startGlue:       forwarded unchanged to dyld::_main.
// Returns whatever dyld::_main returns.
//
uintptr_t start(const dyld3::MachOLoaded* appsMachHeader, int argc, const char* argv[],
const dyld3::MachOLoaded* dyldsMachHeader, uintptr_t* startGlue)
{
// Emit kdebug tracepoint to indicate dyld bootstrap has started
dyld3::kdebug_trace_dyld_marker(DBG_DYLD_TIMING_BOOTSTRAP_START, 0, 0, 0, 0);
// if kernel had to slide dyld, we need to fix up load sensitive locations
// we have to do this before using any global variables
rebaseDyld(dyldsMachHeader);
// kernel sets up env pointer to be just past end of argv array
// (argc+1 skips the NULL entry that terminates argv)
const char** envp = &argv[argc+1];
// kernel sets up apple pointer to be just past end of envp array
const char** apple = envp;
// advance to envp's NULL terminator, then step over it
while(*apple != NULL) { ++apple; }
++apple;
// set up random value for stack canary
__guard_setup(apple);
#if DYLD_INITIALIZER_SUPPORT
// run all C++ initializers inside dyld
runDyldInitializers(argc, argv, envp, apple);
#endif
// now that we are done bootstrapping dyld, call dyld's main
uintptr_t appsSlide = appsMachHeader->getSlide();
return dyld::_main((macho_header*)appsMachHeader, appsSlide, argc, argv, envp, apple, startGlue);
}
解释:
- 调用rebaseDyld():如果内核对dyld做了滑动(slide),需要对dyld自身进行重定位(rebase),修正其中与加载地址相关的指针
- __guard_setup 栈溢出保护
- 调用dyld::_main
2.1 rebaseDyld分析
源码实现
//
// On disk, all pointers in dyld's DATA segment are chained together.
// They need to be fixed up to be real pointers to run.
//
// dyldMH is dyld's own mach header. The function applies all chained fixups
// using the load address as the slide, initializes the mach/syscall layer,
// and finally makes every segment flagged readOnlyData read-only.
//
static void rebaseDyld(const dyld3::MachOLoaded* dyldMH)
{
// walk all fixups chains and rebase dyld
const dyld3::MachOAnalyzer* ma = (dyld3::MachOAnalyzer*)dyldMH;
assert(ma->hasChainedFixups());
uintptr_t slide = (long)ma; // all fixup chain based images have a base address of zero, so slide == load address
__block Diagnostics diag;
ma->withChainStarts(diag, 0, ^(const dyld3::dyld_chained_starts_in_image* starts) {
// dyld3::Array is a class template, so the element type must be spelled out;
// an empty array and nullptr are passed for the remaining arguments
ma->fixupAllChainedFixups(diag, starts, slide, dyld3::Array<const void*>(), nullptr);
});
diag.assertNoError();
// now that rebasing done, initialize mach/syscall layer
mach_init();
// mark __DATA_CONST segment in dyld as read-only (once fixups are done)
ma->forEachSegment(^(const dyld3::MachOFile::SegmentInfo& info, bool& stop) {
if ( info.readOnlyData ) {
::mprotect(((uint8_t*)(dyldMH))+info.vmAddr, (size_t)info.vmSize, VM_PROT_READ);
}
});
}
解释:
Mach-O文件在磁盘上的内容是固定不变的,但是由于地址空间布局随机化(Address Space Layout Randomization,简称ASLR),每一次运行时同一个方法的指针地址都不同。ASLR是为了更安全,增加分析应用代码的成本。
dyld重定位元数据中的指针是比较耗时的操作,并且应用每次启动都会执行。如果可执行文件太大或者指针类型太多,都会导致指针重定位耗时增加,所以在这个环节我们是可以做一些优化的,也应该在平时开发过程中注意代码规范,减少这些环节的耗时。
dyld重定位分析
ASLR和CodeSign
ASLR:是Address Space Layout Randomization(地址空间布局随机化)的简称。App在被启动的时候,程序会被映射到逻辑地址空间,这个逻辑地址空间有一个起始地址,ASLR技术让这个起始地址是随机的。这个地址如果是固定的,黑客很容易就用起始地址+函数偏移地址找到对应的函数地址。
Code Sign:就是苹果代码加密签名机制,但是在Code Sign操作的时候,加密的哈希不是针对整个文件,而是针对每一个Page的。这个就保证了dyld在加载的时候,可以对每个page进行独立的验证。
3、dyld::_main分析
dyld::_main源码太长,根据下面9个步骤拆解分析
3.1 环境变量配置
根据环境变量设置相应的值,获取当前运行的架构信息,判断dyld版本做处理
dyld3和dyld2的差异
// Excerpt from dyld::_main (step 3.1): environment setup before any image is loaded.
// Check and see if there are any kernel flags
dyld3::BootArgs::setFlags(hexToUInt64(_simple_getenv(apple, "dyld_flags"), nullptr));
// Grab the cdHash of the main executable from the environment
uint8_t mainExecutableCDHashBuffer[20];
const uint8_t* mainExecutableCDHash = nullptr;
// the pointer stays null unless hexToBytes succeeds (presumably 40 hex chars -> 20 bytes)
if ( hexToBytes(_simple_getenv(apple, "executable_cdhash"), 40, mainExecutableCDHashBuffer) )
mainExecutableCDHash = mainExecutableCDHashBuffer;
// Set the platform ID in the all image infos so debuggers can tell the process type
if (gProcessInfo->version >= 16) {
__block bool platformFound = false;
((dyld3::MachOFile*)mainExecutableMH)->forEachSupportedPlatform(^(dyld3::Platform platform, uint32_t minOS, uint32_t sdk) {
// a second callback means the executable declares more than one platform
if (platformFound) {
halt("MH_EXECUTE binaries may only specify one platform");
}
gProcessInfo->platform = (uint32_t)platform;
platformFound = true;
});
}
CRSetCrashLogMessage("dyld: launch started");
setContext(mainExecutableMH, argc, argv, envp, apple);
// Pickup the pointer to the exec path.
sExecPath = _simple_getenv(apple, "executable_path");
// Remove interim apple[0] transition code from dyld
if (!sExecPath) sExecPath = apple[0];
#if __IPHONE_OS_VERSION_MIN_REQUIRED && !TARGET_OS_SIMULATOR
// kernel is not passing a real path for main executable, so prefix it with "/private"
if ( strncmp(sExecPath, "/var/containers/Bundle/Application/", 35) == 0 ) {
// +10 leaves room for the 8-character "/private" prefix and the terminating NUL
if ( char* newPath = (char*)malloc(strlen(sExecPath)+10) ) {
strcpy(newPath, "/private");
strcat(newPath, sExecPath);
sExecPath = newPath;
}
}
#endif
if ( sExecPath[0] != '/' ) {
// have relative path, use cwd to make absolute
char cwdbuff[MAXPATHLEN];
if ( getcwd(cwdbuff, MAXPATHLEN) != NULL ) {
// maybe use static buffer to avoid calling malloc so early...
// +2 covers the "/" separator and the terminating NUL
char* s = new char[strlen(cwdbuff) + strlen(sExecPath) + 2];
strcpy(s, cwdbuff);
strcat(s, "/");
strcat(s, sExecPath);
sExecPath = s;
}
}
// process the environment variables
checkEnvironmentVariables(envp);
// set up the default fallback paths
defaultUninitializedFallbackPaths(envp);
// if DYLD_PRINT_OPTS is set, print the arguments
if ( sEnv.DYLD_PRINT_OPTS )
printOptions(argv);
// if DYLD_PRINT_ENV is set, print the environment variables
if ( sEnv.DYLD_PRINT_ENV )
printEnvironmentVariables(envp);
// look up the temp directory
const char* tempDir = getTempDir(envp);
if ( (tempDir != nullptr) && (geteuid() != 0) ) {
// Use realpath to prevent something like TMPDIR=/tmp/../usr/bin
char realPath[PATH_MAX];
if ( realpath(tempDir, realPath) != NULL )
tempDir = realPath;
// a temp dir inside an app container switches dyld to "just build the closure" mode
if (strncmp(tempDir, "/private/var/mobile/Containers/", strlen("/private/var/mobile/Containers/")) == 0) {
sJustBuildClosure = true;
}
}
// dyld3: when only building a closure, force closure mode on
if ( sJustBuildClosure )
sClosureMode = ClosureMode::On;
// get host/architecture information for the current run environment
getHostInfo(mainExecutableMH, mainExecutableSlide);
3.2 共享缓存
检查是否开启了共享缓存,创建启动闭包,加载共享缓存。
// Excerpt from dyld::_main (step 3.2): shared cache mapping and dyld3 closure launch.
// load shared cache: first check whether the shared region is disabled
checkSharedRegionDisable((dyld3::MachOLoaded*)mainExecutableMH, mainExecutableSlide);
if ( gLinkContext.sharedRegionMode != ImageLoader::kDontUseSharedRegion ) {
// map the shared cache
mapSharedCache();
}
/*
enum class ClosureMode {
// Unset means we haven't provided an env variable or boot-arg to explicitly choose a mode
Unset,
// On means we set DYLD_USE_CLOSURES=1, or we didn't have DYLD_USE_CLOSURES=0 but did have
// -force_dyld3=1 env variable or a customer cache on iOS
On,
// Off means we set DYLD_USE_CLOSURES=0, or we didn't have DYLD_USE_CLOSURES=1 but did have
// -force_dyld2=1 env variable or an internal cache on iOS
Off,
// PreBuiltOnly means only use a shared cache closure and don't try build a new one
PreBuiltOnly
};
*/
// If we haven't got a closure mode yet, then check the environment and cache type
if ( sClosureMode == ClosureMode::Unset ) {
// First test to see if we forced in dyld2 via a kernel boot-arg
if ( dyld3::BootArgs::forceDyld2() ) {
sClosureMode = ClosureMode::Off;
} else if ( inDenyList(sExecPath) ) {
sClosureMode = ClosureMode::Off;
} else if ( sEnv.hasOverride ) {
sClosureMode = ClosureMode::Off;
} else if ( dyld3::BootArgs::forceDyld3() ) {
sClosureMode = ClosureMode::On;
} else {
sClosureMode = getPlatformDefaultClosureMode();
}
}
#if !TARGET_OS_SIMULATOR
if ( sClosureMode == ClosureMode::Off ) {
if ( gLinkContext.verboseWarnings )
dyld::log("dyld: not using closure because of DYLD_USE_CLOSURES or -force_dyld2=1 override\n");
} else {
const dyld3::closure::LaunchClosure* mainClosure = nullptr;
dyld3::closure::LoadedFileInfo mainFileInfo;
mainFileInfo.fileContent = mainExecutableMH;
mainFileInfo.path = sExecPath;
// FIXME: If we are saving this closure, this slice offset/length is probably wrong in the case of FAT files.
mainFileInfo.sliceOffset = 0;
mainFileInfo.sliceLen = -1;
struct stat mainExeStatBuf;
// inode and mtime are only filled in when stat() succeeds
if ( ::stat(sExecPath, &mainExeStatBuf) == 0 ) {
mainFileInfo.inode = mainExeStatBuf.st_ino;
mainFileInfo.mtime = mainExeStatBuf.st_mtime;
}
// check for closure in cache first
if ( sSharedCacheLoadInfo.loadAddress != nullptr ) {
// look up the launch closure for this executable path in the shared cache
mainClosure = sSharedCacheLoadInfo.loadAddress->findClosure(sExecPath);
if ( gLinkContext.verboseWarnings && (mainClosure != nullptr) )
dyld::log("dyld: found closure %p (size=%lu) in dyld shared cache\n", mainClosure, mainClosure->size());
}
// We only want to try build a closure at runtime if its an iOS third party binary, or a macOS binary from the shared cache
bool allowClosureRebuilds = false;
if ( sClosureMode == ClosureMode::On ) {
allowClosureRebuilds = true;
} else if ( (sClosureMode == ClosureMode::PreBuiltOnly) && (mainClosure != nullptr) ) {
allowClosureRebuilds = true;
}
// discard a closure from the cache that fails validation
if ( (mainClosure != nullptr) && !closureValid(mainClosure, mainFileInfo, mainExecutableCDHash, true, envp) )
mainClosure = nullptr;
// If we didn't find a valid cache closure then try build a new one
if ( (mainClosure == nullptr) && allowClosureRebuilds ) {
// if forcing closures, and no closure in cache, or it is invalid, check for cached closure
if ( !sForceInvalidSharedCacheClosureFormat )
mainClosure = findCachedLaunchClosure(mainExecutableCDHash, mainFileInfo, envp);
if ( mainClosure == nullptr ) {
// if no cached closure found, build new one
mainClosure = buildLaunchClosure(mainExecutableCDHash, mainFileInfo, envp);
}
}
// exit dyld after closure is built, without running program
if ( sJustBuildClosure )
_exit(EXIT_SUCCESS);
// try using launch closure
if ( mainClosure != nullptr ) {
CRSetCrashLogMessage("dyld3: launch started");
bool launched = launchWithClosure(mainClosure, sSharedCacheLoadInfo.loadAddress, (dyld3::MachOLoaded*)mainExecutableMH,
mainExecutableSlide, argc, argv, envp, apple, &result, startGlue);
if ( !launched && allowClosureRebuilds ) {
// closure is out of date, build new one
mainClosure = buildLaunchClosure(mainExecutableCDHash, mainFileInfo, envp);
if ( mainClosure != nullptr ) {
launched = launchWithClosure(mainClosure, sSharedCacheLoadInfo.loadAddress, (dyld3::MachOLoaded*)mainExecutableMH,
mainExecutableSlide, argc, argv, envp, apple, &result, startGlue);
}
}
if ( launched ) {
#if __has_feature(ptrauth_calls)
// start() calls the result pointer as a function pointer so we need to sign it.
result = (uintptr_t)__builtin_ptrauth_sign_unauthenticated((void*)result, 0, 0);
#endif
if (sSkipMain)
result = (uintptr_t)&fake_main;
return result;
}
else {
// the closure launch failed, so control continues past this block
if ( gLinkContext.verboseWarnings ) {
dyld::log("dyld: unable to use closure %p\n", mainClosure);
}
}
}
}
在launchWithClosure方法内部,根据已找到的启动闭包(launch closure),取出共享缓存的所有镜像,取出镜像的方法表,记录已加载的镜像;然后初始化allImage,并向allImage中添加镜像。
3.3 主程序初始化(imageLoader)
调用instantiateFromLoadedImage函数实例化了一个ImageLoader对象
// Excerpt from dyld::_main (step 3.3).
// instantiate ImageLoader for main executable
sMainExecutable = instantiateFromLoadedImage(mainExecutableMH, mainExecutableSlide, sExecPath);
// publish the main executable and whether it has a code-signature load command in the link context
gLinkContext.mainExecutable = sMainExecutable;
gLinkContext.mainExecutableCodeSigned = hasCodeSignatureLoadCommand(mainExecutableMH);
3.4 插入动态库
遍历DYLD_INSERT_LIBRARIES环境变量,调用loadInsertedDylib加载
3.5 链接主程序
3.6 链接动态库
链接动态库放在链接主程序之后进行,以便所有动态库都能被插入
3.7 符号绑定
1、如果对应地址在共享缓存中,找到该镜像的符号绑定表直接使用。
2、主程序符号绑定,先绑定引用的库,再绑定镜像文件
3、绑定已插入镜像
4、符号绑定方法
3.8 执行初始化方法
查找runInitializers方法实现,在ImageLoader文件内找到其实现
初始化主要执行的方法为processInitializers
各个镜像初始化时先找到初始化方法,判断方法是否实现,执行各个镜像的初始化方法。
3.9 寻找主程序入口(main函数)
根据Mach-O文件查找main函数入口地址