如果一个so没有加载,系统首先会调用dlopen函数处理库文件。以下源码来自android6.0.1
static void* dlopen_ext(const char* filename, int flags, const android_dlextinfo* extinfo) {
ScopedPthreadMutexLocker locker(&g_dl_mutex);
soinfo* result = do_dlopen(filename, flags, extinfo);
if (result == nullptr) {
__bionic_format_dlerror("dlopen failed", linker_get_error_buffer());
return nullptr;
}
return result;
}
void* dlopen(const char* filename, int flags) {
return dlopen_ext(filename, flags, nullptr);
}
dlopen_ext函数会继续调用do_dlopen函数
// Validates the dlopen() arguments, then resolves the library through
// find_library() and runs its constructors.
//
//   name    - library name/path; handed to find_library() unchanged.
//   flags   - RTLD_* flags; any bit outside the supported set is an error.
//   extinfo - optional extended options (may be nullptr).
//
// Returns the library's soinfo, or nullptr if an argument is invalid (reported
// through DL_ERR) or find_library() fails.
soinfo* do_dlopen(const char* name, int flags, const android_dlextinfo* extinfo) {
  // Reject any flag bit that is not one of the supported RTLD_* values.
  if ((flags & ~(RTLD_NOW|RTLD_LAZY|RTLD_LOCAL|RTLD_GLOBAL|RTLD_NODELETE|RTLD_NOLOAD)) != 0) {
    DL_ERR("invalid flags to dlopen: %x", flags);
    return nullptr;
  }

  if (extinfo != nullptr) {
    // Unknown extended flag bits are an error.
    if ((extinfo->flags & ~(ANDROID_DLEXT_VALID_FLAG_BITS)) != 0) {
      DL_ERR("invalid extended flags to android_dlopen_ext: 0x%" PRIx64, extinfo->flags);
      return nullptr;
    }
    // USE_LIBRARY_FD_OFFSET is only accepted together with USE_LIBRARY_FD.
    if ((extinfo->flags & ANDROID_DLEXT_USE_LIBRARY_FD) == 0 &&
        (extinfo->flags & ANDROID_DLEXT_USE_LIBRARY_FD_OFFSET) != 0) {
      DL_ERR("invalid extended flag combination (ANDROID_DLEXT_USE_LIBRARY_FD_OFFSET without "
             "ANDROID_DLEXT_USE_LIBRARY_FD): 0x%" PRIx64, extinfo->flags);
      return nullptr;
    }
  }

  ProtectedDataGuard guard;
  soinfo* si = find_library(name, flags, extinfo);
  if (si != nullptr) {
    // A successful lookup/load is followed by running the library's initializers.
    si->call_constructors();
  }
  return si;
}
函数首先会对参数进行一些判断,随后调用find_library函数
// Resolves a single library by name.
// A null name yields somain; otherwise the lookup is delegated to
// find_libraries() for a one-element name list, and nullptr is returned when
// that call reports failure.
static soinfo* find_library(const char* name, int rtld_flags, const android_dlextinfo* extinfo) {
  if (name == nullptr) {
    return somain;
  }

  soinfo* found;
  if (find_libraries(nullptr, &name, 1, &found, nullptr, 0, rtld_flags, extinfo)) {
    return found;
  }
  return nullptr;
}
find_libraries函数会调用find_library_internal函数,其实现如下:
// Returns the soinfo for `name`, loading the library if necessary.
// An soname match among already-loaded libraries is returned immediately;
// otherwise load_library() is tried, and if that fails the soname candidate
// (when one was reported) is used as a fallback.
static soinfo* find_library_internal(LoadTaskList& load_tasks, const char* name,
                                     int rtld_flags, const android_dlextinfo* extinfo) {
  soinfo* candidate;

  if (find_loaded_library_by_soname(name, &candidate)) {
    return candidate;
  }

  // The library may still be loaded; load_library performs the accurate check.
  TRACE("[ '%s' find_loaded_library_by_soname returned false (*candidate=%s@%p). Trying harder...]",
        name, candidate == nullptr ? "n/a" : candidate->get_realpath(), candidate);

  soinfo* const loaded = load_library(load_tasks, name, rtld_flags, extinfo);

  // If loading failed but a library with the same soname (different sdk level)
  // is already loaded, hand that one back instead.
  return loaded != nullptr ? loaded : candidate;
}
load_library进行真正的加载,返回一个soinfo结构体指针。该结构体存储了so的信息。
OK!!
目光回到do_dlopen函数,si->call_constructors();
void soinfo::call_constructors() {
if (constructors_called) {
return;
}
// We set constructors_called before actually calling the constructors, otherwise it doesn't
// protect against recursive constructor calls. One simple example of constructor recursion
// is the libc debug malloc, which is implemented in libc_malloc_debug_leak.so:
// 1. The program depends on libc, so libc's constructor is called here.
// 2. The libc constructor calls dlopen() to load libc_malloc_debug_leak.so.
// 3. dlopen() calls the constructors on the newly created
// soinfo for libc_malloc_debug_leak.so.
// 4. The debug .so depends on libc, so CallConstructors is
// called again with the libc soinfo. If it doesn't trigger the early-
// out above, the libc constructor will be called again (recursively!).
constructors_called = true;
if (!is_main_executable() && preinit_array_ != nullptr) {
// The GNU dynamic linker silently ignores these, but we warn the developer.
PRINT("\"%s\": ignoring %zd-entry DT_PREINIT_ARRAY in shared library!",
get_realpath(), preinit_array_count_);
}
get_children().for_each([] (soinfo* si) {
si->call_constructors();
});
TRACE("\"%s\": calling constructors", get_realpath());
// DT_INIT should be called before DT_INIT_ARRAY if both are present.
call_function("DT_INIT", init_func_);
call_array("DT_INIT_ARRAY", init_array_, init_array_count_, false);
}
// Excerpt: the last two statements of soinfo::call_constructors().
// DT_INIT is invoked first, then every entry of DT_INIT_ARRAY in forward order.
call_function("DT_INIT", init_func_);
call_array("DT_INIT_ARRAY", init_array_, init_array_count_, false);
下面我们看一下call_function和call_array函数的实现:
// Calls every entry of `functions` through call_function().
//   array_name - label used only in trace output.
//   functions  - array of initializer pointers; nullptr means nothing to do.
//   count      - number of entries in `functions`.
//   reverse    - when true, walk from the last entry down to the first.
void soinfo::call_array(const char* array_name __unused, linker_function_t* functions,
                        size_t count, bool reverse) {
  if (functions == nullptr) {
    return;
  }

  TRACE("[ Calling %s (size %zd) @ %p for '%s' ]", array_name, count, functions, get_realpath());

  // Pick the start index, the one-past-the-end sentinel and the stride for the
  // requested direction.
  int first;
  int stop;
  int stride;
  if (reverse) {
    first = count - 1;
    stop = -1;
    stride = -1;
  } else {
    first = 0;
    stop = count;
    stride = 1;
  }

  int idx = first;
  while (idx != stop) {
    TRACE("[ %s[%d] == %p ]", array_name, idx, functions[idx]);
    call_function("function", functions[idx]);
    idx += stride;
  }

  TRACE("[ Done calling %s for '%s' ]", array_name, get_realpath());
}
// Invokes a single initializer function.
//   function_name - label used only in trace output.
//   function      - the function to call; skipped when it is nullptr or when
//                   its address is the all-ones value (-1).
void soinfo::call_function(const char* function_name __unused, linker_function_t function) {
  // Compare the pointer's integer value against (uintptr_t)-1; both casts need
  // an explicit target type to compile.
  if (function == nullptr || reinterpret_cast<uintptr_t>(function) == static_cast<uintptr_t>(-1)) {
    return;
  }

  TRACE("[ Calling %s @ %p for '%s' ]", function_name, function, get_realpath());
  function();
  TRACE("[ Done calling %s @ %p for '%s' ]", function_name, function, get_realpath());
}
可以看到最终就是调用init_func_和init_array_,init_func_和init_array_又是什么,是如何赋值的呢?
switch (d->d_tag) {
  // ... (other d_tag cases omitted)

  case DT_INIT:
    // Address of the single initialization function: file-relative d_ptr plus load_bias.
    init_func_ = reinterpret_cast<linker_function_t>(load_bias + d->d_un.d_ptr);
    DEBUG("%s constructors (DT_INIT) found at %p", get_realpath(), init_func_);
    break;

  case DT_INIT_ARRAY:
    // Address of the array of initialization function pointers, computed the same way.
    init_array_ = reinterpret_cast<linker_function_t*>(load_bias + d->d_un.d_ptr);
    DEBUG("%s constructors (DT_INIT_ARRAY) found at %p", get_realpath(), init_array_);
    break;

  // ... (remaining cases omitted)
}
要解释上面的代码还要涉及到elf文件的结构,DT_INIT和DT_INIT_ARRAY均为动态节区.dynamic中结构体的d_tag值,结构体如下所示:(DT_INIT为可选)
// One entry of the .dynamic section in a 32-bit ELF file: a tag plus a union
// whose interpretation depends on that tag.
typedef struct {
Elf32_Sword d_tag;  // entry kind, e.g. DT_INIT or DT_INIT_ARRAY
union {
Elf32_Word d_val;  // the entry read as an integer value
Elf32_Addr d_ptr;  // the entry read as an address (used for DT_INIT / DT_INIT_ARRAY)
} d_un;
} Elf32_Dyn;
其中
#define DT_INIT 12 /* Address of initialization function */
#define DT_INIT_ARRAY 25 /* Address of initialization function array */
上面的函数中d->d_un.d_ptr的d其实是Elf32_Dyn的指针。
联合体d_un中的d_ptr存储的是虚拟地址(加上load_bias后即为内存中的实际地址),因此init_func_指向.init节中的初始化函数,init_array_指向.init_array节中的函数指针数组。
所以,call_function("DT_INIT", init_func_);call_array("DT_INIT_ARRAY", init_array_, init_array_count_, false)最终调用的是.init和.init_array节区的代码。
所以,逻辑就走完了:调用dlopen函数时,不仅会加载库文件,还会执行.init和.init_array节区的代码。这一点在脱壳中尤为重要!