Android 加载so的函数调用

如果一个so还没有被加载,系统首先会调用dlopen函数来处理库文件。以下源码来自Android 6.0.1。

static void* dlopen_ext(const char* filename, int flags, const android_dlextinfo* extinfo) {

  ScopedPthreadMutexLocker locker(&g_dl_mutex);

  soinfo* result = do_dlopen(filename, flags, extinfo);

  if (result == nullptr) {

        __bionic_format_dlerror("dlopen failed", linker_get_error_buffer());

        return nullptr;
  }

  return result;

}


void* dlopen(const char* filename, int flags) {

  return dlopen_ext(filename, flags, nullptr);

}

dlopen_ext函数会继续调用do_dlopen函数


  // Validates the dlopen arguments, loads the library via find_library() and,
  // on success, runs its constructors.
  //
  // name    - library name/path handed to find_library().
  // flags   - RTLD_* flags; any bit outside the accepted set is rejected.
  // extinfo - optional extended info (may be nullptr); its flags are validated too.
  // Returns the soinfo for the library, or nullptr on invalid arguments or load failure.
  soinfo* do_dlopen(const char* name, int flags, const android_dlextinfo* extinfo) {
    // Reject any flag bit that is not one of the supported RTLD_* values.
    if ((flags & ~(RTLD_NOW|RTLD_LAZY|RTLD_LOCAL|RTLD_GLOBAL|RTLD_NODELETE|RTLD_NOLOAD)) != 0) {
      DL_ERR("invalid flags to dlopen: %x", flags);
      return nullptr;
    }

    if (extinfo != nullptr) {
      // Unknown extended flag bits are an error.
      if ((extinfo->flags & ~(ANDROID_DLEXT_VALID_FLAG_BITS)) != 0) {
        DL_ERR("invalid extended flags to android_dlopen_ext: 0x%" PRIx64, extinfo->flags);
        return nullptr;
      }
      // An fd offset only makes sense when a library fd is supplied as well.
      if ((extinfo->flags & ANDROID_DLEXT_USE_LIBRARY_FD) == 0 &&
          (extinfo->flags & ANDROID_DLEXT_USE_LIBRARY_FD_OFFSET) != 0) {
        DL_ERR("invalid extended flag combination (ANDROID_DLEXT_USE_LIBRARY_FD_OFFSET without "
               "ANDROID_DLEXT_USE_LIBRARY_FD): 0x%" PRIx64, extinfo->flags);
        return nullptr;
      }
    }

    // NOTE(review): presumably lifts write protection on linker data while this
    // object is alive - confirm against ProtectedDataGuard.
    ProtectedDataGuard guard;

    soinfo* si = find_library(name, flags, extinfo);

    // Only a successfully loaded library gets its initializers run.
    if (si != nullptr) {
      si->call_constructors();
    }

    return si;
  }

函数首先会对参数进行一些判断,随后调用find_library函数

// Resolves a single library to its soinfo.
//
// A null name yields somain; otherwise the request is delegated to
// find_libraries() as a one-element batch.
// Returns nullptr when find_libraries() reports failure.
static soinfo* find_library(const char* name, int rtld_flags, const android_dlextinfo* extinfo) {
  if (name == nullptr) {
    return somain;
  }

  // Filled in by find_libraries() on success.
  soinfo* loaded;
  const bool found = find_libraries(nullptr, &name, 1, &loaded, nullptr, 0, rtld_flags, extinfo);
  return found ? loaded : nullptr;
}

find_libraries函数会调用find_library_internal函数,find_library_internal的实现如下:

// Looks a library up by soname among the already-loaded ones and, if that
// lookup does not succeed, loads it with load_library().
//
// Returns the matching or newly loaded soinfo. If loading fails, the candidate
// reported by the soname lookup (possibly nullptr) is returned instead.
static soinfo* find_library_internal(LoadTaskList& load_tasks, const char* name,
                                     int rtld_flags, const android_dlextinfo* extinfo) {
  soinfo* candidate;

  const bool already_loaded = find_loaded_library_by_soname(name, &candidate);
  if (already_loaded) {
    return candidate;
  }

  // A negative soname lookup is not conclusive: load_library performs the
  // accurate "is it already loaded" detection.
  TRACE("[ '%s' find_loaded_library_by_soname returned false (*candidate=%s@%p). Trying harder...]",
      name, candidate == nullptr ? "n/a" : candidate->get_realpath(), candidate);

  soinfo* const loaded = load_library(load_tasks, name, rtld_flags, extinfo);

  // If the load failed but a library with the same soname (different sdk
  // level) is already present, fall back to that candidate anyway.
  return loaded != nullptr ? loaded : candidate;
}

load_library进行真正的加载,返回一个soinfo结构体指针。该结构体存储了so的信息。

OK!!
目光回到do_dlopen函数,si->call_constructors();

// Runs the initializers of this library, at most once per soinfo.
// Order of work as written below: mark this soinfo as done, warn about a
// DT_PREINIT_ARRAY found outside the main executable, recurse into every
// child soinfo, then call DT_INIT followed by the DT_INIT_ARRAY entries.
void soinfo::call_constructors() {
  // Already run (or currently running) for this soinfo: nothing to do.
  if (constructors_called) {
    return;
  }

  // We set constructors_called before actually calling the constructors, otherwise it doesn't
  // protect against recursive constructor calls. One simple example of constructor recursion
  // is the libc debug malloc, which is implemented in libc_malloc_debug_leak.so:
  // 1. The program depends on libc, so libc's constructor is called here.
  // 2. The libc constructor calls dlopen() to load libc_malloc_debug_leak.so.
  // 3. dlopen() calls the constructors on the newly created
  //    soinfo for libc_malloc_debug_leak.so.
  // 4. The debug .so depends on libc, so CallConstructors is
  //    called again with the libc soinfo. If it doesn't trigger the early-
  //    out above, the libc constructor will be called again (recursively!).
  constructors_called = true;

  // The preinit array is only reported here, never executed, for non-main-executables.
  if (!is_main_executable() && preinit_array_ != nullptr) {
    // The GNU dynamic linker silently ignores these, but we warn the developer.
    PRINT("\"%s\": ignoring %zd-entry DT_PREINIT_ARRAY in shared library!",
          get_realpath(), preinit_array_count_);
  }

  // Children are initialized before this library's own initializers run.
  get_children().for_each([] (soinfo* si) {
    si->call_constructors();
  });

  TRACE("\"%s\": calling constructors", get_realpath());

  // DT_INIT should be called before DT_INIT_ARRAY if both are present.
  call_function("DT_INIT", init_func_);
  // Last argument is call_array's `reverse` flag: false walks the array front to back.
  call_array("DT_INIT_ARRAY", init_array_, init_array_count_, false);
}

// Excerpt: the final two statements of soinfo::call_constructors() - DT_INIT first, then DT_INIT_ARRAY.
call_function("DT_INIT", init_func_);
call_array("DT_INIT_ARRAY", init_array_, init_array_count_, false);

下面我们看一下call_function和call_array函数的实现:

// Hands every entry of a function-pointer array to call_function().
//
// array_name - label used in TRACE output only.
// functions  - the array; a null pointer means there is nothing to call.
// count      - number of entries in the array.
// reverse    - true walks from the last entry to the first, false from first to last.
void soinfo::call_array(const char* array_name __unused, linker_function_t* functions,
                        size_t count, bool reverse) {
  if (functions == nullptr) {
    return;
  }

  TRACE("[ Calling %s (size %zd) @ %p for '%s' ]", array_name, count, functions, get_realpath());

  // Indices are plain ints so that -1 can act as the stop value of a backwards walk.
  const int stride = reverse ? -1 : 1;
  const int stop = reverse ? -1 : count;
  int index = reverse ? (count - 1) : 0;

  while (index != stop) {
    TRACE("[ %s[%d] == %p ]", array_name, index, functions[index]);
    call_function("function", functions[index]);
    index += stride;
  }

  TRACE("[ Done calling %s for '%s' ]", array_name, get_realpath());
}

// Invokes one initializer/finalizer entry point of this library.
//
// function_name - label used in TRACE output only.
// function      - the entry point; nullptr and the all-ones value (-1) are
//                 both treated as "no function" and skipped.
void soinfo::call_function(const char* function_name __unused, linker_function_t function) {
  // The casts need an explicit integer target type: compare the pointer's
  // numeric value against -1 converted to the same pointer-sized type.
  if (function == nullptr || reinterpret_cast<uintptr_t>(function) == static_cast<uintptr_t>(-1)) {
    return;
  }

  TRACE("[ Calling %s @ %p for '%s' ]", function_name, function, get_realpath());
  function();
  TRACE("[ Done calling %s @ %p for '%s' ]", function_name, function, get_realpath());
}

可以看到最终就是调用init_func_和init_array_,init_func_和init_array_又是什么,是如何赋值的呢?

// Excerpt of the linker's walk over the .dynamic entries: d points at the
// current entry, and only the two cases relevant to initializers are shown.
switch (d->d_tag) {
  // ... (cases for other d_tag values omitted)

  case DT_INIT:
    // Single init function: entry address plus the library's load bias.
    init_func_ = reinterpret_cast<linker_function_t>(load_bias + d->d_un.d_ptr);
    DEBUG("%s constructors (DT_INIT) found at %p", get_realpath(), init_func_);
    break;

  case DT_INIT_ARRAY:
    // Array of init functions: same computation, but the result is a pointer to the array.
    init_array_ = reinterpret_cast<linker_function_t*>(load_bias + d->d_un.d_ptr);
    DEBUG("%s constructors (DT_INIT_ARRAY) found at %p", get_realpath(), init_array_);
    break;

  // ... (remaining cases omitted)
}

要解释上面的代码还要涉及到elf文件的结构,DT_INIT和DT_INIT_ARRAY均为动态节区.dynamic中结构体的d_tag值,结构体如下所示:(DT_INIT为可选)

// One entry of the ELF .dynamic section (32-bit layout).
typedef struct {
    Elf32_Sword d_tag;      // Entry kind, e.g. DT_INIT or DT_INIT_ARRAY; determines how d_un is read.
    union {
        Elf32_Word  d_val;  // Value read as an integer.
        Elf32_Addr  d_ptr;  // Value read as an address; the linker adds load_bias to it for DT_INIT / DT_INIT_ARRAY.
    } d_un;
} Elf32_Dyn;

其中
#define DT_INIT 12 /* Address of initialization function */
#define DT_INIT_ARRAY 25 /* Address of initialization function array */

上面的函数中d->d_un.d_ptr的d其实是Elf32_Dyn的指针。
联合体d_un存储的是虚拟地址(d_ptr),加上load_bias之后,init_func_和init_array_指向的就是.init和.init_array节的地址。
所以,call_function("DT_INIT", init_func_);call_array("DT_INIT_ARRAY", init_array_, init_array_count_, false)最终调用的是.init和.init_array节区的代码。
所以,逻辑就走完了:调用dlopen函数时不仅仅会加载库文件,还会调用.init和.init_array节区的代码。这一点在脱壳中尤为重要!

你可能感兴趣的:(Android 加载so的函数调用)