Android源码学习——linker(4)

本文学习的源码参考AndroidXRef,版本为Lollipop 5.1.0_r1。


前面讲完了so的加载,这一章来讲so的链接过程。so的链接实际上就是完成符号的重定位。

分别看下PrelinkImage和LinkImage的实现。首先是PrelinkImage,这个函数很长,我们一段段来看:

// First part of PrelinkImage (the article elides the rest of the body).
// This excerpt looks up the PT_DYNAMIC segment through
// phdr_table_get_dynamic_section() and, on ARM builds, the exception index
// table. It returns false when no dynamic section was found.
bool soinfo::PrelinkImage() {
  /* Extract dynamic section */
  ElfW(Word) dynamic_flags = 0;
  phdr_table_get_dynamic_section(phdr, phnum, load_bias, &dynamic, &dynamic_flags);

  /* We can't log anything until the linker is relocated */
  // FLAG_LINKER set in flags means this soinfo is the linker itself, so every
  // logging call below is guarded by !relocating_linker.
  bool relocating_linker = (flags & FLAG_LINKER) != 0;
  if (!relocating_linker) {
    INFO("[ linking %s ]", name);
    DEBUG("si->base = %p si->flags = 0x%08x", reinterpret_cast<void*>(base), flags);
  }

  // Without a dynamic section there is nothing to parse: fail (silently when
  // relocating the linker, with an error message otherwise).
  if (dynamic == nullptr) {
    if (!relocating_linker) {
      DL_ERR("missing PT_DYNAMIC in \"%s\"", name);
    }
    return false;
  } else {
    if (!relocating_linker) {
      DEBUG("dynamic = %p", dynamic);
    }
  }

#if defined(__arm__)
  // The return value is deliberately discarded via the (void) cast.
  (void) phdr_table_get_arm_exidx(phdr, phnum, load_bias,
                                  &ARM_exidx, &ARM_exidx_count);
#endif

  ......

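首先是调用phdr_table_get_dynamic_section获取动态节区。
看下怎么获得的(注意:下面贴出的是使用ELF::类型、带dynamic_count参数的六参数版本,应出自crazy_linker;上面PrelinkImage里调用的是五参数版本,但查找PT_DYNAMIC的思路是一样的):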

// Scans the program header table for the first PT_DYNAMIC entry.
//
// On a match:
//   *dynamic        <- load_bias + p_vaddr of that entry
//   *dynamic_count  <- p_memsz / sizeof(ELF::Dyn)   (only if non-NULL)
//   *dynamic_flags  <- p_flags                      (only if non-NULL)
// Without a match:
//   *dynamic        <- NULL
//   *dynamic_count  <- 0                            (only if non-NULL)
//   *dynamic_flags  is left untouched.
void phdr_table_get_dynamic_section(const ELF::Phdr* phdr_table,
                                    int phdr_count,
                                    ELF::Addr load_bias,
                                    const ELF::Dyn** dynamic,
                                    size_t* dynamic_count,
                                    ELF::Word* dynamic_flags) {
  const ELF::Phdr* const table_end = phdr_table + phdr_count;

  // Advance to the first PT_DYNAMIC entry, or to the end of the table.
  const ELF::Phdr* entry = phdr_table;
  while (entry < table_end && entry->p_type != PT_DYNAMIC) {
    entry++;
  }

  if (!(entry < table_end)) {
    // No dynamic segment in this image.
    *dynamic = NULL;
    if (dynamic_count) {
      *dynamic_count = 0;
    }
    return;
  }

  *dynamic = reinterpret_cast<const ELF::Dyn*>(load_bias + entry->p_vaddr);
  if (dynamic_count) {
    *dynamic_count = (unsigned)(entry->p_memsz / sizeof(ELF::Dyn));
  }
  if (dynamic_flags) {
    *dynamic_flags = entry->p_flags;
  }
}

从第一个程序头表项开始遍历,找类型为PT_DYNAMIC的项,找到后用load_bias加上该项的p_vaddr,就得到了动态节区在内存中的地址。并且,用该段的内存大小p_memsz除以一个动态节区条目的大小sizeof(ELF::Dyn),得到动态节区中条目的数目。

回到PrelinkImage中,继续往下看:

  // Extract useful information from dynamic section.
  // Walk the entries one at a time until the DT_NULL terminator and dispatch
  // on each entry's tag. needed_count is checked in the sanity checks at the
  // end of the function; presumably it is incremented by a DT_NEEDED case not
  // shown in this excerpt.
  uint32_t needed_count = 0;
  for (ElfW(Dyn)* d = dynamic; d->d_tag != DT_NULL; ++d) {
    DEBUG("d = %p, d[0](tag) = %p d[1](val) = %p",
          d, reinterpret_cast<void*>(d->d_tag), reinterpret_cast<void*>(d->d_un.d_val));
    switch (d->d_tag) {

然后开始一项项地遍历动态节区里面的条目,直到遇到d_tag为DT_NULL的条目为止。看下条目的结构:

// One entry of the dynamic section: a tag followed by a value whose
// interpretation (plain integer or address) depends on the tag.
struct Elf32_Dyn
{
  Elf32_Sword d_tag;            // Type of dynamic table entry.
  union
  {
      Elf32_Word d_val;         // Integer value of entry.
      Elf32_Addr d_ptr;         // Pointer value of entry.
  } d_un;                       // Both members share the same storage.
};

两部分,一个4字节的d_tag,然后一个4字节的联合体,可能为d_val,也可能为一个地址d_ptr。
而这里对Elf32_Dyn这个结构做解析,就是针对不同的d_tag取值进行不同的操作。
后面内容很长,我们挑几个重要的来说:

      case DT_HASH:
        // The table begins with two 32-bit words, nbucket and nchain.
        nbucket = reinterpret_cast<uint32_t*>(load_bias + d->d_un.d_ptr)[0];
        nchain = reinterpret_cast<uint32_t*>(load_bias + d->d_un.d_ptr)[1];
        // bucket[] starts right after that 8-byte header; chain[] starts after
        // the nbucket 4-byte bucket entries.
        bucket = reinterpret_cast<uint32_t*>(load_bias + d->d_un.d_ptr + 8);
        chain = reinterpret_cast<uint32_t*>(load_bias + d->d_un.d_ptr + 8 + nbucket * 4);
        break;

这个动态节区条目是关于哈希表的描述,d_un.d_ptr给出了哈希表的地址(需要加上load_bias)。哈希表开头的两个32位字分别是nbucket和nchain,紧随其后的是保存符号表索引的bucket数组和chain数组。这是为了方便我们后面查找符号表。

      case DT_STRTAB:
        // d_ptr is relative to the load bias; keep the string table's
        // in-memory address.
        strtab = reinterpret_cast<const char*>(load_bias + d->d_un.d_ptr);
        break;

      case DT_STRSZ:
        // Size of the string table, taken verbatim from d_val.
        strtab_size = d->d_un.d_val;
        break;

分别给出了字符串表的地址和大小(字节数)。

      case DT_SYMTAB:
        // In-memory address of the symbol table (an array of ElfW(Sym)).
        symtab = reinterpret_cast<ElfW(Sym)*>(load_bias + d->d_un.d_ptr);
        break;

给出了符号表的地址。

      case DT_SYMENT:
        // The declared symbol entry size must match the ElfW(Sym) layout this
        // linker was built with; otherwise the symbol table cannot be indexed.
        if (d->d_un.d_val != sizeof(ElfW(Sym))) {
          DL_ERR("invalid DT_SYMENT: %zd", static_cast<size_t>(d->d_un.d_val));
          return false;
        }
        break;

判断所给的符号表的表项大小是不是正确。

      case DT_PLTREL:
        // The PLT relocation format declared by the library must be the one
        // this build handles: DT_RELA when USE_RELA is defined, DT_REL
        // otherwise. Anything else is rejected.
#if defined(USE_RELA)
        if (d->d_un.d_val != DT_RELA) {
          DL_ERR("unsupported DT_PLTREL in \"%s\"; expected DT_RELA", name);
          return false;
        }
#else
        if (d->d_un.d_val != DT_REL) {
          DL_ERR("unsupported DT_PLTREL in \"%s\"; expected DT_REL", name);
          return false;
        }
#endif
        break;

给出过程链接表(PLT)所引用的重定位项的类型,可能为DT_RELA(重定位项自带显式加数r_addend)或DT_REL(加数隐式存放在被重定位的位置上)。这里要求它必须与编译时选定的格式(是否定义了USE_RELA)一致,否则直接报错返回。

      case DT_JMPREL:
        // In-memory address of the relocation entries associated with the PLT.
#if defined(USE_RELA)
        plt_rela = reinterpret_cast<ElfW(Rela)*>(load_bias + d->d_un.d_ptr);
#else
        plt_rel = reinterpret_cast<ElfW(Rel)*>(load_bias + d->d_un.d_ptr);
#endif
        break;

      case DT_PLTRELSZ:
        // d_val is divided by the size of one entry to get the entry count.
#if defined(USE_RELA)
        plt_rela_count = d->d_un.d_val / sizeof(ElfW(Rela));
#else
        plt_rel_count = d->d_un.d_val / sizeof(ElfW(Rel));
#endif
        break;

DT_JMPREL指明了与PLT关联的重定位表的地址,而DT_PLTRELSZ则指明了该重定位表的大小(字节数),除以单个表项的大小就得到了表项的个数。

      case DT_PLTGOT:
#if defined(__mips__)
        // Used by mips and mips64.
        plt_got = reinterpret_cast<ElfW(Addr)**>(load_bias + d->d_un.d_ptr);
#endif
        // Ignore for other platforms... (because RTLD_LAZY is not supported)
        break;

如果是mips架构,会给出一个跟过程链接表(PLT)关联的全局偏移表(GOT)的地址,但是其他平台上并不支持RTLD_LAZY ,所以不需要这一项。

      // DT_INIT / DT_FINI hold the address of a single function;
      // the *_ARRAY tags hold the address of an array of function pointers,
      // and the matching *_ARRAYSZ tags hold that array's size, which is
      // divided by sizeof(ElfW(Addr)) to get the number of entries.
      case DT_INIT:
        init_func = reinterpret_cast<linker_function_t>(load_bias + d->d_un.d_ptr);
        DEBUG("%s constructors (DT_INIT) found at %p", name, init_func);
        break;

      case DT_FINI:
        fini_func = reinterpret_cast<linker_function_t>(load_bias + d->d_un.d_ptr);
        DEBUG("%s destructors (DT_FINI) found at %p", name, fini_func);
        break;

      case DT_INIT_ARRAY:
        init_array = reinterpret_cast<linker_function_t*>(load_bias + d->d_un.d_ptr);
        DEBUG("%s constructors (DT_INIT_ARRAY) found at %p", name, init_array);
        break;

      case DT_INIT_ARRAYSZ:
        init_array_count = ((unsigned)d->d_un.d_val) / sizeof(ElfW(Addr));
        break;

      case DT_FINI_ARRAY:
        fini_array = reinterpret_cast<linker_function_t*>(load_bias + d->d_un.d_ptr);
        DEBUG("%s destructors (DT_FINI_ARRAY) found at %p", name, fini_array);
        break;

      case DT_FINI_ARRAYSZ:
        fini_array_count = ((unsigned)d->d_un.d_val) / sizeof(ElfW(Addr));
        break;

      case DT_PREINIT_ARRAY:
        preinit_array = reinterpret_cast<linker_function_t*>(load_bias + d->d_un.d_ptr);
        DEBUG("%s constructors (DT_PREINIT_ARRAY) found at %p", name, preinit_array);
        break;

      case DT_PREINIT_ARRAYSZ:
        preinit_array_count = ((unsigned)d->d_un.d_val) / sizeof(ElfW(Addr));
        break;

分别为初始化函数(DT_INIT)的地址、结束函数(DT_FINI)的地址、初始化函数数组(init_array,里面存放的是一组函数地址)的地址及其项数、结束函数数组的地址及其项数,以及预初始化函数数组的地址及其项数。

最后一部分:

  // Sanity checks.
  // The linker itself must not depend on any other library.
  if (relocating_linker && needed_count != 0) {
    DL_ERR("linker cannot have DT_NEEDED dependencies on other libraries");
    return false;
  }
  // A usable hash table, string table and symbol table are all mandatory.
  // Presumably these fields are still zero when the corresponding dynamic
  // entry was never seen; the error text hints that a library built with
  // only a GNU-style hash table ends up here.
  if (nbucket == 0) {
    DL_ERR("empty/missing DT_HASH in \"%s\" (built with --hash-style=gnu?)", name);
    return false;
  }
  if (strtab == 0) {
    DL_ERR("empty/missing DT_STRTAB in \"%s\"", name);
    return false;
  }
  if (symtab == 0) {
    DL_ERR("empty/missing DT_SYMTAB in \"%s\"", name);
    return false;
  }
  return true;
}

会对提取到的值做一些检查,并返回,PrelinkImage就完成了。

这里我们完成了动态节区的解析,重定位需要的重定位表、符号表、哈希表以及字符串表等等也都准备完成,接下来就是真正进行重定位的过程了,继续看下半部分,LinkImage的实现:

// Second stage of linking: applies the relocations collected earlier, then
// restores memory protections and handles GNU RELRO.
//
// Sequence, as visible below:
//   1. (32-bit only) make segments writable if the image has text relocations;
//   2. relocate the regular table, then the PLT table (RELA or REL flavour
//      depending on USE_RELA);
//   3. (MIPS only) relocate the GOT;
//   4. (32-bit only) re-protect the segments;
//   5. enable GNU RELRO protection and optionally serialize or map the RELRO
//      segment according to extinfo->flags;
//   6. notify gdb.
//
// extinfo may be null; the RELRO sharing step is then skipped.
// Returns true on success, false after reporting the error via DL_ERR.
bool soinfo::LinkImage(const android_dlextinfo* extinfo) {

#if !defined(__LP64__)
  if (has_text_relocations) {
    // Make segments writable to allow text relocations to work properly. We will later call
    // phdr_table_protect_segments() after all of them are applied and all constructors are run.
    // NOTE(review): in this function the re-protect call sits right after the
    // relocation step below; no constructor call is visible here.
    DL_WARN("%s has text relocations. This is wasting memory and prevents "
            "security hardening. Please fix.", name);
    if (phdr_table_unprotect_segments(phdr, phnum, load_bias) < 0) {
      DL_ERR("can't unprotect loadable segments for \"%s\": %s",
             name, strerror(errno));
      return false;
    }
  }
#endif

  // A non-zero result from Relocate() is treated as failure.
#if defined(USE_RELA)
  if (rela != nullptr) {
    DEBUG("[ relocating %s ]", name);
    if (Relocate(rela, rela_count)) {
      return false;
    }
  }
  if (plt_rela != nullptr) {
    DEBUG("[ relocating %s plt ]", name);
    if (Relocate(plt_rela, plt_rela_count)) {
      return false;
    }
  }
#else
  if (rel != nullptr) {
    DEBUG("[ relocating %s ]", name);
    if (Relocate(rel, rel_count)) {
      return false;
    }
  }
  if (plt_rel != nullptr) {
    DEBUG("[ relocating %s plt ]", name);
    if (Relocate(plt_rel, plt_rel_count)) {
      return false;
    }
  }
#endif

#if defined(__mips__)
  if (!mips_relocate_got(this)) {
    return false;
  }
#endif

  DEBUG("[ finished linking %s ]", name);

#if !defined(__LP64__)
  if (has_text_relocations) {
    // All relocations are done, we can protect our segments back to read-only.
    if (phdr_table_protect_segments(phdr, phnum, load_bias) < 0) {
      DL_ERR("can't protect segments for \"%s\": %s",
             name, strerror(errno));
      return false;
    }
  }
#endif

  /* We can also turn on GNU RELRO protection */
  if (phdr_table_protect_gnu_relro(phdr, phnum, load_bias) < 0) {
    DL_ERR("can't enable GNU RELRO protection for \"%s\": %s",
           name, strerror(errno));
    return false;
  }

  /* Handle serializing/sharing the RELRO segment */
  // WRITE_RELRO takes precedence over USE_RELRO when both flags are set.
  if (extinfo && (extinfo->flags & ANDROID_DLEXT_WRITE_RELRO)) {
    if (phdr_table_serialize_gnu_relro(phdr, phnum, load_bias,
                                       extinfo->relro_fd) < 0) {
      DL_ERR("failed serializing GNU RELRO section for \"%s\": %s",
             name, strerror(errno));
      return false;
    }
  } else if (extinfo && (extinfo->flags & ANDROID_DLEXT_USE_RELRO)) {
    if (phdr_table_map_gnu_relro(phdr, phnum, load_bias,
                                 extinfo->relro_fd) < 0) {
      DL_ERR("failed mapping GNU RELRO section for \"%s\": %s",
             name, strerror(errno));
      return false;
    }
  }

  notify_gdb_of_load(this);
  return true;
}

重点是两处重定位的地方:如果定义了USE_RELA,就依次调用Relocate(rela, rela_count)和Relocate(plt_rela, plt_rela_count);否则(即使用REL格式的情况),就依次调用Relocate(rel, rel_count)和Relocate(plt_rel, plt_rel_count)。

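继续看Relocate这个函数的实现吧(注意:下面贴出的是crazy_linker中的SharedLibrary::Relocate,它的参数与上面Relocate(rel, rel_count)的调用形式并不相同,不是soinfo的同名成员函数,但重定位的整体流程可以对照着看):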

// Applies all relocations of this library.
//
// An ElfRelocations object is initialised from this library's ELF view and
// then asked to apply every relocation, resolving symbols through a
// SharedLibraryResolver built from |lib_list| and |dependencies|.
// Returns false as soon as either step fails; |error| is passed through to
// both steps.
bool SharedLibrary::Relocate(LibraryList* lib_list,
                             Vector<LibraryView*>* dependencies,
                             Error* error) {
  LOG("%s: Applying relocations to %s\n", __FUNCTION__, base_name_);

  // Step 1: parse the relocation-related dynamic entries.
  ElfRelocations relocs;
  const bool initialized = relocs.Init(&view_, error);
  if (!initialized) {
    return false;
  }

  // Step 2: apply them, looking symbols up through the resolver.
  SharedLibraryResolver symbol_resolver(this, lib_list, dependencies);
  const bool applied = relocs.ApplyAll(&symbols_, &symbol_resolver, error);
  if (!applied) {
    return false;
  }

  LOG("%s: Relocations applied for %s\n", __FUNCTION__, base_name_);
  return true;
}

主要是初始化了一个ElfRelocations的对象,然后分别去调用了它的Init方法和ApplyAll方法。

先看Init方法:

bool ElfRelocations::Init(const ElfView* view, Error* error) {
  // Save these for later.
  phdr_ = view->phdr();
  phdr_count_ = view->phdr_count();
  load_bias_ = view->load_bias();

  // We handle only Rel or Rela, but not both. If DT_RELA or DT_RELASZ
  // then we require DT_PLTREL to agree.
  bool has_rela_relocations = false;
  bool has_rel_relocations = false;

  // Parse the dynamic table.
  ElfView::DynamicIterator dyn(view);
  for (; dyn.HasNext(); dyn.GetNext()) {
    ELF::Addr dyn_value = dyn.GetValue();
    uintptr_t dyn_addr = dyn.GetAddress(view->load_bias());

    const ELF::Addr tag = dyn.GetTag();
    switch (tag) {
      case DT_PLTREL:
        RLOG("  DT_PLTREL value=%d\n", dyn_value);
        if (dyn_value != DT_REL && dyn_value != DT_RELA) {
          *error = "Invalid DT_PLTREL value in dynamic section";
          return false;
        }
        relocations_type_ = dyn_value;
        break;
      case DT_JMPREL:
        RLOG("  DT_JMPREL addr=%p\n", dyn_addr);
        plt_relocations_ = dyn_addr;
        break;
      case DT_PLTRELSZ:
        plt_relocations_size_ = dyn_value;
        RLOG("  DT_PLTRELSZ size=%d\n", dyn_value);
        break;
      case DT_RELA:
      case DT_REL:
        RLOG("  %s addr=%p\n",
             (tag == DT_RELA) ? "DT_RELA" : "DT_REL",
             dyn_addr);
        if (relocations_) {
          *error = "Unsupported DT_RELA/DT_REL combination in dynamic section";
          return false;
        }
        relocations_ = dyn_addr;
        if (tag == DT_RELA)
          has_rela_relocations = true;
        else
          has_rel_relocations = true;
        break;
      case DT_RELASZ:
      case DT_RELSZ:
        RLOG("  %s size=%d\n",
             (tag == DT_RELASZ) ? "DT_RELASZ" : "DT_RELSZ",
             dyn_addr);
        if (relocations_size_) {
          *error = "Unsupported DT_RELASZ/DT_RELSZ combination in dyn section";
          return false;
        }
        relocations_size_ = dyn_value;
        if (tag == DT_RELASZ)
          has_rela_relocations = true;
        else
          has_rel_relocations = true;
        break;
      case DT_PLTGOT:
        // Only used on MIPS currently. Could also be used on other platforms
        // when lazy binding (i.e. RTLD_LAZY) is implemented.
        RLOG("  DT_PLTGOT addr=%p\n", dyn_addr);
        plt_got_ = reinterpret_cast(dyn_addr);
        break;
      case DT_TEXTREL:
        RLOG("  DT_TEXTREL\n");
        has_text_relocations_ = true;
        break;
      case DT_SYMBOLIC:
        RLOG("  DT_SYMBOLIC\n");
        has_symbolic_ = true;
        break;
      case DT_FLAGS:
        if (dyn_value & DF_TEXTREL)
          has_text_relocations_ = true;
        if (dyn_value & DF_SYMBOLIC)
          has_symbolic_ = true;
        RLOG(" DT_FLAGS has_text_relocations=%s has_symbolic=%s\n",
             has_text_relocations_ ? "true" : "false",
             has_symbolic_ ? "true" : "false");
        break;
#if defined(__mips__)
      case DT_MIPS_SYMTABNO:
        RLOG("  DT_MIPS_SYMTABNO value=%d\n", dyn_value);
        mips_symtab_count_ = dyn_value;
        break;

      case DT_MIPS_LOCAL_GOTNO:
        RLOG("  DT_MIPS_LOCAL_GOTNO value=%d\n", dyn_value);
        mips_local_got_count_ = dyn_value;
        break;

      case DT_MIPS_GOTSYM:
        RLOG("  DT_MIPS_GOTSYM value=%d\n", dyn_value);
        mips_gotsym_ = dyn_value;
        break;
#endif
      default:
        ;
    }
  }

  if (relocations_type_ != DT_REL && relocations_type_ != DT_RELA) {
    *error = "Unsupported or missing DT_PLTREL in dynamic section";
    return false;
  }

  if (relocations_type_ == DT_REL && has_rela_relocations) {
    *error = "Found DT_RELA in dyn section, but DT_PLTREL is DT_REL";
    return false;
  }
  if (relocations_type_ == DT_RELA && has_rel_relocations) {
    *error = "Found DT_REL in dyn section, but DT_PLTREL is DT_RELA";
    return false;
  }

  return true;
}

好吧,相当于又解析了一遍。

接着看ApplyAll方法:

bool ElfRelocations::ApplyAll(const ElfSymbols* symbols,
                              SymbolResolver* resolver,
                              Error* error) {
  LOG("%s: Enter\n", __FUNCTION__);

  if (has_text_relocations_) {
    if (phdr_table_unprotect_segments(phdr_, phdr_count_, load_bias_) < 0) {
      error->Format("Can't unprotect loadable segments: %s", strerror(errno));
      return false;
    }
  }

  if (relocations_type_ == DT_REL) {
    if (!ApplyRelRelocs(reinterpret_cast(plt_relocations_),
                        plt_relocations_size_ / sizeof(ELF::Rel),
                        symbols,
                        resolver,
                        error))
      return false;
    if (!ApplyRelRelocs(reinterpret_cast(relocations_),
                        relocations_size_ / sizeof(ELF::Rel),
                        symbols,
                        resolver,
                        error))
      return false;
  }

  else if (relocations_type_ == DT_RELA) {
    if (!ApplyRelaRelocs(reinterpret_cast(plt_relocations_),
                         plt_relocations_size_ / sizeof(ELF::Rela),
                         symbols,
                         resolver,
                         error))
      return false;
    if (!ApplyRelaRelocs(reinterpret_cast(relocations_),
                         relocations_size_ / sizeof(ELF::Rela),
                         symbols,
                         resolver,
                         error))
      return false;
  }

#ifdef __mips__
  if (!RelocateMipsGot(symbols, resolver, error))
    return false;
#endif

  if (has_text_relocations_) {
    if (phdr_table_protect_segments(phdr_, phdr_count_, load_bias_) < 0) {
      error->Format("Can't reprotect loadable segments: %s", strerror(errno));
      return false;
    }
  }

  LOG("%s: Done\n", __FUNCTION__);
  return true;
}

还是两步走,如果是DT_REL,那么就去调用ApplyRelRelocs执行plt_relocations_和relocations_的重定位;如果是DT_RELA,那么就去调用ApplyRelaRelocs执行plt_relocations_和relocations_的重定位。

我们只看一个,另一个逻辑是差不多的:

// Applies |rel_count| REL-format relocations starting at |rel|.
//
// A NULL |rel| is treated as an empty table. For each entry, the relocation
// type and symbol index are extracted from r_info; entries of type 0 are
// skipped. When the entry references a symbol (index != 0), its address is
// resolved first; the entry is then applied through ApplyRelReloc().
//
// Returns false as soon as ApplyRelReloc() fails. The result of
// ResolveSymbol() is not checked here; it is only forwarded to
// ApplyRelReloc() as |resolved|.
bool ElfRelocations::ApplyRelRelocs(const ELF::Rel* rel,
                                    size_t rel_count,
                                    const ElfSymbols* symbols,
                                    SymbolResolver* resolver,
                                    Error* error) {
  RLOG("%s: rel=%p rel_count=%d\n", __FUNCTION__, rel, rel_count);

  if (!rel)
    return true;

  for (size_t rel_n = 0; rel_n < rel_count; rel++, rel_n++) {
    const ELF::Word rel_type = ELF_R_TYPE(rel->r_info);
    const ELF::Word rel_symbol = ELF_R_SYM(rel->r_info);

    ELF::Addr sym_addr = 0;
    // In-memory address of the location being relocated.
    ELF::Addr reloc = static_cast<ELF::Addr>(rel->r_offset + load_bias_);
    RLOG("  %d/%d reloc=%p offset=%p type=%d symbol=%d\n",
         rel_n + 1,
         rel_count,
         reloc,
         rel->r_offset,
         rel_type,
         rel_symbol);

    if (rel_type == 0)
      continue;

    bool resolved = false;

    // If this is a symbolic relocation, compute the symbol's address.
    if (__builtin_expect(rel_symbol != 0, 0)) {
      resolved = ResolveSymbol(rel_type,
                               rel_symbol,
                               symbols,
                               resolver,
                               reloc,
                               &sym_addr,
                               error);
    }

    if (!ApplyRelReloc(rel, sym_addr, resolved, error))
      return false;
  }

  return true;
}

从重定位表的第一项开始,一个个解析。先看下重定位表项的格式:

// REL-format relocation record (no explicit addend field).
// r_info packs two values: the symbol table index in the upper 24 bits and
// the relocation type in the low 8 bits.
struct Elf32_Rel {
  Elf32_Addr r_offset;  // Where to apply the relocation (file offset or virtual address).
  Elf32_Word r_info;    // Packed symbol index and relocation type.

  // Accessors and mutators equivalent to the ELF32_R_SYM, ELF32_R_TYPE and
  // ELF32_R_INFO macros of the ELF specification.
  Elf32_Word getSymbol() const {
    return r_info >> 8;
  }
  unsigned char getType() const {
    return static_cast<unsigned char>(r_info & 0xff);
  }
  void setSymbol(Elf32_Word s) {
    setSymbolAndType(s, getType());
  }
  void setType(unsigned char t) {
    setSymbolAndType(getSymbol(), t);
  }
  void setSymbolAndType(Elf32_Word s, unsigned char t) {
    // The low byte of (s << 8) is zero, so OR-ing in the type equals adding it.
    r_info = (s << 8) | t;
  }
};

两个字段,前面4字节是需要进行重定位的地址,后面4字节包含要进行重定位的符号表索引以及重定位的类型。

执行重定位的时候,先取得需要进行重定位的偏移r_offset,加上load_bias_就是内存中的地址reloc,然后从r_info中分别得到重定位类型rel_type和符号表索引rel_symbol。如果rel_symbol不为0,就调用ResolveSymbol去解析这个符号的实际地址sym_addr(返回值resolved表示是否解析成功),最后利用这个地址调用ApplyRelReloc完成重定位。

看下解析是怎么实现的:

// Computes the address to use for symbol number |rel_symbol|.
//
// The symbol's name is looked up in |symbols| and resolved through
// |resolver|. When found, its address is stored in |*sym_addr|.
// When not found, the symbol must be a weak reference, otherwise an error is
// formatted into |error|. For an unresolved weak reference the value depends
// on the relocation type: 0 for absolute and relative relocations, |reloc|
// (the address of the place) for pc-relative ones; any other type is an error.
//
// Returns true when |*sym_addr| was set, false on error.
bool ElfRelocations::ResolveSymbol(ELF::Word rel_type,
                                   ELF::Word rel_symbol,
                                   const ElfSymbols* symbols,
                                   SymbolResolver* resolver,
                                   ELF::Addr reloc,
                                   ELF::Addr* sym_addr,
                                   Error* error) {
  const char* sym_name = symbols->LookupNameById(rel_symbol);
  RLOG("    symbol name='%s'\n", sym_name);
  void* address = resolver->Lookup(sym_name);

  if (address) {
    // The symbol was found, so compute its address.
    RLOG("%s: symbol %s resolved to %p\n", __FUNCTION__, sym_name, address);
    *sym_addr = reinterpret_cast<ELF::Addr>(address);
    return true;
  }

  // The symbol was not found. Normally this is an error except
  // if this is a weak reference.
  if (!symbols->IsWeakById(rel_symbol)) {
    error->Format("Could not find symbol '%s'", sym_name);
    return false;
  }

  RLOG("%s: weak reference to unresolved symbol %s\n", __FUNCTION__, sym_name);

  // IHI0044C AAELF 4.5.1.1:
  // Libraries are not searched to resolve weak references.
  // It is not an error for a weak reference to remain
  // unsatisfied.
  //
  // During linking, the value of an undefined weak reference is:
  // - Zero if the relocation type is absolute
  // - The address of the place if the relocation is pc-relative
  // - The address of nominal base address if the relocation
  //   type is base-relative.
  RelocationType r = GetRelocationType(rel_type);
  if (r == RELOCATION_TYPE_ABSOLUTE || r == RELOCATION_TYPE_RELATIVE) {
    *sym_addr = 0;
    return true;
  }

  if (r == RELOCATION_TYPE_PC_RELATIVE) {
    *sym_addr = reloc;
    return true;
  }

  error->Format(
      "Invalid weak relocation type (%d) for unknown symbol '%s'",
      r,
      sym_name);
  return false;
}

首先是调用LookupNameById根据rel_symbol找到对应的符号名称sym_name,然后调用resolver的Lookup找到sym_name对应的符号地址address,最后做一个类型转换写入*sym_addr。
如果按符号名找不到对应的地址,要么说明重定位的过程出错了,要么说明这个符号是一个弱引用(weak reference);对于弱引用,会根据重定位类型把*sym_addr置为0或reloc。

看下符号的查找过程:

  const char* LookupNameById(size_t symbol_id) const {
    const ELF::Sym* sym = LookupById(symbol_id);
    if (!sym)
      return NULL;
    return string_table_ + sym->st_name;
  }

  // Returns the address of the symbol table entry at index |symbol_id|.
  // No bounds check is performed on |symbol_id| here.
  const ELF::Sym* LookupById(size_t symbol_id) const {
    return &symbol_table_[symbol_id];
  }

symbol_id表示了该符号在符号表中的索引,symbol_table_[symbol_id]是对应的符号表项,其中的st_name字段是符号名在字符串表中的偏移,加上string_table_就可以得到符号的名称了。

  virtual void* Lookup(const char* symbol_name) {
    // TODO(digit): Add the ability to lookup inside the main executable.

    // First, look inside the current library.
    const ELF::Sym* entry = lib_->LookupSymbolEntry(symbol_name);
    if (entry)
      return reinterpret_cast<void*>(lib_->load_bias() + entry->st_value);

    // Special case: redirect the dynamic linker symbols to our wrappers.
    // This ensures that loaded libraries can call dlopen() / dlsym()
    // and transparently use the crazy linker to perform their duty.
    void* address = WrapLinkerSymbol(symbol_name);
    if (address)
      return address;

    // Then look inside the dependencies.
    for (size_t n = 0; n < dependencies_->GetCount(); ++n) {
      LibraryView* wrap = (*dependencies_)[n];
      // LOG("%s: Looking into dependency %p (%s)\n", __FUNCTION__, wrap,
      // wrap->GetName());
      if (wrap->IsSystem()) {
        address = ::dlsym(wrap->GetSystem(), symbol_name);
#ifdef __arm__
        // Android libm.so defines isnanf as weak. This means that its
        // address cannot be found by dlsym(), which always returns NULL
        // for weak symbols. However, libm.so contains the real isnanf
        // as __isnanf. If we encounter isnanf and fail to resolve it in
        // libm.so, retry with __isnanf.
        //
        // This occurs only in clang, which lacks __builtin_isnanf. The
        // gcc compiler implements isnanf as a builtin, so the symbol
        // isnanf never need be resolved in gcc builds.
        //
        // http://code.google.com/p/chromium/issues/detail?id=376828
        if (!address &&
            !strcmp(symbol_name, "isnanf") &&
            !strcmp(wrap->GetName(), "libm.so"))
          address = ::dlsym(wrap->GetSystem(), "__isnanf");
#endif
        if (address)
          return address;
      }
      if (wrap->IsCrazy()) {
        SharedLibrary* dep = wrap->GetCrazy();
        entry = dep->LookupSymbolEntry(symbol_name);
        if (entry)
          return reinterpret_cast<void*>(dep->load_bias() + entry->st_value);
      }
    }

    // Nothing found here.
    return NULL;
  }

首先,在当前的库中找,LookupSymbolEntry找到了就直接返回地址。
接着是一个特殊处理:通过WrapLinkerSymbol把动态链接器的符号(如dlopen() / dlsym())重定向到crazy linker自己的封装上,保证被加载的库调用这些函数时,实际是由crazy linker来完成工作的。
如果本地库中没找到,那么就会再去依赖库中找。

找到符号地址之后,就要去重定位了,看下ApplyRelReloc的实现:

// Applies a single REL-format relocation.
//
// |rel|      the relocation entry; its r_offset plus load_bias_ is the
//            address that gets patched.
// |sym_addr| the resolved symbol address (0 for non-symbolic entries).
// |resolved| whether the symbol lookup succeeded; only the MIPS case reads it.
// |error|    receives a message when the relocation is rejected.
//
// Only the relocation types of the architecture being compiled for are
// handled; any other type is reported as invalid. Returns true on success.
bool ElfRelocations::ApplyRelReloc(const ELF::Rel* rel,
                                   ELF::Addr sym_addr,
                                   bool resolved CRAZY_UNUSED,
                                   Error* error) {
  const ELF::Word rel_type = ELF_R_TYPE(rel->r_info);
  const ELF::Word CRAZY_UNUSED rel_symbol = ELF_R_SYM(rel->r_info);

  const ELF::Addr reloc = static_cast<ELF::Addr>(rel->r_offset + load_bias_);

  RLOG("  rel reloc=%p offset=%p type=%d\n", reloc, rel->r_offset, rel_type);

  // Apply the relocation.
  // With REL entries the addend is whatever is already stored at the target,
  // which is why several cases use += instead of plain assignment.
  ELF::Addr* CRAZY_UNUSED target = reinterpret_cast<ELF::Addr*>(reloc);
  switch (rel_type) {
#ifdef __arm__
    case R_ARM_JUMP_SLOT:
      RLOG("  R_ARM_JUMP_SLOT target=%p addr=%p\n", target, sym_addr);
      *target = sym_addr;
      break;

    case R_ARM_GLOB_DAT:
      RLOG("  R_ARM_GLOB_DAT target=%p addr=%p\n", target, sym_addr);
      *target = sym_addr;
      break;

    case R_ARM_ABS32:
      RLOG("  R_ARM_ABS32 target=%p (%p) addr=%p\n",
           target,
           *target,
           sym_addr);
      *target += sym_addr;
      break;

    case R_ARM_REL32:
      RLOG("  R_ARM_REL32 target=%p (%p) addr=%p offset=%p\n",
           target,
           *target,
           sym_addr,
           rel->r_offset);
      // NOTE(review): this subtracts the unbiased r_offset, whereas the x86
      // pc-relative case below subtracts the biased address (reloc). Left
      // unchanged; confirm this is intentional.
      *target += sym_addr - rel->r_offset;
      break;

    case R_ARM_RELATIVE:
      RLOG("  R_ARM_RELATIVE target=%p (%p) bias=%p\n",
           target,
           *target,
           load_bias_);
      // A relative relocation must not reference a symbol.
      if (__builtin_expect(rel_symbol, 0)) {
        *error = "Invalid relative relocation with symbol";
        return false;
      }
      *target += load_bias_;
      break;

    case R_ARM_COPY:
      // NOTE: These relocations are forbidden in shared libraries.
      // The Android linker has special code to deal with this, which
      // is not needed here.
      RLOG("  R_ARM_COPY\n");
      *error = "Invalid R_ARM_COPY relocation in shared library";
      return false;
#endif  // __arm__

#ifdef __i386__
    case R_386_JMP_SLOT:
      *target = sym_addr;
      break;

    case R_386_GLOB_DAT:
      *target = sym_addr;
      break;

    case R_386_RELATIVE:
      // A relative relocation must not reference a symbol.
      if (rel_symbol) {
        *error = "Invalid relative relocation with symbol";
        return false;
      }
      *target += load_bias_;
      break;

    case R_386_32:
      *target += sym_addr;
      break;

    case R_386_PC32:
      *target += (sym_addr - reloc);
      break;
#endif  // __i386__

#ifdef __mips__
    case R_MIPS_REL32:
      // Add the symbol address when it was resolved, the load bias otherwise.
      if (resolved)
        *target += sym_addr;
      else
        *target += load_bias_;
      break;
#endif  // __mips__

    default:
      error->Format("Invalid relocation type (%d)", rel_type);
      return false;
  }

  return true;
}

reloc是需要进行重定位的地址,sym_addr是符号的地址,rel_type是重定位的类型。可以看到执行重定位时会根据不同的类型进行不同的处理:有的类型直接把sym_addr赋给*target,有的则是在*target原有值的基础上加上sym_addr或load_bias_。

至此,重定位的过程就全部完成了。


你可能感兴趣的:(android源码学习)