Facebook Redex分析(一)

拖了好久,想开始分析下redex系列

redex_frontend

这一part会分析redex_frontend,redex_frontend是在开始各个pass之前做的预处理,包括加载classes.dex, 收集类,分析混淆关系,分析代码可达性。

// Create the root store, named "classes".
DexStore root_store("classes");
// Only set dex magic to root DexStore since all dex magic
// should be consistent within one APK.
// The magic value is obtained from the input dex files (args.dex_files).
root_store.set_dex_magic(get_dex_magic(args.dex_files));

这里get_dex_magic其实就会去加载dex文件,然后从dex头部获取magic。

loadClass

// Load the classes of one dex file (`filename`); its statistics are written
// into dex_stats through the out-pointer.
dex_stats_t dex_stats;
DexClasses classes =
    load_classes_from_dex(filename.c_str(), &dex_stats);
// Add this file's stats to the running total and also keep a per-dex copy.
input_totals += dex_stats;
input_dexes_stats.push_back(dex_stats);
// Move the loaded classes into the first store.
stores[0].add_classes(std::move(classes));

遍历参数(args)里的dex_files,逐个文件加载其中的类

DexLoader

/**
 * Loads the dex file at `location`.
 *
 * Fetches the dex header for the location, validates it against the size of
 * m_file (passing `support_dex_v37` through to the validation), and then
 * delegates to the header-based load_dex overload.
 */
DexClasses DexLoader::load_dex(const char* location,
                               dex_stats_t* stats,
                               bool support_dex_v37) {
  const dex_header* header = get_dex_header(location);
  validate_dex_header(header, m_file.size(), support_dex_v37);
  return load_dex(header, stats);
}
/**
 * Loads every class definition described by the dex header `dh`.
 *
 * Returns an empty DexClasses when the header declares no class definitions.
 * Otherwise sets up the index (m_idx), points m_class_defs at the class
 * definition table inside the mapped file, loads the classes, gathers the
 * input statistics into `stats`, and returns the class list with null
 * entries removed.
 *
 * NOTE: the middle of this function was abridged by the article's author;
 * the abridged part is kept below as commented pseudo-code.
 */
DexClasses DexLoader::load_dex(const dex_header* dh, dex_stats_t* stats) {
  if (dh->class_defs_size == 0) {
    return DexClasses(0);
  }
  m_idx = new DexIdx(dh);
  auto off = (uint64_t)dh->class_defs_off;
  // class_defs_off is applied as a byte offset from the start of the header.
  m_class_defs =
      reinterpret_cast<const dex_class_def*>((const uint8_t*)dh + off);
  DexClasses classes(dh->class_defs_size);
  m_classes = &classes;

  auto lwork = new class_load_work[dh->class_defs_size];
  // --- Abridged in the article (pseudo-code, not C++) ---
  //   for each work item clw in lwork:
  //     clw->dl->load_dex_class(clw->num);  // simplified by the author
  //   ...
  // NOTE(review): the omitted part presumably also fills in and releases
  // lwork -- confirm against the upstream source.
  gather_input_stats(stats, dh);

  // Remove nulls from the classes list. They may have been introduced by benign
  // duplicate classes.
  classes.erase(std::remove(classes.begin(), classes.end(), nullptr),
                classes.end());

  return classes;
}

每个DexClass都对应一个dex_class_def结构;这些结构在dex文件中以数组形式存放,加载时用类的下标(index)作为偏移量来定位。

DexClass Create

开始为每个类构建DexClass

/**
 * Builds the DexClass described by the class definition `cdef`.
 *
 * Returns nullptr (after deleting the freshly built object) when g_redex
 * reports the class as already loaded. Otherwise loads, in this order, the
 * class annotations, the static values and the class data item, publishes
 * the class through g_redex and returns it.
 */
DexClass* DexClass::create(DexIdx* idx,
                           const dex_class_def* cdef,
                           const std::string& location) {
  DexClass* cls = new DexClass(idx, cdef, location);
  if (g_redex->class_already_loaded(cls)) {
    // FIXME: This isn't deterministic. We're keeping whichever class we loaded
    // first, which may not always be from the same dex (if we load them in
    // parallel, for example).
    delete cls;
    return nullptr;
  }
  cls->load_class_annotations(idx, cdef->annotations_off);
  // The static value array is owned here and destroyed when this function
  // returns; load_class_data_item only receives the raw pointer.
  auto deva = std::unique_ptr<DexEncodedValueArray>(
      load_static_values(idx, cdef->static_values_off));
  cls->load_class_data_item(idx, cdef->class_data_offset, deva.get());
  g_redex->publish_class(cls);
  return cls;
}

分为三块:

load_class_annotations

代码看着很复杂,其实就是分别找到class、field、method上绑定的annotation,以及方法参数对应的annotation

/**
 * Reads the annotations directory at offset `anno_off` and attaches the
 * annotation sets it lists to this class, its fields, its methods and its
 * method parameters.
 *
 * Does nothing when `anno_off` is 0.
 */
void DexClass::load_class_annotations(DexIdx* idx, uint32_t anno_off) {
  if (anno_off == 0) return;
  const dex_annotations_directory_item* annodir =
      (const dex_annotations_directory_item*)idx->get_uint_data(anno_off);
  // Class-level annotations.
  m_anno =
      DexAnnotationSet::get_annotation_set(idx, annodir->class_annotations_off);
  // The entry tables start right after the directory header. Each entry is a
  // pair of 32-bit words: an index followed by an offset.
  const uint32_t* annodata = (uint32_t*)(annodir + 1);
  // Field annotations: (field index, annotation-set offset).
  for (uint32_t i = 0; i < annodir->fields_size; i++) {
    uint32_t fidx = *annodata++;
    uint32_t off = *annodata++;
    DexField* field = static_cast<DexField*>(idx->get_fieldidx(fidx));
    DexAnnotationSet* aset = DexAnnotationSet::get_annotation_set(idx, off);
    field->attach_annotation_set(aset);
  }
  // Method annotations: (method index, annotation-set offset).
  for (uint32_t i = 0; i < annodir->methods_size; i++) {
    uint32_t midx = *annodata++;
    uint32_t off = *annodata++;
    DexMethod* method = static_cast<DexMethod*>(idx->get_methodidx(midx));
    DexAnnotationSet* aset = DexAnnotationSet::get_annotation_set(idx, off);
    method->attach_annotation_set(aset);
  }
  // Parameter annotations: (method index, offset of a reference list).
  // Entries whose offset is 0 are skipped.
  for (uint32_t i = 0; i < annodir->parameters_size; i++) {
    uint32_t midx = *annodata++;
    uint32_t xrefoff = *annodata++;
    if (xrefoff != 0) {
      DexMethod* method = static_cast<DexMethod*>(idx->get_methodidx(midx));
      // The list begins with its element count, followed by one
      // annotation-set offset per parameter position j.
      const uint32_t* annoxref = idx->get_uint_data(xrefoff);
      uint32_t count = *annoxref++;
      for (uint32_t j = 0; j < count; j++) {
        uint32_t off = annoxref[j];
        DexAnnotationSet* aset = DexAnnotationSet::get_annotation_set(idx, off);
        // Only non-null sets are attached to parameter j.
        if (aset != nullptr) {
          method->attach_param_annotation_set(j, aset);
          redex_assert(method->get_param_anno());
        }
      }
    }
  }
}

load_static_values
/**
 * Decodes an encoded value array starting at `encdata`.
 *
 * Reads a uleb128 element count, then decodes that many values through
 * DexEncodedValue::get_encoded_value, in order. `encdata` is handed to the
 * decoding helpers by address/reference (presumably so the cursor advances
 * past what was read -- confirm against the helpers).
 *
 * Returns a DexEncodedValueArray allocated with `new`, wrapping a deque that
 * is also allocated with `new`.
 */
DexEncodedValueArray* get_encoded_value_array(DexIdx* idx,
                                              const uint8_t*& encdata) {
  uint32_t size = read_uleb128(&encdata);
  auto* evlist = new std::deque<DexEncodedValue*>();
  for (uint32_t i = 0; i < size; i++) {
    DexEncodedValue* adev = DexEncodedValue::get_encoded_value(idx, encdata);
    evlist->push_back(adev);
  }
  return new DexEncodedValueArray(evlist);
}

获取静态变量背后的值,比如static int a = 1,那就是1,后面会把这个1和变量a绑定在一起

load_class_data_item

重点函数,拿到sfield&ifield&dmethod&vmethod的个数,然后加载进来,构造出DexField&DexMethod


/**
 * Decodes the class data item at offset `cdi_off` and registers this class's
 * static fields, instance fields, direct methods and virtual methods.
 *
 * Does nothing when `cdi_off` is 0. `svalues` may be null; when it is not,
 * one value is popped from it for each static field, in order.
 *
 * Fails through assert_or_throw with RedexError::DUPLICATE_METHODS when the
 * same DexMethod pointer is seen twice among the direct and virtual methods.
 */
void DexClass::load_class_data_item(DexIdx* idx,
                                    uint32_t cdi_off,
                                    DexEncodedValueArray* svalues) {
  if (cdi_off == 0) return;
  const uint8_t* encd = idx->get_uleb_data(cdi_off);
  // Header: the four member counts, each uleb128-encoded.
  uint32_t sfield_count = read_uleb128(&encd);
  uint32_t ifield_count = read_uleb128(&encd);
  uint32_t dmethod_count = read_uleb128(&encd);
  uint32_t vmethod_count = read_uleb128(&encd);
  // ndex accumulates the value read for each entry, so every entry's index is
  // the running sum within its list; it is reset to 0 before each list.
  uint32_t ndex = 0;
  for (uint32_t i = 0; i < sfield_count; i++) { // static fields
    ndex += read_uleb128(&encd); // field index
    auto access_flags = (DexAccessFlags)read_uleb128(&encd); // access flags
    DexField* df = static_cast<DexField*>(idx->get_fieldidx(ndex));
    DexEncodedValue* ev = nullptr; // value belonging to this static field
    if (svalues != nullptr) {
      ev = svalues->pop_next();
    }
    df->make_concrete(access_flags, ev);
    m_sfields.push_back(df);
  }
  ndex = 0;
  for (uint32_t i = 0; i < ifield_count; i++) { // instance fields
    ndex += read_uleb128(&encd);
    auto access_flags = (DexAccessFlags)read_uleb128(&encd);
    // Look the DexField up by index. A DexField is made of three parts: the
    // DexType of its class, the DexType of the field, and the DexString name.
    DexField* df = static_cast<DexField*>(idx->get_fieldidx(ndex));
    df->make_concrete(access_flags);
    m_ifields.push_back(df);
  }

  // Methods seen so far (direct and virtual), used to detect duplicates.
  std::unordered_set<DexMethod*> method_pointer_cache;

  ndex = 0;
  for (uint32_t i = 0; i < dmethod_count; i++) { // direct methods
    ndex += read_uleb128(&encd);
    auto access_flags = (DexAccessFlags)read_uleb128(&encd);
    uint32_t code_off = read_uleb128(&encd);
    // Find method in method index, returns same pointer for same method.
    // A DexMethod is made of: class DexType, proto DexProto, name DexString.
    DexMethod* dm = static_cast<DexMethod*>(idx->get_methodidx(ndex));
    std::unique_ptr<DexCode> dc = DexCode::get_dex_code(idx, code_off);
    if (dc && dc->get_debug_item()) {
      dc->get_debug_item()->bind_positions(dm, m_source_file);
    }
    // The last argument is false for direct methods, true for virtual ones.
    dm->make_concrete(access_flags, std::move(dc), false);

    assert_or_throw(
        method_pointer_cache.count(dm) == 0, RedexError::DUPLICATE_METHODS,
        "Found duplicate methods in the same class.", {{"method", SHOW(dm)}});

    method_pointer_cache.insert(dm);
    m_dmethods.push_back(dm);
  }
  ndex = 0;
  for (uint32_t i = 0; i < vmethod_count; i++) { // virtual methods
    ndex += read_uleb128(&encd);
    auto access_flags = (DexAccessFlags)read_uleb128(&encd);
    uint32_t code_off = read_uleb128(&encd);
    // Find method in method index, returns same pointer for same method.
    DexMethod* dm = static_cast<DexMethod*>(idx->get_methodidx(ndex));
    auto dc = DexCode::get_dex_code(idx, code_off);
    if (dc && dc->get_debug_item()) {
      dc->get_debug_item()->bind_positions(dm, m_source_file);
    }
    dm->make_concrete(access_flags, std::move(dc), true);

    assert_or_throw(
        method_pointer_cache.count(dm) == 0, RedexError::DUPLICATE_METHODS,
        "Found duplicate methods in the same class.", {{"method", SHOW(dm)}});

    method_pointer_cache.insert(dm);
    m_vmethods.push_back(dm);
  }
}

我们以DexField为例:

// Body of the static-field loop: decode one entry and register the field.
ndex += read_uleb128(&encd);  // add this entry's value to the running index
auto access_flags = (DexAccessFlags)read_uleb128(&encd);
DexField* df = static_cast<DexField*>(idx->get_fieldidx(ndex));
// Take the next static value, if a value array was supplied.
DexEncodedValue* ev = nullptr;
if (svalues != nullptr) {
  ev = svalues->pop_next();
}
df->make_concrete(access_flags, ev);
m_sfields.push_back(df);

首先获得该fieldindex,然后读取出accessFlag, 然后根据idx构造DexField:

/**
 * Resolves entry `fidx` of the field-id table into a field reference built
 * from its containing type, its name and its field type.
 *
 * Asserts that `fidx` is below m_field_ids_size.
 */
DexFieldRef* DexIdx::get_fieldidx_fromdex(uint32_t fidx) {
  redex_assert(fidx < m_field_ids_size);
  const auto& field_id = m_field_ids[fidx];
  DexType* owner = get_typeidx(field_id.classidx);
  DexType* field_type = get_typeidx(field_id.typeidx);
  DexString* field_name = get_stringidx(field_id.nameidx);
  return DexField::make_field(owner, field_name, field_type);
}

其实就是把field所属类的类型、field自身的类型和名字找出来,拼在一起。

get_dex_code 组织dexcode

构建DexMethod时比构建变量(field)多一步DexCode,会将这个DexCode与DexMethod绑定。

std::unique_ptr DexCode::get_dex_code(DexIdx* idx, uint32_t offset) {
  if (offset == 0) return std::unique_ptr();
  const dex_code_item* code = (const dex_code_item*)idx->get_uint_data(offset);
  std::unique_ptr dc(new DexCode());
  dc->m_registers_size = code->registers_size;
  dc->m_ins_size = code->ins_size;
  dc->m_outs_size = code->outs_size;
  dc->m_insns.reset(new std::vector());
  const uint16_t* cdata = (const uint16_t*)(code + 1);
  uint32_t tries = code->tries_size;
  if (code->insns_size) { //收集所有指令 构成instruction
    const uint16_t* end = cdata + code->insns_size;
    while (cdata < end) {
      DexInstruction* dop = DexInstruction::make_instruction(idx, &cdata);
      always_assert_log(dop != nullptr,
                        "Failed to parse method at offset 0x%08x", offset);
      dc->m_insns->push_back(dop);
    }
    /*
     * Padding, see dex-spec.
     * Per my memory, there are dex-files where the padding is
     * implemented not according to spec.  Just FYI in case
     * something weird happens in the future.
     */
    if (code->insns_size & 1 && tries) cdata++;
  }

  if (tries) { //try catch结构。。不具体分析了
    const dex_tries_item* dti = (const dex_tries_item*)cdata;
    const uint8_t* handlers = (const uint8_t*)(dti + tries);
    for (uint32_t i = 0; i < tries; i++) {
      DexTryItem* dextry = new DexTryItem(dti[i].start_addr, dti[i].insn_count);
      const uint8_t* handler = handlers + dti[i].handler_off;
      int32_t count = read_sleb128(&handler);
      bool has_catchall = false;
      if (count <= 0) {
        count = -count;
        has_catchall = true;
      }
      while (count--) {
        uint32_t tidx = read_uleb128(&handler);
        uint32_t hoff = read_uleb128(&handler);
        DexType* dt = idx->get_typeidx(tidx);
        dextry->m_catches.push_back(std::make_pair(dt, hoff));
      }
      if (has_catchall) {
        auto hoff = read_uleb128(&handler);
        dextry->m_catches.push_back(std::make_pair(nullptr, hoff));
      }
      dc->m_tries.emplace_back(dextry);
    }
  }
  dc->m_dbg = DexDebugItem::get_dex_debug(idx, code->debug_info_off); //留到后面debug的时候分析
  return dc;
}

debug信息

这里处理dexFile内的调试信息,后面分析

// Only when the method has code (dc is non-null) and that code has a debug
// item: call bind_positions on it with the method dm and m_source_file.
if (dc && dc->get_debug_item()) {
  dc->get_debug_item()->bind_positions(dm, m_source_file);
}

解混淆

这里处理混淆关系,后面分析

// Parse each ProGuard config file listed in args into the shared pg_config.
for (const auto& pg_config_path : args.proguard_config_paths) {
    // A Timer is created per file (presumably it reports the parse time --
    // confirm against Timer's definition).
    Timer time_pg_parsing("Parsed ProGuard config file");
    keep_rules::proguard_parser::parse_file(pg_config_path, &pg_config);
}
// Post-process the parsed config: remove the blacklisted rules.
keep_rules::proguard_parser::remove_blacklisted_rules(&pg_config);
// For every store, apply the deobfuscated names using the ProGuard map
// obtained from conf.
for (auto& store : stores) {
  apply_deobfuscated_names(store.get_dexen(), conf.get_proguard_map());
}

build_class_scope

很简单,收集DexClasses而已

// Build the Scope from an iterator constructed over all stores.
DexStoreClassesIterator it(stores);
Scope scope = build_class_scope(it);

optimization

这里还是和混淆有关,后面分析

// Step 1: process the ProGuard rules against the scope. The braces limit the
// lifetime of the Timer `t` to this step.
{
    Timer t("Processing proguard rules");

    // Read the "keep_all_annotation_classes" option from json_config; `true`
    // is passed as the second argument (presumably the default -- confirm).
    bool keep_all_annotation_classes;
    json_config.get("keep_all_annotation_classes", true,
                    keep_all_annotation_classes);
    process_proguard_rules(conf.get_proguard_map(), scope, external_classes,
                           pg_config, keep_all_annotation_classes);
  }
  // Step 2: process the no-optimizations annotations from conf, then mark
  // sketchy methods.
  {
    Timer t("No Optimizations Rules");
    // this will change rstate of methods
    keep_rules::process_no_optimizations_rules(
        conf.get_no_optimizations_annos(), scope);
    monitor_count::mark_sketchy_methods_with_no_optimize(scope);
  }

分析可达性

分析反射&代码调用中可达的类,后面分析

/*
 * Initializes list of classes that are reachable via reflection, and calls
 * or from code.
 *
 * These include:
 *  - Classes used in the manifest (e.g. activities, services, etc)
 *  - View or Fragment classes used in layouts
 *  - Classes that are in certain packages (specified in the reflected_packages
 *    section of the config) and classes that extend from them
 *  - Classes marked with special annotations (keep_annotations in config)
 *  - Classes reachable from native libraries
 */
  // The braces limit the lifetime of the Timer `t` to this step.
  {
    Timer t("Initializing reachable classes");
    // init reachable will change rstate of classes, methods and fields
    // Inputs: the class scope, the JSON config, and the no-optimizations
    // annotations obtained from conf.
    init_reachable_classes(scope, json_config,
                           conf.get_no_optimizations_annos());
  }

你可能感兴趣的:(Facebook Redex分析(一))