拖了好久,想开始分析下 redex 系列。
redex_frontend
这一部分会分析 `redex_frontend`。`redex_frontend` 是在开始各个 pass 之前做的预处理,包括加载 `classes.dex`、收集类、分析混淆关系、分析代码可达性。
// Root DexStore, constructed with the string "classes" (presumably its name).
DexStore root_store("classes");
// Only set dex magic to root DexStore since all dex magic
// should be consistent within one APK.
// The magic is obtained from the input dex files listed in args.dex_files.
root_store.set_dex_magic(get_dex_magic(args.dex_files));
这里 `get_dex_magic` 其实就会去加载 dex 文件,然后从 dex 头部获取 magic。
loadClass
// Statistics for this one dex file; passed by address to the loader below.
dex_stats_t dex_stats;
// Load all classes of the dex file named by `filename`.
DexClasses classes =
load_classes_from_dex(filename.c_str(), &dex_stats);
// Fold this file's stats into the running total and also keep a per-dex copy.
input_totals += dex_stats;
input_dexes_stats.push_back(dex_stats);
// Move the loaded classes into the first store.
stores[0].add_classes(std::move(classes));
找到文件里的 `dex_files`,开始遍历并加载类。
DexLoader
// Entry point for loading a dex file by path: obtains the dex header for
// `location`, validates it against the size of m_file, and then hands off to
// the header-based load_dex overload, which produces the class list.
DexClasses DexLoader::load_dex(const char* location,
                               dex_stats_t* stats,
                               bool support_dex_v37) {
  const dex_header* header = get_dex_header(location);
  const auto file_size = m_file.size();
  validate_dex_header(header, file_size, support_dex_v37);
  return load_dex(header, stats);
}
// Loads every class listed in the class_defs table of the dex header `dh`.
//
// Returns an empty DexClasses when the dex declares no classes. Otherwise it
// builds a DexIdx over the header, locates the class_def array, loads each
// class, records input statistics into `stats`, and returns the classes with
// any null entries removed.
//
// NOTE: the loop in the middle of this excerpt is simplified pseudo-code from
// the article, not valid C++; the elided part ("...") is not shown here.
DexClasses DexLoader::load_dex(const dex_header* dh, dex_stats_t* stats) {
  if (dh->class_defs_size == 0) {
    return DexClasses(0);
  }
  m_idx = new DexIdx(dh);
  auto off = (uint64_t)dh->class_defs_off;
  // The class_def array starts `class_defs_off` bytes after the header start.
  m_class_defs =
      reinterpret_cast<const dex_class_def*>((const uint8_t*)dh + off);
  DexClasses classes(dh->class_defs_size);
  m_classes = &classes;
  // One work item per class definition.
  auto lwork = new class_load_work[dh->class_defs_size];
  // Simplified pseudo-code: each work item loads the class with its index.
  for(work in class_load_work):
  clw->dl->load_dex_class(clw->num); // simplified here
  ...
  gather_input_stats(stats, dh);
  // Remove nulls from the classes list. They may have been introduced by benign
  // duplicate classes.
  classes.erase(std::remove(classes.begin(), classes.end(), nullptr),
                classes.end());
  return classes;
}
每个 `DexClass` 都有自己的 `dex_class_def` 结构,这些结构以数组形式存放,用 index 作为偏移量来定位。
DexClass Create
开始为每个类构建 `DexClass`:
// Builds a DexClass from the class definition `cdef`.
//
// Returns nullptr (after deleting the freshly built object) when g_redex
// reports that the class was already loaded. Otherwise it loads the class
// annotations, the static values and the class data item, publishes the class
// through g_redex, and returns it.
DexClass* DexClass::create(DexIdx* idx,
                           const dex_class_def* cdef,
                           const std::string& location) {
  DexClass* cls = new DexClass(idx, cdef, location);
  if (g_redex->class_already_loaded(cls)) {
    // FIXME: This isn't deterministic. We're keeping whichever class we loaded
    // first, which may not always be from the same dex (if we load them in
    // parallel, for example).
    delete cls;
    return nullptr;
  }
  cls->load_class_annotations(idx, cdef->annotations_off);
  // The static-value array is owned here and released when this function
  // returns; load_class_data_item only receives a raw pointer to it.
  auto deva = std::unique_ptr<DexEncodedValueArray>(
      load_static_values(idx, cdef->static_values_off));
  cls->load_class_data_item(idx, cdef->class_data_offset, deva.get());
  g_redex->publish_class(cls);
  return cls;
}
分为三块:
load_class_annotations
代码看着很复杂,其实就是分别找到 class、field、method 绑定的 annotation,以及方法参数对应的 annotation。
// Loads the annotations directory at `anno_off` and attaches the annotation
// sets it describes to this class, its fields, its methods and its method
// parameters. Does nothing when `anno_off` is 0.
//
// As read by this code, the data following the directory header consists of
// uint32 pairs, in order:
//   fields_size     x (field index,  annotation set offset)
//   methods_size    x (method index, annotation set offset)
//   parameters_size x (method index, offset of a per-parameter offset list)
void DexClass::load_class_annotations(DexIdx* idx, uint32_t anno_off) {
  if (anno_off == 0) return;
  const dex_annotations_directory_item* annodir =
      (const dex_annotations_directory_item*)idx->get_uint_data(anno_off);
  // Class-level annotations.
  m_anno =
      DexAnnotationSet::get_annotation_set(idx, annodir->class_annotations_off);
  // The pair tables start immediately after the directory header.
  const uint32_t* annodata = (uint32_t*)(annodir + 1);
  // Field annotations.
  for (uint32_t i = 0; i < annodir->fields_size; i++) {
    uint32_t fidx = *annodata++;
    uint32_t off = *annodata++;
    DexField* field = static_cast<DexField*>(idx->get_fieldidx(fidx));
    DexAnnotationSet* aset = DexAnnotationSet::get_annotation_set(idx, off);
    field->attach_annotation_set(aset);
  }
  // Method annotations.
  for (uint32_t i = 0; i < annodir->methods_size; i++) {
    uint32_t midx = *annodata++;
    uint32_t off = *annodata++;
    DexMethod* method = static_cast<DexMethod*>(idx->get_methodidx(midx));
    DexAnnotationSet* aset = DexAnnotationSet::get_annotation_set(idx, off);
    method->attach_annotation_set(aset);
  }
  // Parameter annotations: each entry points at a counted list of annotation
  // set offsets, one per parameter position.
  for (uint32_t i = 0; i < annodir->parameters_size; i++) {
    uint32_t midx = *annodata++;
    uint32_t xrefoff = *annodata++;
    if (xrefoff != 0) {
      DexMethod* method = static_cast<DexMethod*>(idx->get_methodidx(midx));
      const uint32_t* annoxref = idx->get_uint_data(xrefoff);
      uint32_t count = *annoxref++;
      for (uint32_t j = 0; j < count; j++) {
        uint32_t off = annoxref[j];
        DexAnnotationSet* aset = DexAnnotationSet::get_annotation_set(idx, off);
        // Only parameters that actually have an annotation set are recorded.
        if (aset != nullptr) {
          method->attach_param_annotation_set(j, aset);
          redex_assert(method->get_param_anno());
        }
      }
    }
  }
}
load_static_value
// Decodes an encoded array starting at `encdata`: a ULEB128 element count
// followed by that many encoded values.
//
// `encdata` is taken by reference and is handed to the readers below, which
// consume the bytes they decode. Returns a newly allocated
// DexEncodedValueArray built from the decoded values, in order.
DexEncodedValueArray* get_encoded_value_array(DexIdx* idx,
                                              const uint8_t*& encdata) {
  uint32_t size = read_uleb128(&encdata);
  auto* evlist = new std::deque<DexEncodedValue*>();
  for (uint32_t i = 0; i < size; i++) {
    DexEncodedValue* adev = DexEncodedValue::get_encoded_value(idx, encdata);
    evlist->push_back(adev);
  }
  return new DexEncodedValueArray(evlist);
}
获取静态变量背后的值,比如 `static int a = 1`,那就是 `1`,后面会把这个 `1` 和变量 `a` 绑定在一起。
load_class_data_item
重点函数:拿到 sfield、ifield、dmethod、vmethod 的个数,然后逐个加载进来,构造出 `DexField` 和 `DexMethod`。
// Parses the class data item at `cdi_off` and fills in this class's static
// fields, instance fields, direct methods and virtual methods. Does nothing
// when `cdi_off` is 0.
//
// The item begins with four ULEB128 counts (static fields, instance fields,
// direct methods, virtual methods) followed by the four lists in that order.
// Within each list the member index is delta-encoded: every entry stores the
// difference from the previous index, so `ndex` is accumulated and reset to 0
// before each list.
//
// `svalues`, when non-null, supplies the initial values of the static fields;
// one value is popped per static field, in list order.
//
// Throws via assert_or_throw (RedexError::DUPLICATE_METHODS) if the same
// method appears twice across the direct and virtual method lists.
void DexClass::load_class_data_item(DexIdx* idx,
                                    uint32_t cdi_off,
                                    DexEncodedValueArray* svalues) {
  if (cdi_off == 0) return;
  const uint8_t* encd = idx->get_uleb_data(cdi_off);
  uint32_t sfield_count = read_uleb128(&encd);
  uint32_t ifield_count = read_uleb128(&encd);
  uint32_t dmethod_count = read_uleb128(&encd);
  uint32_t vmethod_count = read_uleb128(&encd);
  uint32_t ndex = 0;
  for (uint32_t i = 0; i < sfield_count; i++) { // static fields
    ndex += read_uleb128(&encd); // field index (delta from previous entry)
    auto access_flags = (DexAccessFlags)read_uleb128(&encd); // access flags
    DexField* df = static_cast<DexField*>(idx->get_fieldidx(ndex));
    DexEncodedValue* ev = nullptr; // initial value of this static field
    if (svalues != nullptr) {
      ev = svalues->pop_next();
    }
    df->make_concrete(access_flags, ev);
    m_sfields.push_back(df);
  }
  ndex = 0;
  for (uint32_t i = 0; i < ifield_count; i++) { // instance fields
    ndex += read_uleb128(&encd);
    auto access_flags = (DexAccessFlags)read_uleb128(&encd);
    // Look up the DexField by index. A DexField is identified by three parts:
    // the DexType of its class, the DexType of the field, and the DexString
    // of its name.
    DexField* df = static_cast<DexField*>(idx->get_fieldidx(ndex));
    df->make_concrete(access_flags);
    m_ifields.push_back(df);
  }
  // Methods seen so far, shared by both method loops to detect duplicates.
  std::unordered_set<DexMethod*> method_pointer_cache;
  ndex = 0;
  for (uint32_t i = 0; i < dmethod_count; i++) { // direct methods
    ndex += read_uleb128(&encd);
    auto access_flags = (DexAccessFlags)read_uleb128(&encd);
    uint32_t code_off = read_uleb128(&encd);
    // Find method in method index, returns same pointer for same method.
    // A method is identified by: class DexType, proto DexProto, name DexString.
    DexMethod* dm = static_cast<DexMethod*>(idx->get_methodidx(ndex));
    std::unique_ptr<DexCode> dc = DexCode::get_dex_code(idx, code_off);
    if (dc && dc->get_debug_item()) {
      dc->get_debug_item()->bind_positions(dm, m_source_file);
    }
    // Third argument is false here and true in the virtual-method loop
    // (presumably an "is virtual" flag -- confirm against make_concrete).
    dm->make_concrete(access_flags, std::move(dc), false);
    assert_or_throw(
        method_pointer_cache.count(dm) == 0, RedexError::DUPLICATE_METHODS,
        "Found duplicate methods in the same class.", {{"method", SHOW(dm)}});
    method_pointer_cache.insert(dm);
    m_dmethods.push_back(dm);
  }
  ndex = 0;
  for (uint32_t i = 0; i < vmethod_count; i++) { // virtual methods
    ndex += read_uleb128(&encd);
    auto access_flags = (DexAccessFlags)read_uleb128(&encd);
    uint32_t code_off = read_uleb128(&encd);
    // Find method in method index, returns same pointer for same method.
    DexMethod* dm = static_cast<DexMethod*>(idx->get_methodidx(ndex));
    auto dc = DexCode::get_dex_code(idx, code_off);
    if (dc && dc->get_debug_item()) {
      dc->get_debug_item()->bind_positions(dm, m_source_file);
    }
    dm->make_concrete(access_flags, std::move(dc), true);
    assert_or_throw(
        method_pointer_cache.count(dm) == 0, RedexError::DUPLICATE_METHODS,
        "Found duplicate methods in the same class.", {{"method", SHOW(dm)}});
    method_pointer_cache.insert(dm);
    m_vmethods.push_back(dm);
  }
}
我们以 `DexField` 为例:
// Excerpt: loading one static field entry.
// The stored index is a delta from the previous entry, so it is accumulated.
ndex += read_uleb128(&encd);
auto access_flags = (DexAccessFlags)read_uleb128(&encd);
// Resolve the field index to its DexField.
DexField* df = static_cast<DexField*>(idx->get_fieldidx(ndex));
// Initial value of the static field, if a static-value array was supplied.
DexEncodedValue* ev = nullptr;
if (svalues != nullptr) {
  ev = svalues->pop_next();
}
df->make_concrete(access_flags, ev);
m_sfields.push_back(df);
首先获得该 field 的 index,然后读取出 access flags,然后根据 `idx` 构造 `DexField`:
// Resolves field index `fidx` from the dex field_ids table into a field
// reference. The index must be below m_field_ids_size. The entry's class
// index, type index and name index are each resolved and then combined via
// DexField::make_field.
DexFieldRef* DexIdx::get_fieldidx_fromdex(uint32_t fidx) {
  redex_assert(fidx < m_field_ids_size);
  const auto& field_id = m_field_ids[fidx];
  DexType* owner = get_typeidx(field_id.classidx);
  DexType* field_type = get_typeidx(field_id.typeidx);
  DexString* field_name = get_stringidx(field_id.nameidx);
  return DexField::make_field(owner, field_name, field_type);
}
其实就是把 field 所属类的类型、field 的类型和名字找到,然后拼起来。
get_dex_code 组织dexcode
构建 `DexMethod` 时比变量多一步 dexcode:会将这个 `DexCode` 与 `DexMethod` 绑定。
std::unique_ptr DexCode::get_dex_code(DexIdx* idx, uint32_t offset) {
if (offset == 0) return std::unique_ptr();
const dex_code_item* code = (const dex_code_item*)idx->get_uint_data(offset);
std::unique_ptr dc(new DexCode());
dc->m_registers_size = code->registers_size;
dc->m_ins_size = code->ins_size;
dc->m_outs_size = code->outs_size;
dc->m_insns.reset(new std::vector());
const uint16_t* cdata = (const uint16_t*)(code + 1);
uint32_t tries = code->tries_size;
if (code->insns_size) { //收集所有指令 构成instruction
const uint16_t* end = cdata + code->insns_size;
while (cdata < end) {
DexInstruction* dop = DexInstruction::make_instruction(idx, &cdata);
always_assert_log(dop != nullptr,
"Failed to parse method at offset 0x%08x", offset);
dc->m_insns->push_back(dop);
}
/*
* Padding, see dex-spec.
* Per my memory, there are dex-files where the padding is
* implemented not according to spec. Just FYI in case
* something weird happens in the future.
*/
if (code->insns_size & 1 && tries) cdata++;
}
if (tries) { //try catch结构。。不具体分析了
const dex_tries_item* dti = (const dex_tries_item*)cdata;
const uint8_t* handlers = (const uint8_t*)(dti + tries);
for (uint32_t i = 0; i < tries; i++) {
DexTryItem* dextry = new DexTryItem(dti[i].start_addr, dti[i].insn_count);
const uint8_t* handler = handlers + dti[i].handler_off;
int32_t count = read_sleb128(&handler);
bool has_catchall = false;
if (count <= 0) {
count = -count;
has_catchall = true;
}
while (count--) {
uint32_t tidx = read_uleb128(&handler);
uint32_t hoff = read_uleb128(&handler);
DexType* dt = idx->get_typeidx(tidx);
dextry->m_catches.push_back(std::make_pair(dt, hoff));
}
if (has_catchall) {
auto hoff = read_uleb128(&handler);
dextry->m_catches.push_back(std::make_pair(nullptr, hoff));
}
dc->m_tries.emplace_back(dextry);
}
}
dc->m_dbg = DexDebugItem::get_dex_debug(idx, code->debug_info_off); //留到后面debug的时候分析
return dc;
}
debug信息
这里处理 dex 文件内的调试信息,后面再分析。
// Only when the method has code and that code carries a debug item:
// bind the debug item's positions to method `dm` and this class's source file.
if (dc && dc->get_debug_item()) {
dc->get_debug_item()->bind_positions(dm, m_source_file);
}
解混淆
这里处理混淆关系,后面分析
// Parse every ProGuard configuration file listed in the arguments into
// pg_config, with a Timer labelled per parse.
for (const auto& pg_config_path : args.proguard_config_paths) {
Timer time_pg_parsing("Parsed ProGuard config file");
keep_rules::proguard_parser::parse_file(pg_config_path, &pg_config);
}
// Drop the blacklisted rules from the accumulated configuration.
keep_rules::proguard_parser::remove_blacklisted_rules(&pg_config);
// Apply deobfuscated names from the ProGuard map to the dexes of each store.
for (auto& store : stores) {
apply_deobfuscated_names(store.get_dexen(), conf.get_proguard_map());
}
build_class_scope
很简单,收集DexClasses而已
// Iterate over the classes of all stores and collect them into a Scope.
DexStoreClassesIterator it(stores);
Scope scope = build_class_scope(it);
optimization
这里还是和混淆有关,后面分析
{
// Timer labelled for the ProGuard rule processing done in this scope.
Timer t("Processing proguard rules");
bool keep_all_annotation_classes;
// Read "keep_all_annotation_classes" from the JSON config into the local
// (true is presumably the fallback when the key is absent -- confirm).
json_config.get("keep_all_annotation_classes", true,
keep_all_annotation_classes);
// Apply the parsed ProGuard configuration to the scope and external classes.
process_proguard_rules(conf.get_proguard_map(), scope, external_classes,
pg_config, keep_all_annotation_classes);
}
{
// Timer labelled for the no-optimizations processing done in this scope.
Timer t("No Optimizations Rules");
// this will change rstate of methods
keep_rules::process_no_optimizations_rules(
conf.get_no_optimizations_annos(), scope);
monitor_count::mark_sketchy_methods_with_no_optimize(scope);
}
分析可达性
分析反射&代码调用中可达的类,后面分析
/*
* Initializes the list of classes that are reachable via reflection or
* from code.
*
* These include:
* - Classes used in the manifest (e.g. activities, services, etc)
* - View or Fragment classes used in layouts
* - Classes that are in certain packages (specified in the reflected_packages
* section of the config) and classes that extend from them
* - Classes marked with special annotations (keep_annotations in config)
* - Classes reachable from native libraries
*/
{
// Timer labelled for the reachability initialization done in this scope.
Timer t("Initializing reachable classes");
// init reachable will change rstate of classes, methods and fields
init_reachable_classes(scope, json_config,
conf.get_no_optimizations_annos());
}