0x00
这篇文章我们分析Android脱壳神器DexHunter的源代码。DexHunter的作者也写了一篇介绍它的文章《从Android运行时出发,打造我们的脱壳神器》。DexHunter的源代码位于https://github.com/zyq8709/DexHunter。
0x01
DexHunter 实现中,只需要修改一处文件:dalvik\vm\native\dalvik_system_DexFile.cpp
下面是BeyondCompare比对:
我们看到DexHunter的代码都位于系统源代码的vm目录下,所以要运行DexHunter,需要先用它提供的文件替换dalvik\vm\native\dalvik_system_DexFile.cpp,然后在源码环境下编译,最后刷机运行。
0x02
核心原理请参考《从Android运行时出发,打造我们的脱壳神器》。简单地从代码角度说,就是在Dalvik_dalvik_system_DexFile_defineClassNative中插入代码来修复被破坏的dex,原理有两个:
1、在DVM中:
显式加载:
ClassLoader.loadClass对应Dalvik_dalvik_system_DexFile_defineClassNative
Class.forName对应Dalvik_java_lang_Class_classForName
隐式加载:
对应dvmResolveClass
如下图:
第一点说明了时机,也就是为什么在Dalvik_dalvik_system_DexFile_defineClassNative插入代码。
2、执行dvmDefineClass,形成的ClassObject结构体的变量都是有效的。dvmDefineClass会在我们插入的代码中调用,详见后面的分析。
0x03
下面用注释的方式来分析源代码。
分析之前先附上两张图,有助于分析。
图 1
其中baseAddr指向DexHeader的首地址,图中有错误。下文中经常看到mem->addr指向DexOptHeader的首地址。
再附上一张图,分析时会用到:
图 2
//------------------------added begin----------------------// #include <asm/siginfo.h> #include "libdex/DexClass.h" #include <sys/stat.h> #include <fcntl.h> #include <sys/mman.h> static char dexname[100]={0}; static char dumppath[100]={0}; static bool readable=true; static pthread_mutex_t read_mutex; static bool flag=true; static pthread_mutex_t mutex; static bool timer_flag=true; static timer_t timerId; struct arg{ DvmDex* pDvmDex; Object * loader; }param; void timer_thread(sigval_t) { timer_flag=false; timer_delete(timerId); ALOGI("GOT IT time up"); } void* ReadThread(void *arg){ FILE *fp = NULL; while (dexname[0]==0||dumppath[0]==0) { fp=fopen("/data/dexname", "r"); if (fp==NULL) { sleep(1); continue; } fgets(dexname,99,fp);//从/data/dexname获取字符串赋值给dexname,github中已经给出了,是/data/data/com.example.seventyfour.tencenttest/files/libmobisecy1.zip,这是用来脱阿里壳子时使用的 dexname[strlen(dexname)-1]=0; fgets(dumppath,99,fp);//这是生成4个文件的总目录/data/data/com.example.seventyfour.tencenttest/,后面我们会看到4个文件分别是part1,data,classdef,extra dumppath[strlen(dumppath)-1]=0; fclose(fp); fp=NULL; } struct sigevent sev; sev.sigev_notify=SIGEV_THREAD; sev.sigev_value.sival_ptr=&timerId; sev.sigev_notify_function=timer_thread; sev.sigev_notify_attributes = NULL; timer_create(CLOCK_REALTIME,&sev,&timerId); struct itimerspec ts; ts.it_value.tv_sec=5; ts.it_value.tv_nsec=0; ts.it_interval.tv_sec=0; ts.it_interval.tv_nsec=0; timer_settime(timerId,0,&ts,NULL); return NULL; } void ReadClassDataHeader(const uint8_t** pData, DexClassDataHeader *pHeader) { pHeader->staticFieldsSize = readUnsignedLeb128(pData); pHeader->instanceFieldsSize = readUnsignedLeb128(pData); pHeader->directMethodsSize = readUnsignedLeb128(pData); pHeader->virtualMethodsSize = readUnsignedLeb128(pData); } void ReadClassDataField(const uint8_t** pData, DexField* pField) { pField->fieldIdx = readUnsignedLeb128(pData); pField->accessFlags = readUnsignedLeb128(pData); } void ReadClassDataMethod(const uint8_t** pData, DexMethod* pMethod) { 
pMethod->methodIdx = readUnsignedLeb128(pData); pMethod->accessFlags = readUnsignedLeb128(pData); pMethod->codeOff = readUnsignedLeb128(pData); } DexClassData* ReadClassData(const uint8_t** pData) { DexClassDataHeader header; if (*pData == NULL) { return NULL; } ReadClassDataHeader(pData,&header); size_t resultSize = sizeof(DexClassData) + (header.staticFieldsSize * sizeof(DexField)) + (header.instanceFieldsSize * sizeof(DexField)) + (header.directMethodsSize * sizeof(DexMethod)) + (header.virtualMethodsSize * sizeof(DexMethod)); DexClassData* result = (DexClassData*) malloc(resultSize); if (result == NULL) { return NULL; } uint8_t* ptr = ((uint8_t*) result) + sizeof(DexClassData); result->header = header; if (header.staticFieldsSize != 0) { result->staticFields = (DexField*) ptr; ptr += header.staticFieldsSize * sizeof(DexField); } else { result->staticFields = NULL; } if (header.instanceFieldsSize != 0) { result->instanceFields = (DexField*) ptr; ptr += header.instanceFieldsSize * sizeof(DexField); } else { result->instanceFields = NULL; } if (header.directMethodsSize != 0) { result->directMethods = (DexMethod*) ptr; ptr += header.directMethodsSize * sizeof(DexMethod); } else { result->directMethods = NULL; } if (header.virtualMethodsSize != 0) { result->virtualMethods = (DexMethod*) ptr; } else { result->virtualMethods = NULL; } for (uint32_t i = 0; i < header.staticFieldsSize; i++) { ReadClassDataField(pData, &result->staticFields[i]); } for (uint32_t i = 0; i < header.instanceFieldsSize; i++) { ReadClassDataField(pData, &result->instanceFields[i]); } for (uint32_t i = 0; i < header.directMethodsSize; i++) { ReadClassDataMethod(pData, &result->directMethods[i]); } for (uint32_t i = 0; i < header.virtualMethodsSize; i++) { ReadClassDataMethod(pData, &result->virtualMethods[i]); } return result; } void writeLeb128(uint8_t ** ptr, uint32_t data) { while (true) { uint8_t out = data & 0x7f; if (out != data) { *(*ptr)++ = out | 0x80; data >>= 7; } else { *(*ptr)++ = 
out; break; } } } uint8_t* EncodeClassData(DexClassData *pData, int& len) { len=0; len+=unsignedLeb128Size(pData->header.staticFieldsSize); len+=unsignedLeb128Size(pData->header.instanceFieldsSize); len+=unsignedLeb128Size(pData->header.directMethodsSize); len+=unsignedLeb128Size(pData->header.virtualMethodsSize); if (pData->staticFields) { for (uint32_t i = 0; i < pData->header.staticFieldsSize; i++) { len+=unsignedLeb128Size(pData->staticFields[i].fieldIdx); len+=unsignedLeb128Size(pData->staticFields[i].accessFlags); } } if (pData->instanceFields) { for (uint32_t i = 0; i < pData->header.instanceFieldsSize; i++) { len+=unsignedLeb128Size(pData->instanceFields[i].fieldIdx); len+=unsignedLeb128Size(pData->instanceFields[i].accessFlags); } } if (pData->directMethods) { for (uint32_t i=0; i<pData->header.directMethodsSize; i++) { len+=unsignedLeb128Size(pData->directMethods[i].methodIdx); len+=unsignedLeb128Size(pData->directMethods[i].accessFlags); len+=unsignedLeb128Size(pData->directMethods[i].codeOff); } } if (pData->virtualMethods) { for (uint32_t i=0; i<pData->header.virtualMethodsSize; i++) { len+=unsignedLeb128Size(pData->virtualMethods[i].methodIdx); len+=unsignedLeb128Size(pData->virtualMethods[i].accessFlags); len+=unsignedLeb128Size(pData->virtualMethods[i].codeOff); } } uint8_t * store = (uint8_t *) malloc(len); if (!store) { return NULL; } uint8_t * result=store; writeLeb128(&store,pData->header.staticFieldsSize); writeLeb128(&store,pData->header.instanceFieldsSize); writeLeb128(&store,pData->header.directMethodsSize); writeLeb128(&store,pData->header.virtualMethodsSize); if (pData->staticFields) { for (uint32_t i = 0; i < pData->header.staticFieldsSize; i++) { writeLeb128(&store,pData->staticFields[i].fieldIdx); writeLeb128(&store,pData->staticFields[i].accessFlags); } } if (pData->instanceFields) { for (uint32_t i = 0; i < pData->header.instanceFieldsSize; i++) { writeLeb128(&store,pData->instanceFields[i].fieldIdx); 
writeLeb128(&store,pData->instanceFields[i].accessFlags); } } if (pData->directMethods) { for (uint32_t i=0; i<pData->header.directMethodsSize; i++) { writeLeb128(&store,pData->directMethods[i].methodIdx); writeLeb128(&store,pData->directMethods[i].accessFlags); writeLeb128(&store,pData->directMethods[i].codeOff); } } if (pData->virtualMethods) { for (uint32_t i=0; i<pData->header.virtualMethodsSize; i++) { writeLeb128(&store,pData->virtualMethods[i].methodIdx); writeLeb128(&store,pData->virtualMethods[i].accessFlags); writeLeb128(&store,pData->virtualMethods[i].codeOff); } } free(pData); return result; } uint8_t* codeitem_end(const u1** pData) { uint32_t num_of_list = readUnsignedLeb128(pData); for (;num_of_list>0;num_of_list--) { int32_t num_of_handlers=readSignedLeb128(pData); int num=num_of_handlers; if (num_of_handlers<=0) { num=-num_of_handlers; } for (; num > 0; num--) { readUnsignedLeb128(pData); readUnsignedLeb128(pData); } if (num_of_handlers<=0) { readUnsignedLeb128(pData); } } return (uint8_t*)(*pData); } void* DumpClass(void *parament) { while (timer_flag) { sleep(5); } DvmDex* pDvmDex=((struct arg*)parament)->pDvmDex; Object *loader=((struct arg*)parament)->loader; DexFile* pDexFile=pDvmDex->pDexFile; MemMapping * mem=&pDvmDex->memMap; u4 time=dvmGetRelativeTimeMsec(); ALOGI("GOT IT begin: %d ms",time); char *path = new char[100]; strcpy(path,dumppath); strcat(path,"classdef"); FILE *fp = fopen(path, "wb+");//fp指向classdef文件 strcpy(path,dumppath); strcat(path,"extra"); FILE *fp1 = fopen(path,"wb+");//fp1指向extra文件 uint32_t mask=0x3ffff; char padding=0; const char* header="Landroid"; unsigned int num_class_defs=pDexFile->pHeader->classDefsSize;//class_def的数量 uint32_t total_pointer = mem->length-uint32_t(pDexFile->baseAddr-(const u1*)mem->addr);//末尾地址相对DexHeader头部的偏移地址 uint32_t rec=total_pointer; while (total_pointer&3) { total_pointer++;//对齐 } int inc=total_pointer-rec; uint32_t start = 
pDexFile->pHeader->classDefsOff+sizeof(DexClassDef)*num_class_defs;//若干个class_def_item之后的地址相对于DexHeader头部的偏移地址 uint32_t end = (uint32_t)((const u1*)mem->addr+mem->length-pDexFile->baseAddr);//末尾地址相对DexHeader头部的偏移地址 for (size_t i=0;i<num_class_defs;i++)//遍历class_def_item { bool need_extra=false; ClassObject * clazz=NULL; const u1* data=NULL; DexClassData* pData = NULL; bool pass=false; const DexClassDef *pClassDef = dexGetClassDef(pDvmDex->pDexFile, i);//找到了对应的class_def_item,如图2 const char *descriptor = dexGetClassDescriptor(pDvmDex->pDexFile,pClassDef); if(!strncmp(header,descriptor,8)||!pClassDef->classDataOff) { pass=true; goto classdef; } clazz = dvmDefineClass(pDvmDex, descriptor, loader);//前面说过,脱壳程序能够运行的很重要的原理是,这里生成的ClassObject结构体中的变量都是有效的。 if (!clazz) { continue; } ALOGI("GOT IT class: %s",descriptor); if (!dvmIsClassInitialized(clazz)) { if(dvmInitClass(clazz)){ ALOGI("GOT IT init: %s",descriptor); } } if(pClassDef->classDataOff<start || pClassDef->classDataOff>end)//如果classDataOff的偏移落在范围外,那就需要生成extra部分,详解后面的代码 { need_extra=true; } data=dexGetClassData(pDexFile,pClassDef);//通过class_def获取class_data_item,详解图2 pData = ReadClassData(&data);//读取现在内存中的值,形成了DexClassData结构体pData,注意此时pData中的变量可能是错误的,很简单的一个例子就是http://blog.csdn.net/jltxgcy/article/details/50581259,参考这篇文章 if (!pData) { continue; } if (pData->directMethods) { for (uint32_t i=0; i<pData->header.directMethodsSize; i++) { Method *method = &(clazz->directMethods[i]);//此时获取Method结构体method里面的变量是正确的 uint32_t ac = (method->accessFlags) & mask;//正确的accessFlags ALOGI("GOT IT direct method name %s.%s",descriptor,method->name); if (!method->insns||ac&ACC_NATIVE) {//正确的insns没有指令,所以要调整 if (pData->directMethods[i].codeOff) { need_extra = true; pData->directMethods[i].accessFlags=ac; pData->directMethods[i].codeOff=0; } continue; } u4 codeitem_off = u4((const u1*)method->insns-16-pDexFile->baseAddr);//获取正确的指令偏移 if (ac != pData->directMethods[i].accessFlags)//当前的accessFlag和正确的accessFlag不一致,所以要调整 { ALOGI("GOT IT method 
ac"); need_extra=true; pData->directMethods[i].accessFlags=ac; } if (codeitem_off!=pData->directMethods[i].codeOff&&((codeitem_off>=start&&codeitem_off<=end)||codeitem_off==0)) {//如果正确的codeitem_off不等于现在的codeitem_off,且正确的codeitem_off在范围内,则需要调整 ALOGI("GOT IT method code"); need_extra=true; pData->directMethods[i].codeOff=codeitem_off; } if ((codeitem_off<start || codeitem_off>end) && codeitem_off!=0) {//在http://blog.csdn.net/jltxgcy/article/details/50581259,所属的情况在这个判断中,真是的codeitem_off在范围(start--end)外。 need_extra=true; pData->directMethods[i].codeOff = total_pointer;//让codeOff指向dex结尾的偏移 DexCode *code = (DexCode*)((const u1*)method->insns-16);//正确的DexCode uint8_t *item=(uint8_t *) code; int code_item_len = 0; if (code->triesSize) { const u1 * handler_data = dexGetCatchHandlerData(code); const u1** phandler=(const u1**)&handler_data; uint8_t * tail=codeitem_end(phandler); code_item_len = (int)(tail-item); }else{ code_item_len = 16+code->insnsSize*2;//正确的DexCode的大小 } ALOGI("GOT IT method code changed"); fwrite(item,1,code_item_len,fp1);//把DexCode写入extra中 fflush(fp1); total_pointer+=code_item_len; while (total_pointer&3) { fwrite(&padding,1,1,fp1); fflush(fp1); total_pointer++; } } } } if (pData->virtualMethods) {//同理 for (uint32_t i=0; i<pData->header.virtualMethodsSize; i++) { Method *method = &(clazz->virtualMethods[i]); uint32_t ac = (method->accessFlags) & mask; ALOGI("GOT IT virtual method name %s.%s",descriptor,method->name); if (!method->insns||ac&ACC_NATIVE) { if (pData->virtualMethods[i].codeOff) { need_extra = true; pData->virtualMethods[i].accessFlags=ac; pData->virtualMethods[i].codeOff=0; } continue; } u4 codeitem_off = u4((const u1 *)method->insns - 16 - pDexFile->baseAddr); if (ac != pData->virtualMethods[i].accessFlags) { ALOGI("GOT IT method ac"); need_extra=true; pData->virtualMethods[i].accessFlags=ac; } if (codeitem_off!=pData->virtualMethods[i].codeOff&&((codeitem_off>=start&&codeitem_off<=end)||codeitem_off==0)) { ALOGI("GOT IT method code"); 
need_extra=true; pData->virtualMethods[i].codeOff=codeitem_off; } if ((codeitem_off<start || codeitem_off>end)&&codeitem_off!=0) { need_extra=true; pData->virtualMethods[i].codeOff = total_pointer; DexCode *code = (DexCode*)((const u1*)method->insns-16); uint8_t *item=(uint8_t *) code; int code_item_len = 0; if (code->triesSize) { const u1 *handler_data = dexGetCatchHandlerData(code); const u1** phandler=(const u1**)&handler_data; uint8_t * tail=codeitem_end(phandler); code_item_len = (int)(tail-item); }else{ code_item_len = 16+code->insnsSize*2; } ALOGI("GOT IT method code changed"); fwrite(item,1,code_item_len,fp1); fflush(fp1); total_pointer+=code_item_len; while (total_pointer&3) { fwrite(&padding,1,1,fp1); fflush(fp1); total_pointer++; } } } } classdef: DexClassDef temp=*pClassDef; uint8_t *p = (uint8_t *)&temp; if (need_extra) { ALOGI("GOT IT classdata before"); int class_data_len = 0; uint8_t *out = EncodeClassData(pData,class_data_len); if (!out) { continue; } temp.classDataOff = total_pointer;//class_def的classDataOff指向了新生成的class_data_item fwrite(out,1,class_data_len,fp1);//将这个class_data_item写入extra,此class_data_item中的codeOff已经改变了 fflush(fp1); total_pointer+=class_data_len; while (total_pointer&3) { fwrite(&padding,1,1,fp1); fflush(fp1); total_pointer++; } free(out); ALOGI("GOT IT classdata written"); }else{ if (pData) { free(pData); } } if (pass) { temp.classDataOff=0; temp.annotationsOff=0; } ALOGI("GOT IT classdef"); fwrite(p, sizeof(DexClassDef), 1, fp);//将class_def入classdef文件,改class_def中的classDataOff已经改变了 fflush(fp); } fclose(fp1); fclose(fp); //最后执行完毕后,把四个文件按照先后顺序合并在一起,依次是part1,classdef,data,extra,最后生成dex就是正确的dex strcpy(path,dumppath); strcat(path,"whole.dex"); fp = fopen(path,"wb+"); rewind(fp); int fd=-1; int r=-1; int len=0; char *addr=NULL; struct stat st; strcpy(path,dumppath); strcat(path,"part1"); fd=open(path,O_RDONLY,0666); if (fd==-1) { return NULL; } r=fstat(fd,&st); if(r==-1){ close(fd); return NULL; } len=st.st_size; 
addr=(char*)mmap(NULL,len,PROT_READ,MAP_PRIVATE,fd,0); fwrite(addr,1,len,fp); fflush(fp); munmap(addr,len); close(fd); strcpy(path,dumppath); strcat(path,"classdef"); fd=open(path,O_RDONLY,0666); if (fd==-1) { return NULL; } r=fstat(fd,&st); if(r==-1){ close(fd); return NULL; } len=st.st_size; addr=(char*)mmap(NULL,len,PROT_READ,MAP_PRIVATE,fd,0); fwrite(addr,1,len,fp); fflush(fp); munmap(addr,len); close(fd); strcpy(path,dumppath); strcat(path,"data"); fd=open(path,O_RDONLY,0666); if (fd==-1) { return NULL; } r=fstat(fd,&st); if(r==-1){ close(fd); return NULL; } len=st.st_size; addr=(char*)mmap(NULL,len,PROT_READ,MAP_PRIVATE,fd,0); fwrite(addr,1,len,fp); fflush(fp); munmap(addr,len); close(fd); while (inc>0) { fwrite(&padding,1,1,fp); fflush(fp); inc--; } strcpy(path,dumppath); strcat(path,"extra"); fd=open(path,O_RDONLY,0666); if (fd==-1) { return NULL; } r=fstat(fd,&st); if(r==-1){ close(fd); return NULL; } len=st.st_size; addr=(char*)mmap(NULL,len,PROT_READ,MAP_PRIVATE,fd,0); fwrite(addr,1,len,fp); fflush(fp); munmap(addr,len); close(fd); fclose(fp); delete path; time=dvmGetRelativeTimeMsec(); ALOGI("GOT IT end: %d ms",time); return NULL; } //------------------------added end----------------------// static void Dalvik_dalvik_system_DexFile_defineClassNative(const u4* args, JValue* pResult) { StringObject* nameObj = (StringObject*) args[0]; Object* loader = (Object*) args[1]; int cookie = args[2]; ClassObject* clazz = NULL; DexOrJar* pDexOrJar = (DexOrJar*) cookie; DvmDex* pDvmDex; char* name; char* descriptor; name = dvmCreateCstrFromString(nameObj); descriptor = dvmDotToDescriptor(name); ALOGV("--- Explicit class load '%s' l=%p c=0x%08x", descriptor, loader, cookie); free(name); if (!validateCookie(cookie)) RETURN_VOID(); if (pDexOrJar->isDex) pDvmDex = dvmGetRawDexFileDex(pDexOrJar->pRawDexFile); else pDvmDex = dvmGetJarFileDex(pDexOrJar->pJarFile); /* once we load something, we can't unmap the storage */ pDexOrJar->okayToFree = false; 
//------------------------added begin----------------------// int uid=getuid(); if (uid) { if (readable) { pthread_mutex_lock(&read_mutex); if (readable) {//只执行一次,避免多次执行 readable=false; pthread_mutex_unlock(&read_mutex); pthread_t read_thread; pthread_create(&read_thread, NULL, ReadThread, NULL);//在新的线程中执行ReadThread,ReadThread在上面的代码中 }else{ pthread_mutex_unlock(&read_mutex); } } } if(uid&&strcmp(dexname,"")){ char * res=strstr(pDexOrJar->fileName, dexname);//这个是dexname的用途,用来比较,只有匹配的dex才会执行后面的代码 if (res&&flag) { pthread_mutex_lock(&mutex); if (flag) { flag = false; pthread_mutex_unlock(&mutex); DexFile* pDexFile=pDvmDex->pDexFile; MemMapping * mem=&pDvmDex->memMap; char * temp=new char[100]; strcpy(temp,dumppath); strcat(temp,"part1"); FILE *fp = fopen(temp, "wb+"); const u1 *addr = (const u1*)mem->addr; int length=int(pDexFile->baseAddr+pDexFile->pHeader->classDefsOff-addr);//class_defs之前的内容的长度,mem->addr,和baseAddr是理解这个长度的关键,在代码前的红字部分有说明和图解。 fwrite(addr,1,length,fp);//class_defs之前的内容写入part1 fflush(fp); fclose(fp); strcpy(temp,dumppath); strcat(temp,"data"); fp = fopen(temp, "wb+"); addr = pDexFile->baseAddr+pDexFile->pHeader->classDefsOff+sizeof(DexClassDef)*pDexFile->pHeader->classDefsSize; length=int((const u1*)mem->addr+mem->length-addr);//class_defs之后的内容的长度 fwrite(addr,1,length,fp);//class_defs之后的内容写入data中 fflush(fp); fclose(fp); delete temp; param.loader=loader; param.pDvmDex=pDvmDex; pthread_t dumpthread; dvmCreateInternalThread(&dumpthread,"ClassDumper",DumpClass,(void*)param);//在新的线程中执行DumpClass,参数是param,这个函数的实现在前面的代码中 }else{ pthread_mutex_unlock(&mutex); } } } //------------------------added end----------------------// clazz = dvmDefineClass(pDvmDex, descriptor, loader); Thread* self = dvmThreadSelf(); if (dvmCheckException(self)) { /* * If we threw a "class not found" exception, stifle it, since the * contract in the higher method says we simply return null if * the class is not found. 
*/ Object* excep = dvmGetException(self); if (strcmp(excep->clazz->descriptor, "Ljava/lang/ClassNotFoundException;") == 0 || strcmp(excep->clazz->descriptor, "Ljava/lang/NoClassDefFoundError;") == 0) { dvmClearException(self); } clazz = NULL; } free(descriptor); RETURN_PTR(clazz); }
如果脱壳的对象是《运行时动态修复dex》这篇文章中运行的DynamicDex.apk(上面的代码注释也着重分析了这种情况),那么脱壳后会形成下图的结构(正确的dex)。
第一部分是part1,第二部分是classdef,第三部分是data,第四部分是extra。
第二部分的其中一个class_def_item中的classDataOff指向了extra中class_data_item。
extra中的class_data_item中的codeOff指向了extra中DexCode。