This is a short readme describing the layout of PyEmu. A module containing a class for defining a context to pass between modules in the emulator. The CPU class implements each instruction and is responsible for executing and maintaining state. A simple class to ease some debugging tasks. The user-facing class that implements the public methods available for use; it is also responsible for initializing the memory and CPU classes. A helper class for providing abstracted access to the pydasm instruction structures. A module containing the memory managers responsible for fetching and storing memory. A rough implementation of the OS-specific structures needed for process creation and control.
lib/ Ero Carrera's pefile implementation
pydasm.pyd: Ero Carrera's libdasm python wrapper
ctypes/_ctypes.pyd: Ctypes library needed for
PyCPU PyMemory
| |
| |
|________ ________|
#!/usr/bin/env python
# PyEmu: scriptable x86 emulator
# License: None
sys.path.append(r'C:\Program Files\IDA\python')
from PyCPU import PyCPU
from PyContext import PyContext
from PyMemory import *
from PyOS import *
The main emulator class. This class implements the public methods
for controlling the emulator. This includes handlers, and initialization.
class PyEmu:
def __init__(self):
# raise_exception: This method gets called when an exception happens
def raise_exception(self, exception, address):
# debug: A public method for setting global debug levels
def debug(self, level):
# execute: A public method for executing instructions
def execute(self, steps=1, start=0x0, end=0x0):
# get_register: A public method to retrieve a register for the user
def get_register(self, register):
# set_register: A public method for setting a registers value
def set_register(self, register, value, name=""):
# get_stack_variable: A public method for getting stack local variables
def get_stack_variable(self, offset, size=0):
# set_stack_variable: A public method for setting a local stack variable
def set_stack_variable(self, offset, value, size=0, name=""):
# get_stack_argument: A public method to get a functions stack argument
def get_stack_argument(self, offset, size=0):
# set_stack_argument: A public method to set up a stack argument
def set_stack_argument(self, offset, value, name=""):
# get_memory: A public method for fetching arbitrary memory
def get_memory(self, address, size=0):
# get_memory_string: A public method to fetch a string from memory
def get_memory_string(self, address):
# set_memory: A public method for setting arbitrary memory
def set_memory(self, address, value, size=0):
# get_selector: A public method for fetching a selector from the LDT
def get_selector(self, selector):
# set_register_handler: A public method for setting a custom register handler
def set_register_handler(self, register, handler):
# set_mnemonic_handler: A public method for setting a custom mnemonic handler
def set_mnemonic_handler(self, mnemonic, handler):
# set_opcode_handler: A public method for setting a custom handler for an opcode
def set_opcode_handler(self, opcode, handler):
# set_pc_handler: A public method for setting a custom handler on a program counter address
def set_pc_handler(self, address, handler):
# set_exception_handler: A public method for setting a custom exception handler
def set_exception_handler(self, exception, handler):
# set_library_handler: A public method for setting a custom library function handler
def set_library_handler(self, function, handler):
# set_interrupt_handler: A public method for setting a custom interrupt handler
def set_interrupt_handler(self, interrupt, handler):
# set_memory_handler: A public method for setting a custom handler for a memory address
def set_memory_handler(self, address, handler):
# set_memory_read_handler: A public method for setting a custom handler for memory reads
def set_memory_read_handler(self, handler):
# set_memory_write_handler: A public method for setting a custom handler for memory writes
def set_memory_write_handler(self, handler):
# set_memory_access_handler: A public method for setting a custom handler for memory accesses
def set_memory_access_handler(self, handler):
# set_stack_read_handler: A public method for setting a custom handler for stack reads
def set_stack_read_handler(self, handler):
# set_stack_write_handler: A public method for setting a custom handler for stack writes
def set_stack_write_handler(self, handler):
# set_stack_access_handler: A public method for setting a custom handler for stack accesses
def set_stack_access_handler(self, handler):
# set_heap_read_handler: A public method for setting a custom handler for heap reads
def set_heap_read_handler(self, handler):
# set_heap_write_handler: A public method for setting a custom handler for heap writes
def set_heap_write_handler(self, handler):
# set_heap_access_handler: A public method for setting a custom handler for heap accesses
def set_heap_access_handler(self, handler):
# dump_regs: A public method to dump the regs from the CPU
def dump_regs(self):
# dump_stack: A public method to dump the stack from ESP and EBP
def dump_stack(self, count=64):
# get_disasm: A public method to get a pretty dump of the current
def get_disasm(self):
The ugliest class name ever. Really the PyEmu class for handling
PyDbg operation. It is responsible for talking between the
emulator and the real process. This is what the user would instantiate.
class PyDbgPyEmu(PyEmu):
def __init__(self, dbg):
def setup_context(self):
The class intended for emulating in IDA Pro. This has to set up
some basic operating environments for the executable. This is what
the user will be instantiating.
class IDAPyEmu(PyEmu):
def __init__(self, stack_base=0x0095f000, stack_size=0x1000, heap_base=0x000a0000, heap_size=0x2000, frame_pointer=True):
# setup_os: Adds a new thread based on which OS you are using
def setup_os(self):
# setup_context: Sets the needed stack pointers so we can execute
def setup_context(self):
The class intended for emulating from a raw PE executable. This has
to set up some basic operating environments for the executable.
This is what the user will be instantiating.
class PEPyEmu(PyEmu):
def __init__(self, stack_base=0x0095f000, stack_size=0x1000, heap_base=0x000a0000, heap_size=0x2000, frame_pointer=True):
# setup_os: Adds a new thread based on which OS you are using
def setup_os(self):
# setup_context: Sets the needed stack pointers so we can execute
def setup_context(self):
# load: Loads the sections of a binary into the emulator memory
def load(self, exename):
#!/usr/bin/env python
# PyEmu: scriptable x86 emulator
# License: None
A class that allows us to define some properties and methods for each
page of memory in our cache. This could be used to further define
permissions and attributes as needed
class PyMemoryPage:
READ = 0x1
WRITE = 0x2
def __init__(self, address, data="", permissions=0x0):
def get_data(self):
def get_permissions(self):
def set_data(self, data):
def set_permissions(self, permissions):
def set_debug(self, level):
def set_r(self):
def set_w(self):
def set_x(self):
def set_rw(self):
def set_rx(self):
def set_rwx(self):
def is_r(self):
def is_w(self):
def is_x(self):
def is_rx(self):
def is_rwx(self):
The base class for handling memory requests from the PyCPU and PyEmu.
This class should be extended by any custom memory managers.
class PyMemory:
def __init__(self, emu):
# get_memory: Fetches memory first checking local cache, then
def get_memory(self, address, size):
# set_memory: Set an address to a specific value. This can be a
def set_memory(self, address, value, size):
# get_available_page: Will return the next available page starting from address
def get_available_page(self, address):
# is_valid: A helper function to check for a address in our cache
def is_valid(self, address):
def get_page(self, page):
def set_debug(self, level):
# dump_memory: This dumps the data from memory optionally writing
def dump_memory(self, filename=None):
# dump_pages: This will dump all the currently cached memory pages.
def dump_pages(self, data=False):
This is the pydbg memory manager. It extends the base PyMemory class
This is responsible for nothing more than handling requests for
memory if needed. In this case a fetch of unknown memory will make a
call to ReadProcessMemory via the dbg instance.
class PyDbgMemory(PyMemory):
def __init__(self, emu, dbg):
# allocate_page: Allocates a page for addition into the cache
def allocate_page(self, page):
# get_page: This fetches the page from pydbg
def get_page(self, page):
This is the ida memory manager. It extends the base PyMemory class
and is responsible for handling any unknown memory requests. In IDA
this is a tricky call because we can either throw an exception on invalid
memory accesses or go ahead and fulfill them in case the user did not
set everything up properly. It's really a personal choice.
class IDAMemory(PyMemory):
def __init__(self, emu):
# allocate_page: Allocates a page for addition into the cache
def allocate_page(self, page):
# get_page: Handles unknown memory requests from the base class.
def get_page(self, page):
This is the raw PE file memory handler that is responsible for handling
requests from the base class. Like the others it requests memory when
class PEMemory(PyMemory):
def __init__(self, emu):
# allocate_page: Allocates a page for addition into the cache
def allocate_page(self, page):
# allocate: Allocates a block of memory
def allocate(self, size):
# get_page: Stores a page in the base class cache
def get_page(self, page):
def execute(self):
    """Execute the single instruction located at the current EIP.

    Processing order:
      1. If a program-counter handler is registered for EIP it is called
         with (emu, EIP); a falsy result aborts execution.
      2. If EIP is a known library address, the library handler registered
         under the library's name is called with (name, address) and its
         result is returned. A missing handler is reported and fails.
      3. Otherwise the instruction is taken from the executed-instruction
         cache, or fetched from memory and decoded with pydasm on a cache
         miss, and dispatched through ``supported_instructions``.

    Returns:
        True when the instruction executed, False on any failure (handler
        refusal, fetch/decode problem, unsupported instruction), or the
        truthy result of a library handler.
    """
    # Check our program counter handlers
    if self.EIP in self.emu.pc_handlers:
        if not self.emu.pc_handlers[self.EIP](self.emu, self.EIP):
            return False

    # Library calls are delegated entirely to user supplied handlers
    if self.EIP in self.emu.os.libraries:
        library = self.emu.os.libraries[self.EIP]

        if self.DEBUG > 1:
            print("[*] Calling 0x%08x:%s" % (self.EIP, library['name']))

        if library['name'] in self.emu.library_handlers:
            result = self.emu.library_handlers[library['name']](library['name'], library['address'])
            if not result:
                return False

            return result
        else:
            print("[*] Need a handler for [%s]" % (library))

            return False

    # Remembered so we can tell whether the instruction itself moved EIP
    oldeip = self.EIP

    # We track instructions executed so we can greatly increase performance
    if self.EIP not in self.executed_instructions:
        # Fetch raw instruction from memory, 13 bytes seems to be the largest possible instruction
        rawinstruction = self.get_memory(self.EIP, 13)
        if not rawinstruction:
            print("[!] Problem fetching raw bytes from 0x%08x" % (self.EIP))

            return False

        # Decode instruction from raw returning a pydasm.instruction
        instruction = pydasm.get_instruction(rawinstruction, pydasm.MODE_32)
        if not instruction:
            print("[!] Problem decoding instruction")

            return False

        # Create our python class for instruction, we do this in case we ever leave pydasm
        pyinstruction = PyInstruction(instruction)
        self.executed_instructions[self.EIP] = pyinstruction
    else:
        pyinstruction = self.executed_instructions[self.EIP]

    if self.DEBUG > 0:
        print("[*] Executing [0x%x][%x] %s" % (self.EIP, pyinstruction.opcode, self.get_disasm()))

    # We must split off any prefix since we use flags for those. The bare
    # mnemonic is written back onto the (cached) instruction object.
    parts = pyinstruction.mnemonic.split()
    if parts[0] in ["rep", "repe", "repne", "lock"]:
        pyinstruction.mnemonic = parts[1]
    else:
        pyinstruction.mnemonic = parts[0]

    # Check if we support this instruction
    if pyinstruction.mnemonic in self.supported_instructions:
        # Execute!
        if not self.supported_instructions[pyinstruction.mnemonic](pyinstruction):
            return False
    else:
        print("[!] Unsupported instruction %s" % pyinstruction.mnemonic)

        return False

    # If EIP has not changed we advance to the next instruction in code
    if self.EIP == oldeip:
        self.EIP += pyinstruction.length

    # Everything checked out
    return True
def MOVSB(self, instruction):
    """Emulate MOVSB (opcode 0xA4): copy one byte from source to destination.

    With 16-bit addressing (``instruction.address_so()`` truthy) the byte is
    read from ``self.DS + SI`` and written to ``self.ES + DI``, counted by
    CX; otherwise ESI, EDI and ECX are used directly. After each byte both
    index registers move forward by one when ``self.DF`` is falsy and
    backward by one otherwise. When ``instruction.rep()`` is truthy the copy
    repeats while the count register is greater than zero.

    After the copy, a handler registered for the opcode and then one
    registered for the upper-cased mnemonic are called with
    (emu, key, EIP, op1, op2, op3), where op1 is the last destination
    address, op2 is the last byte copied (or "" if nothing was copied) and
    op3 is "".

    Returns:
        False when the opcode is not 0xA4, True otherwise.
    """
    # A4 MOVSB is the only encoding handled here
    if instruction.opcode != 0xa4:
        return False

    osize = 1  # MOVSB always moves exactly one byte

    if instruction.address_so():
        # 16-bit addressing: segment values are added to the index registers
        get_reg, set_reg = self.get_register16, self.set_register16
        dst_name, src_name, count_name = "DI", "SI", "CX"
        dst_base, src_base = self.ES, self.DS
    else:
        # 32-bit addressing: the index registers are used as-is
        get_reg, set_reg = self.get_register32, self.set_register32
        dst_name, src_name, count_name = "EDI", "ESI", "ECX"
        dst_base = src_base = 0

    op1value = ""
    op2value = ""
    op3value = ""
    op2valuederef = None

    repeating = instruction.rep()
    # Without a REP prefix exactly one byte is copied and the count
    # register is never touched.
    remaining = get_reg(count_name) if repeating else 1

    while remaining > 0:
        dst_offset = get_reg(dst_name)
        src_offset = get_reg(src_name)

        op1value = dst_base + dst_offset
        op2value = src_base + src_offset

        op2valuederef = self.get_memory(op2value, osize)
        self.set_memory(op1value, op2valuederef, osize)

        # Advance the *offsets* only; the segment value must not be folded
        # back into DI/SI.
        step = -osize if self.DF else osize
        set_reg(dst_name, dst_offset + step)
        set_reg(src_name, src_offset + step)

        remaining -= 1
        if repeating:
            # NOTE(review): the count is written back every iteration; the
            # final value equals a single write after the loop.
            set_reg(count_name, remaining)

    # Handlers receive the dereferenced source byte when one was copied
    if op2valuederef is not None:
        handler_args = (op1value, op2valuederef, op3value)
    else:
        handler_args = (op1value, op2value, op3value)

    opcode = instruction.opcode
    if opcode in self.emu.opcode_handlers:
        self.emu.opcode_handlers[opcode](self.emu, opcode, self.get_register32("EIP"), *handler_args)

    mnemonic = instruction.mnemonic.upper()
    if mnemonic in self.emu.mnemonic_handlers:
        self.emu.mnemonic_handlers[mnemonic](self.emu, mnemonic, self.get_register32("EIP"), *handler_args)

    return True