miasm2.jitter.loader.pe module
import os
import struct
import logging
from collections import defaultdict

from elfesteem import pe
from elfesteem import cstruct
from elfesteem import *

from miasm2.jitter.csts import *
from miasm2.jitter.loader.utils import canon_libname_libfunc, libimp

log = logging.getLogger('loader_pe')
hnd = logging.StreamHandler()
hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s"))
log.addHandler(hnd)
log.setLevel(logging.INFO)


def get_pe_dependencies(pe_obj):
    """Return dependency set
    @pe_obj: pe object

    The result holds the lower-cased DLL name of every import descriptor;
    it is empty when the PE has no import descriptors.
    """
    descriptors = pe_obj.DirImport.impdesc
    if descriptors is None:
        return set()
    return {desc.dlldescname.name.lower() for desc in descriptors}


def get_import_address_pe(e):
    """Map each import of @e to the addresses of its import table slots
    @e: PE instance

    Return a defaultdict(set): (lower-cased dll name, function) -> set of
    virtual addresses. The function is the name of an ImportByName entry,
    or the raw entry otherwise.
    """
    import2addr = defaultdict(set)
    if e.DirImport.impdesc is None:
        return import2addr
    for desc in e.DirImport.impdesc:
        libname = desc.dlldescname.name.lower()
        for index, imp in enumerate(desc.impbynames):
            if isinstance(imp, pe.ImportByName):
                funcname = imp.name
            else:
                funcname = imp
            # Floor division keeps the slot offset an integer even when true
            # division is in effect (Python 3 / __future__.division).
            slot_rva = desc.firstthunk + e._wsize * index // 8
            import2addr[(libname, funcname)].add(e.rva2virt(slot_rva))
    return import2addr


def preload_pe(vm, e, runtime_lib, patch_vm_imp=True):
    """Resolve the imports of @e through @runtime_lib
    @vm: VmMngr instance, written to when @patch_vm_imp is set
    @e: PE instance
    @runtime_lib: object providing lib_get_add_base / lib_get_add_func
    @patch_vm_imp: (optional) if True, write each resolved address in the
    matching import slot of @vm

    Return a dictionary: canon_libname_libfunc(dll, function) -> address
    returned by lib_get_add_func.
    """
    resolved = {}
    for (libname, libfunc), slots in get_import_address_pe(e).items():
        for slot in slots:
            lib_base = runtime_lib.lib_get_add_base(libname)
            func_addr = runtime_lib.lib_get_add_func(lib_base, libfunc, slot)
            resolved[canon_libname_libfunc(libname, libfunc)] = func_addr
            if not patch_vm_imp:
                continue
            # Pack the address with the pointer format matching the PE
            # word size.
            packed = struct.pack(cstruct.size2type[e._wsize], func_addr)
            vm.set_mem(slot, packed)
    return resolved


def is_redirected_export(pe_obj, addr):
    """Test if the @addr is a forwarded export address. If so, return
    dllname/function name couple. If not, return False.

    An export address is a forwarded export if the rva is in the export
    directory of the pe.

    @pe_obj: PE instance
    @addr: virtual address of the function to test
    """
    export_dir = pe_obj.NThdr.optentries[pe.DIRECTORY_ENTRY_EXPORT]
    addr_rva = pe_obj.virt2rva(addr)
    if addr_rva < export_dir.rva:
        return False
    if addr_rva >= export_dir.rva + export_dir.size:
        return False

    # The forwarder is a NUL terminated "dll.function" string
    terminator = pe_obj.virt.find('\x00', addr)
    forwarder = pe_obj.virt.get(addr, terminator)
    dllname, func_info = forwarder.split('.', 1)

    # Test if function is forwarded using ordinal
    if func_info.startswith('#'):
        func_info = int(func_info[1:])
    return dllname.lower(), func_info


def get_export_name_addr_list(e):
    """List the exports of @e as (name or ordinal, virtual address) couples
    @e: PE instance

    The list holds, in order: one entry per exported name, one entry per
    name ordinal (biased by the export base), then one entry per non-null
    slot of the export address table (index biased by the export base).
    """
    exports = e.DirExport
    out = []

    # add func name
    for index, name_entry in enumerate(exports.f_names):
        target = exports.f_address[exports.f_nameordinals[index].ordinal]
        out.append((name_entry.name.name, e.rva2virt(target.rva)))

    # add func ordinal
    for ord_entry in exports.f_nameordinals:
        target = exports.f_address[ord_entry.ordinal]
        out.append((ord_entry.ordinal + exports.expdesc.base,
                    e.rva2virt(target.rva)))

    # add every populated slot of the address table; expdesc.base is only
    # read when there is something to add
    out.extend(
        (index + exports.expdesc.base, e.rva2virt(slot.rva))
        for index, slot in enumerate(exports.f_address)
        if slot.rva
    )
    return out


def vm_load_pe(vm, fdata, align_s=True, load_hdr=True, name="", **kargs):
    """Load a PE in memory (@vm) from a data buffer @fdata
    @vm: VmMngr instance
    @fdata: data buffer to parse
    @align_s: (optional) If False, keep gaps between sections
    @load_hdr: (optional) If False, do not load the NThdr in memory
    @name: (optional) label used to name the created memory pages
    Return the corresponding PE instance.

    Extra arguments are passed to PE instantiation.
    If all sections are aligned, they will be mapped on several different
    pages. Otherwise, a big page is created, containing all sections.
    A PE without any section only gets its header mapped (if @load_hdr).
    """
    # Parse and build a PE instance. The local is not named `pe` so the
    # elfesteem `pe` module stays reachable.
    pe_obj = pe_init.PE(fdata, **kargs)
    has_sections = len(pe_obj.SHList) > 0

    # Check if all sections are aligned on 0x1000
    aligned = all(not (section.addr & 0xFFF) for section in pe_obj.SHList)

    if aligned:
        # Loader NT header
        if load_hdr:
            # Header length
            hdr_len = max(0x200, pe_obj.NThdr.sizeofheaders)
            # Page minimum size: up to the first section, at most 0x1000
            if has_sections:
                min_len = min(pe_obj.SHList[0].addr, 0x1000)
            else:
                min_len = 0x1000
            # Get and pad the pe_hdr
            pe_hdr = pe_obj.content[:hdr_len] + max(
                0, (min_len - hdr_len)) * "\x00"
            vm.add_memory_page(pe_obj.NThdr.ImageBase,
                               PAGE_READ | PAGE_WRITE,
                               pe_hdr, "%r: PE Header" % name)

        # Align sections size
        if align_s and has_sections:
            # Use the next section address to compute the new size
            for i, section in enumerate(pe_obj.SHList[:-1]):
                new_size = pe_obj.SHList[i + 1].addr - section.addr
                section.size = new_size
                section.rawsize = new_size
                section.data = strpatchwork.StrPatchwork(
                    section.data[:new_size]
                )
                section.offset = section.addr

            # Last section alignment: round its size up to 0x1000
            last_section = pe_obj.SHList[-1]
            last_section.size = (last_section.size + 0xfff) & 0xfffff000

        # Pad sections with null bytes and map them
        for section in pe_obj.SHList:
            data = str(section.data)
            data += "\x00" * (section.size - len(data))
            attrib = PAGE_READ
            # Sections flagged 0x80000000 are mapped writable
            if section.flags & 0x80000000:
                attrib |= PAGE_WRITE
            vm.add_memory_page(pe_obj.rva2virt(section.addr), attrib, data,
                               "%r: %r" % (name, section.name))

        return pe_obj

    # At least one section is not aligned
    log.warning('PE is not aligned, creating big section')
    min_addr = 0 if load_hdr else None
    max_addr = None

    for i, section in enumerate(pe_obj.SHList):
        if i < len(pe_obj.SHList) - 1:
            # If it is not the last section, use next section address
            section.size = pe_obj.SHList[i + 1].addr - section.addr
        section.rawsize = section.size
        section.offset = section.addr

        # Update min and max addresses
        if min_addr is None or section.addr < min_addr:
            min_addr = section.addr
        max_section_len = max(section.size, len(section.data))
        if max_addr is None or section.addr + max_section_len > max_addr:
            max_addr = section.addr + max_section_len

    min_addr = pe_obj.rva2virt(min_addr)
    max_addr = pe_obj.rva2virt(max_addr)
    log.debug('Min: 0x%x, Max: 0x%x, Size: 0x%x', min_addr, max_addr,
              (max_addr - min_addr))

    # Create only one big section containing the whole PE
    vm.add_memory_page(min_addr,
                       PAGE_READ | PAGE_WRITE,
                       (max_addr - min_addr) * "\x00")

    # Copy each sections content in memory
    for section in pe_obj.SHList:
        log.debug('Map 0x%x bytes to 0x%x', len(section.data),
                  pe_obj.rva2virt(section.addr))
        vm.set_mem(pe_obj.rva2virt(section.addr), str(section.data))

    return pe_obj


def vm_load_pe_lib(vm, fname_in, libs, lib_path_base, **kargs):
    """Call vm_load_pe on @fname_in and update @libs accordingly
    @vm: VmMngr instance
    @fname_in: library name
    @libs: libimp_pe instance
    @lib_path_base: DLLs relative path
    Return the corresponding PE instance
    Extra arguments are passed to vm_load_pe
    """
    log.info('Loading module %r', fname_in)
    lib_path = os.path.join(lib_path_base, fname_in)
    with open(lib_path, "rb") as fstream:
        raw = fstream.read()
    loaded = vm_load_pe(vm, raw, name=fname_in, **kargs)
    # Register the exports of the freshly mapped library
    libs.add_export_lib(loaded, fname_in)
    return loaded


def vm_load_pe_libs(vm, libs_name, libs, lib_path_base, **kargs):
    """Call vm_load_pe_lib on each @libs_name filename
    @vm: VmMngr instance
    @libs_name: list of str
    @libs: libimp_pe instance
    @lib_path_base: (optional) DLLs relative path
    Return a dictionary Filename -> PE instances
    Extra arguments are passed to vm_load_pe_lib
    """
    loaded = {}
    for lib_fname in libs_name:
        loaded[lib_fname] = vm_load_pe_lib(
            vm, lib_fname, libs, lib_path_base, **kargs)
    return loaded


def vm_fix_imports_pe_libs(lib_imgs, libs, lib_path_base,
                           patch_vm_imp=True, vm=None, **kargs):
    """Call preload_pe on every PE of @lib_imgs
    @lib_imgs: dictionary Filename -> PE instance
    @libs: libimp_pe instance, forwarded to preload_pe
    @lib_path_base: unused by this function
    @patch_vm_imp: (optional) forwarded to preload_pe
    @vm: VmMngr instance forwarded to preload_pe; required as soon as
    @lib_imgs is not empty

    Raise TypeError if @vm is missing while there are images to fix.
    """
    if not lib_imgs:
        return
    if vm is None:
        # preload_pe needs the memory manager as its first argument; calling
        # it without one silently shifted every argument by one position.
        raise TypeError("vm_fix_imports_pe_libs requires the 'vm' argument")
    for e in lib_imgs.values():
        preload_pe(vm, e, libs, patch_vm_imp)


def vm2pe(myjit, fname, libs=None, e_orig=None,
          min_addr=None, max_addr=None,
          min_section_offset=0x1000, img_base=None,
          added_funcs=None, **kwargs):
    """Build a PE from the memory of @myjit and write it to @fname
    @myjit: jitter instance; its vm memory and pc are read
    @fname: path of the file to write
    @libs: (optional) libimp_pe instance used to rebuild the import
    directory
    @e_orig: (optional) original PE instance; provides the word size, the
    default address range, the default image base and the resource
    directory
    @min_addr, @max_addr: (optional) only memory pages starting in
    [@min_addr, @max_addr[ are dumped
    @min_section_offset: (optional) file offset given to the first section
    @img_base: (optional) image base of the new PE
    @added_funcs: (optional) iterable of (address, function address)
    couples registered in @libs before the imports are generated
    @kwargs: 'filter_import' overrides the import address filter

    Return the new PE instance.
    Raise ValueError if neither @img_base nor @e_orig is given.
    """
    wsize = e_orig._wsize if e_orig else 32
    mye = pe_init.PE(wsize=wsize)

    if min_addr is None and e_orig is not None:
        min_addr = min([e_orig.rva2virt(s.addr) for s in e_orig.SHList])
    if max_addr is None and e_orig is not None:
        max_addr = max([e_orig.rva2virt(s.addr + s.size)
                        for s in e_orig.SHList])

    if img_base is None:
        if e_orig is None:
            raise ValueError(
                "vm2pe: img_base is required when e_orig is not given")
        img_base = e_orig.NThdr.ImageBase

    mye.NThdr.ImageBase = img_base
    all_mem = myjit.vm.get_all_memory()
    mye.Opthdr.AddressOfEntryPoint = mye.virt2rva(myjit.pc)

    first = True
    for ad in sorted(all_mem):
        if not min_addr <= ad < max_addr:
            continue
        log.debug("0x%x", ad)
        section_args = {
            "addr": ad - mye.NThdr.ImageBase,
            "data": all_mem[ad]['data'],
        }
        if first:
            # Only the first dumped section gets an explicit file offset
            section_args["offset"] = min_section_offset
        mye.SHList.add_section("%.8X" % ad, **section_args)
        first = False

    if libs:
        if added_funcs is not None:
            for addr, funcaddr in added_funcs:
                libbase, funcname = libs.fad2info[funcaddr]
                libs.lib_get_add_func(libbase, funcname, addr)

        filter_import = kwargs.get(
            'filter_import', lambda _, ad: mye.virt.is_addr_in(ad))
        new_dll = libs.gen_new_lib(mye, filter_import)
    else:
        new_dll = {}

    log.debug('%s', new_dll)

    mye.DirImport.add_dlldesc(new_dll)
    s_imp = mye.SHList.add_section("import", rawsize=len(mye.DirImport))
    mye.DirImport.set_rva(s_imp.addr)
    log.debug('%r', mye.SHList)

    if e_orig:
        # resource: rebuild the content first, the directory is unpacked
        # from the new image
        mye.content = str(mye)
        res_entry = e_orig.NThdr.optentries[pe.DIRECTORY_ENTRY_RESOURCE]
        res_rva = res_entry.rva
        res_size = res_entry.size
        log.debug('dirres 0x%x', res_rva)
        if res_rva != 0:
            mye.NThdr.optentries[pe.DIRECTORY_ENTRY_RESOURCE].rva = res_rva
            mye.NThdr.optentries[pe.DIRECTORY_ENTRY_RESOURCE].size = res_size
            mye.DirRes = pe.DirRes.unpack(mye.img_rva, res_rva, mye)
            log.debug('%r', mye.DirRes)
            s_res = mye.SHList.add_section(
                name="myres", rawsize=len(mye.DirRes))
            mye.DirRes.set_rva(s_res.addr)

    # generation; the context manager closes the file even on error
    with open(fname, 'wb') as fdesc:
        fdesc.write(str(mye))
    return mye


class libimp_pe(libimp):
    """libimp specialisation registering PE exports and rebuilding PE
    import descriptors"""

    def __init__(self, *args, **kwargs):
        super(libimp_pe, self).__init__(*args, **kwargs)
        # dependency -> redirector
        self.created_redirected_imports = {}

    def add_export_lib(self, e, name):
        """Register the exports of the PE @e under the library name @name
        @e: PE instance
        @name: library name

        Forwarded exports are resolved through the target library, which
        is created on the fly when it is not known yet.
        Raise RuntimeError if @name was previously created because of a
        forwarded export, or if it has been faked previously.
        """
        if name in self.created_redirected_imports:
            log.error(
                "%r has previously been created due to redirect "
                "imports due to %r. Change the loading order.",
                name, self.created_redirected_imports[name])
            raise RuntimeError(
                'Bad import: loading previously created import')

        self.all_exported_lib.append(e)
        # will add real lib addresses to database
        if name in self.name2off:
            ad = self.name2off[name]
            if e is not None and name in self.fake_libs:
                log.error(
                    "You are trying to load %r but it has been faked previously. Try loading this module earlier.", name)
                raise RuntimeError("Bad import")
        else:
            log.debug('new lib %s', name)
            ad = e.NThdr.ImageBase
        libad = ad
        self.name2off[name] = ad
        self.libbase2lastad[ad] = ad + 0x1
        self.lib_imp2ad[ad] = {}
        self.lib_imp2dstad[ad] = {}
        self.libbase_ad += 0x1000

        todo = get_export_name_addr_list(e)
        while todo:
            imp_ord_or_name, ad = todo.pop()

            # if export is a redirection, search redirected dll
            # and get function real addr
            ret = is_redirected_export(e, ad)
            if ret:
                exp_dname, exp_fname = ret
                exp_dname = (exp_dname + '.dll').lower()
                if exp_dname == name:
                    # the dll forwards to itself: wait until the target
                    # export has been registered
                    libad_tmp = self.name2off[exp_dname]
                    if exp_fname not in self.lib_imp2ad[libad_tmp]:
                        # schedule func
                        todo = [(imp_ord_or_name, ad)] + todo
                        continue
                else:
                    # import redirected lib from non loaded dll
                    if exp_dname not in self.name2off:
                        self.created_redirected_imports.setdefault(
                            exp_dname, set()).add(name)

                    # Ensure import entry is created
                    new_lib_base = self.lib_get_add_base(exp_dname)
                    # Ensure function entry is created
                    _ = self.lib_get_add_func(new_lib_base, exp_fname)

                # NOTE(review): this value is overwritten below before
                # being read
                c_name = canon_libname_libfunc(exp_dname, exp_fname)
                libad_tmp = self.name2off[exp_dname]
                ad = self.lib_imp2ad[libad_tmp][exp_fname]

            self.lib_imp2ad[libad][imp_ord_or_name] = ad
            # Rebuilt on every pass: name2off may have grown above
            name_inv = {off: lib for lib, off in self.name2off.items()}
            c_name = canon_libname_libfunc(
                name_inv[libad], imp_ord_or_name)
            self.fad2cname[ad] = c_name
            self.cname2addr[c_name] = ad
            log.debug("Add func %s %s", hex(ad), c_name)
            self.fad2info[ad] = libad, imp_ord_or_name

    def gen_new_lib(self, target_pe, filter_import=lambda peobj, ad: True,
                    **kwargs):
        """Gen a new DirImport description
        @target_pe: PE instance
        @filter_import: (boolean f(pe, address)) restrict addresses to keep

        Return a list of ({"name": library, "firstthunk": rva}, functions)
        couples, one per run of contiguous import slots.
        """
        new_lib = []
        for lib_name, ad in self.name2off.items():
            # Build an IMAGE_IMPORT_DESCRIPTOR

            # Get fixed addresses
            out_ads = dict()  # addr -> func_name
            for func_name, dst_addresses in self.lib_imp2dstad[ad].items():
                for addr in dst_addresses:
                    out_ads[addr] = func_name

            # Filter available addresses according to @filter_import
            kept = [addr for addr in out_ads
                    if filter_import(target_pe, addr)]
            # Keep non-NULL elements only; dropping them before sorting
            # also covers the case where every address is NULL
            all_ads = sorted(addr for addr in kept if addr not in (0, None))
            if not all_ads:
                continue
            log.debug('ads: %s', [hex(addr) for addr in all_ads])

            while all_ads:
                # Find libname's Import Address Table
                othunk = all_ads[0]
                i = 0
                while (i + 1 < len(all_ads) and
                       all_ads[i] + target_pe._wsize // 8 == all_ads[i + 1]):
                    i += 1
                # 'i + 1' is IAT's length

                # Effectively build an IMAGE_IMPORT_DESCRIPTOR
                funcs = [out_ads[addr] for addr in all_ads[:i + 1]]
                try:
                    rva = target_pe.virt2rva(othunk)
                except pe.InvalidOffset:
                    # The table is outside of @target_pe: skip it
                    pass
                else:
                    new_lib.append(({"name": lib_name,
                                     "firstthunk": rva},
                                    funcs))

                # Update elements to handle
                all_ads = all_ads[i + 1:]

        return new_lib


def vm_load_pe_and_dependencies(vm, fname, name2module, runtime_lib,
                                lib_path_base, **kwargs):
    """Load a binary and all its dependencies. Returns a dictionary
    containing the association between binary names and their pe objects

    @vm: virtual memory manager instance
    @fname: full path of the binary
    @name2module: dict containing association between name and pe
    object. Updated. A module which cannot be opened is stored as None.
    @runtime_lib: libimp instance
    @lib_path_base: directory of the libraries containing dependencies
    Extra arguments are passed to vm_load_pe
    """
    todo = [(fname, fname, 0)]
    weight2name = {}
    done = set()

    # Walk dependencies recursively
    while todo:
        name, fname, weight = todo.pop()
        if name in done:
            continue
        done.add(name)
        weight2name.setdefault(weight, set()).add(name)
        if name in name2module:
            pe_obj = name2module[name]
            if pe_obj is None:
                # Known as not loadable: it has no dependency to walk
                continue
        else:
            try:
                with open(fname, "rb") as fstream:
                    log.info('Loading module name %r', fname)
                    pe_obj = vm_load_pe(
                        vm, fstream.read(), name=fname, **kwargs)
            except IOError:
                log.error('Cannot open %s', fname)
                name2module[name] = None
                continue
            name2module[name] = pe_obj

        todo += [(dep, os.path.join(lib_path_base, dep), weight - 1)
                 for dep in get_pe_dependencies(pe_obj)]

    # Lowest weights first: modules reached deeper in the walk are
    # registered before the modules importing them
    for _, modules in sorted(weight2name.items()):
        for name in modules:
            pe_obj = name2module[name]
            if pe_obj is None:
                continue
            # Fix imports
            if pe_obj.DirExport:
                runtime_lib.add_export_lib(pe_obj, name)

    for pe_obj in name2module.values():
        if pe_obj is None:
            continue
        preload_pe(vm, pe_obj, runtime_lib, patch_vm_imp=True)

    return name2module


# machine -> arch
PE_machine = {0x14c: "x86_32",
              0x8664: "x86_64",
              }


def guess_arch(pe):
    """Return the architecture specified by the PE container @pe.
    If unknown, return None"""
    machine = pe.Coffhdr.machine
    return PE_machine.get(machine)
Module variables
var BREAKPOINT_READ
var BREAKPOINT_WRITE
var EXCEPT_ACCESS_VIOL
var EXCEPT_BREAKPOINT_INTERN
var EXCEPT_BREAKPOINT_MEMORY
var EXCEPT_CODE_AUTOMOD
var EXCEPT_DIV_BY_ZERO
var EXCEPT_DO_NOT_UPDATE_PC
var EXCEPT_ILLEGAL_INSN
var EXCEPT_INT_XX
var EXCEPT_NUM_UPDT_EIP
var EXCEPT_PRIV_INSN
var EXCEPT_SOFT_BP
var EXCEPT_SPR_ACCESS
var EXCEPT_UNK_MNEMO
var PAGE_EXEC
var PAGE_READ
var PAGE_WRITE
var PE_machine
var hnd
var log
Functions
def get_export_name_addr_list(
e)
def get_export_name_addr_list(e):
    """List the exports of @e as (name or ordinal, virtual address) couples
    @e: PE instance

    The list holds, in order: one entry per exported name, one entry per
    name ordinal (biased by the export base), then one entry per non-null
    slot of the export address table (index biased by the export base).
    """
    exports = e.DirExport
    out = []

    # add func name
    for index, name_entry in enumerate(exports.f_names):
        target = exports.f_address[exports.f_nameordinals[index].ordinal]
        out.append((name_entry.name.name, e.rva2virt(target.rva)))

    # add func ordinal
    for ord_entry in exports.f_nameordinals:
        target = exports.f_address[ord_entry.ordinal]
        out.append((ord_entry.ordinal + exports.expdesc.base,
                    e.rva2virt(target.rva)))

    # add every populated slot of the address table; expdesc.base is only
    # read when there is something to add
    out.extend(
        (index + exports.expdesc.base, e.rva2virt(slot.rva))
        for index, slot in enumerate(exports.f_address)
        if slot.rva
    )
    return out
def get_import_address_pe(
e)
def get_import_address_pe(e):
    """Map each import of @e to the addresses of its import table slots
    @e: PE instance

    Return a defaultdict(set): (lower-cased dll name, function) -> set of
    virtual addresses. The function is the name of an ImportByName entry,
    or the raw entry otherwise.
    """
    import2addr = defaultdict(set)
    if e.DirImport.impdesc is None:
        return import2addr
    for desc in e.DirImport.impdesc:
        libname = desc.dlldescname.name.lower()
        for index, imp in enumerate(desc.impbynames):
            if isinstance(imp, pe.ImportByName):
                funcname = imp.name
            else:
                funcname = imp
            # Floor division keeps the slot offset an integer even when true
            # division is in effect (Python 3 / __future__.division).
            slot_rva = desc.firstthunk + e._wsize * index // 8
            import2addr[(libname, funcname)].add(e.rva2virt(slot_rva))
    return import2addr
def get_pe_dependencies(
pe_obj)
Return dependency set @pe_obj: pe object
def get_pe_dependencies(pe_obj):
    """Return dependency set
    @pe_obj: pe object

    The result holds the lower-cased DLL name of every import descriptor;
    it is empty when the PE has no import descriptors.
    """
    descriptors = pe_obj.DirImport.impdesc
    if descriptors is None:
        return set()
    return {desc.dlldescname.name.lower() for desc in descriptors}
def guess_arch(
pe)
Return the architecture specified by the PE container @pe. If unknown, return None
def guess_arch(pe):
    """Return the architecture specified by the PE container @pe.
    If unknown, return None"""
    machine = pe.Coffhdr.machine
    return PE_machine.get(machine)
def is_redirected_export(
pe_obj, addr)
Test if the @addr is a forwarded export address. If so, return dllname/function name couple. If not, return False.
An export address is a forwarded export if the rva is in the export directory of the pe.
@pe_obj: PE instance @addr: virtual address of the function to test
def is_redirected_export(pe_obj, addr):
    """Test if the @addr is a forwarded export address. If so, return
    dllname/function name couple. If not, return False.

    An export address is a forwarded export if the rva is in the export
    directory of the pe.

    @pe_obj: PE instance
    @addr: virtual address of the function to test
    """
    export_dir = pe_obj.NThdr.optentries[pe.DIRECTORY_ENTRY_EXPORT]
    addr_rva = pe_obj.virt2rva(addr)
    if addr_rva < export_dir.rva:
        return False
    if addr_rva >= export_dir.rva + export_dir.size:
        return False

    # The forwarder is a NUL terminated "dll.function" string
    terminator = pe_obj.virt.find('\x00', addr)
    forwarder = pe_obj.virt.get(addr, terminator)
    dllname, func_info = forwarder.split('.', 1)

    # Test if function is forwarded using ordinal
    if func_info.startswith('#'):
        func_info = int(func_info[1:])
    return dllname.lower(), func_info
def preload_pe(
vm, e, runtime_lib, patch_vm_imp=True)
def preload_pe(vm, e, runtime_lib, patch_vm_imp=True):
    """Resolve the imports of @e through @runtime_lib
    @vm: VmMngr instance, written to when @patch_vm_imp is set
    @e: PE instance
    @runtime_lib: object providing lib_get_add_base / lib_get_add_func
    @patch_vm_imp: (optional) if True, write each resolved address in the
    matching import slot of @vm

    Return a dictionary: canon_libname_libfunc(dll, function) -> address
    returned by lib_get_add_func.
    """
    resolved = {}
    for (libname, libfunc), slots in get_import_address_pe(e).items():
        for slot in slots:
            lib_base = runtime_lib.lib_get_add_base(libname)
            func_addr = runtime_lib.lib_get_add_func(lib_base, libfunc, slot)
            resolved[canon_libname_libfunc(libname, libfunc)] = func_addr
            if not patch_vm_imp:
                continue
            # Pack the address with the pointer format matching the PE
            # word size.
            packed = struct.pack(cstruct.size2type[e._wsize], func_addr)
            vm.set_mem(slot, packed)
    return resolved
def vm2pe(
myjit, fname, libs=None, e_orig=None, min_addr=None, max_addr=None, min_section_offset=4096, img_base=None, added_funcs=None, **kwargs)
def vm2pe(myjit, fname, libs=None, e_orig=None,
          min_addr=None, max_addr=None,
          min_section_offset=0x1000, img_base=None,
          added_funcs=None, **kwargs):
    """Build a PE from the memory of @myjit and write it to @fname
    @myjit: jitter instance; its vm memory and pc are read
    @fname: path of the file to write
    @libs: (optional) libimp_pe instance used to rebuild the import
    directory
    @e_orig: (optional) original PE instance; provides the word size, the
    default address range, the default image base and the resource
    directory
    @min_addr, @max_addr: (optional) only memory pages starting in
    [@min_addr, @max_addr[ are dumped
    @min_section_offset: (optional) file offset given to the first section
    @img_base: (optional) image base of the new PE
    @added_funcs: (optional) iterable of (address, function address)
    couples registered in @libs before the imports are generated
    @kwargs: 'filter_import' overrides the import address filter

    Return the new PE instance.
    Raise ValueError if neither @img_base nor @e_orig is given.
    """
    wsize = e_orig._wsize if e_orig else 32
    mye = pe_init.PE(wsize=wsize)

    if min_addr is None and e_orig is not None:
        min_addr = min([e_orig.rva2virt(s.addr) for s in e_orig.SHList])
    if max_addr is None and e_orig is not None:
        max_addr = max([e_orig.rva2virt(s.addr + s.size)
                        for s in e_orig.SHList])

    if img_base is None:
        if e_orig is None:
            raise ValueError(
                "vm2pe: img_base is required when e_orig is not given")
        img_base = e_orig.NThdr.ImageBase

    mye.NThdr.ImageBase = img_base
    all_mem = myjit.vm.get_all_memory()
    mye.Opthdr.AddressOfEntryPoint = mye.virt2rva(myjit.pc)

    first = True
    for ad in sorted(all_mem):
        if not min_addr <= ad < max_addr:
            continue
        log.debug("0x%x", ad)
        section_args = {
            "addr": ad - mye.NThdr.ImageBase,
            "data": all_mem[ad]['data'],
        }
        if first:
            # Only the first dumped section gets an explicit file offset
            section_args["offset"] = min_section_offset
        mye.SHList.add_section("%.8X" % ad, **section_args)
        first = False

    if libs:
        if added_funcs is not None:
            for addr, funcaddr in added_funcs:
                libbase, funcname = libs.fad2info[funcaddr]
                libs.lib_get_add_func(libbase, funcname, addr)

        filter_import = kwargs.get(
            'filter_import', lambda _, ad: mye.virt.is_addr_in(ad))
        new_dll = libs.gen_new_lib(mye, filter_import)
    else:
        new_dll = {}

    log.debug('%s', new_dll)

    mye.DirImport.add_dlldesc(new_dll)
    s_imp = mye.SHList.add_section("import", rawsize=len(mye.DirImport))
    mye.DirImport.set_rva(s_imp.addr)
    log.debug('%r', mye.SHList)

    if e_orig:
        # resource: rebuild the content first, the directory is unpacked
        # from the new image
        mye.content = str(mye)
        res_entry = e_orig.NThdr.optentries[pe.DIRECTORY_ENTRY_RESOURCE]
        res_rva = res_entry.rva
        res_size = res_entry.size
        log.debug('dirres 0x%x', res_rva)
        if res_rva != 0:
            mye.NThdr.optentries[pe.DIRECTORY_ENTRY_RESOURCE].rva = res_rva
            mye.NThdr.optentries[pe.DIRECTORY_ENTRY_RESOURCE].size = res_size
            mye.DirRes = pe.DirRes.unpack(mye.img_rva, res_rva, mye)
            log.debug('%r', mye.DirRes)
            s_res = mye.SHList.add_section(
                name="myres", rawsize=len(mye.DirRes))
            mye.DirRes.set_rva(s_res.addr)

    # generation; the context manager closes the file even on error
    with open(fname, 'wb') as fdesc:
        fdesc.write(str(mye))
    return mye
def vm_fix_imports_pe_libs(
lib_imgs, libs, lib_path_base, patch_vm_imp=True, **kargs)
def vm_fix_imports_pe_libs(lib_imgs, libs, lib_path_base,
                           patch_vm_imp=True, vm=None, **kargs):
    """Call preload_pe on every PE of @lib_imgs
    @lib_imgs: dictionary Filename -> PE instance
    @libs: libimp_pe instance, forwarded to preload_pe
    @lib_path_base: unused by this function
    @patch_vm_imp: (optional) forwarded to preload_pe
    @vm: VmMngr instance forwarded to preload_pe; required as soon as
    @lib_imgs is not empty

    Raise TypeError if @vm is missing while there are images to fix.
    """
    if not lib_imgs:
        return
    if vm is None:
        # preload_pe needs the memory manager as its first argument; calling
        # it without one silently shifted every argument by one position.
        raise TypeError("vm_fix_imports_pe_libs requires the 'vm' argument")
    for e in lib_imgs.values():
        preload_pe(vm, e, libs, patch_vm_imp)
def vm_load_pe(
vm, fdata, align_s=True, load_hdr=True, name='', **kargs)
Load a PE in memory (@vm) from a data buffer @fdata @vm: VmMngr instance @fdata: data buffer to parse @align_s: (optional) If False, keep gaps between sections @load_hdr: (optional) If False, do not load the NThdr in memory Return the corresponding PE instance.
Extra arguments are passed to PE instantiation. If all sections are aligned, they will be mapped on several different pages; otherwise, a single big page containing all sections is created.
def vm_load_pe(vm, fdata, align_s=True, load_hdr=True, name="", **kargs):
    """Load a PE in memory (@vm) from a data buffer @fdata
    @vm: VmMngr instance
    @fdata: data buffer to parse
    @align_s: (optional) If False, keep gaps between sections
    @load_hdr: (optional) If False, do not load the NThdr in memory
    @name: (optional) label used to name the created memory pages
    Return the corresponding PE instance.

    Extra arguments are passed to PE instantiation.
    If all sections are aligned, they will be mapped on several different
    pages. Otherwise, a big page is created, containing all sections.
    A PE without any section only gets its header mapped (if @load_hdr).
    """
    # Parse and build a PE instance. The local is not named `pe` so the
    # elfesteem `pe` module stays reachable.
    pe_obj = pe_init.PE(fdata, **kargs)
    has_sections = len(pe_obj.SHList) > 0

    # Check if all sections are aligned on 0x1000
    aligned = all(not (section.addr & 0xFFF) for section in pe_obj.SHList)

    if aligned:
        # Loader NT header
        if load_hdr:
            # Header length
            hdr_len = max(0x200, pe_obj.NThdr.sizeofheaders)
            # Page minimum size: up to the first section, at most 0x1000
            if has_sections:
                min_len = min(pe_obj.SHList[0].addr, 0x1000)
            else:
                min_len = 0x1000
            # Get and pad the pe_hdr
            pe_hdr = pe_obj.content[:hdr_len] + max(
                0, (min_len - hdr_len)) * "\x00"
            vm.add_memory_page(pe_obj.NThdr.ImageBase,
                               PAGE_READ | PAGE_WRITE,
                               pe_hdr, "%r: PE Header" % name)

        # Align sections size
        if align_s and has_sections:
            # Use the next section address to compute the new size
            for i, section in enumerate(pe_obj.SHList[:-1]):
                new_size = pe_obj.SHList[i + 1].addr - section.addr
                section.size = new_size
                section.rawsize = new_size
                section.data = strpatchwork.StrPatchwork(
                    section.data[:new_size]
                )
                section.offset = section.addr

            # Last section alignment: round its size up to 0x1000
            last_section = pe_obj.SHList[-1]
            last_section.size = (last_section.size + 0xfff) & 0xfffff000

        # Pad sections with null bytes and map them
        for section in pe_obj.SHList:
            data = str(section.data)
            data += "\x00" * (section.size - len(data))
            attrib = PAGE_READ
            # Sections flagged 0x80000000 are mapped writable
            if section.flags & 0x80000000:
                attrib |= PAGE_WRITE
            vm.add_memory_page(pe_obj.rva2virt(section.addr), attrib, data,
                               "%r: %r" % (name, section.name))

        return pe_obj

    # At least one section is not aligned
    log.warning('PE is not aligned, creating big section')
    min_addr = 0 if load_hdr else None
    max_addr = None

    for i, section in enumerate(pe_obj.SHList):
        if i < len(pe_obj.SHList) - 1:
            # If it is not the last section, use next section address
            section.size = pe_obj.SHList[i + 1].addr - section.addr
        section.rawsize = section.size
        section.offset = section.addr

        # Update min and max addresses
        if min_addr is None or section.addr < min_addr:
            min_addr = section.addr
        max_section_len = max(section.size, len(section.data))
        if max_addr is None or section.addr + max_section_len > max_addr:
            max_addr = section.addr + max_section_len

    min_addr = pe_obj.rva2virt(min_addr)
    max_addr = pe_obj.rva2virt(max_addr)
    log.debug('Min: 0x%x, Max: 0x%x, Size: 0x%x', min_addr, max_addr,
              (max_addr - min_addr))

    # Create only one big section containing the whole PE
    vm.add_memory_page(min_addr,
                       PAGE_READ | PAGE_WRITE,
                       (max_addr - min_addr) * "\x00")

    # Copy each sections content in memory
    for section in pe_obj.SHList:
        log.debug('Map 0x%x bytes to 0x%x', len(section.data),
                  pe_obj.rva2virt(section.addr))
        vm.set_mem(pe_obj.rva2virt(section.addr), str(section.data))

    return pe_obj
def vm_load_pe_and_dependencies(
vm, fname, name2module, runtime_lib, lib_path_base, **kwargs)
Load a binary and all its dependencies. Returns a dictionary containing the association between binary names and their PE objects
@vm: virtual memory manager instance @fname: full path of the binary @name2module: dict containing association between name and pe object. Updated. @runtime_lib: libimp instance @lib_path_base: directory of the libraries containing dependencies
def vm_load_pe_and_dependencies(vm, fname, name2module, runtime_lib,
                                lib_path_base, **kwargs):
    """Load a binary and all its dependencies. Returns a dictionary
    containing the association between binary names and their pe objects

    @vm: virtual memory manager instance
    @fname: full path of the binary
    @name2module: dict containing association between name and pe
    object. Updated. A module which cannot be opened is stored as None.
    @runtime_lib: libimp instance
    @lib_path_base: directory of the libraries containing dependencies
    Extra arguments are passed to vm_load_pe
    """
    todo = [(fname, fname, 0)]
    weight2name = {}
    done = set()

    # Walk dependencies recursively
    while todo:
        name, fname, weight = todo.pop()
        if name in done:
            continue
        done.add(name)
        weight2name.setdefault(weight, set()).add(name)
        if name in name2module:
            pe_obj = name2module[name]
            if pe_obj is None:
                # Known as not loadable: it has no dependency to walk
                continue
        else:
            try:
                with open(fname, "rb") as fstream:
                    log.info('Loading module name %r', fname)
                    pe_obj = vm_load_pe(
                        vm, fstream.read(), name=fname, **kwargs)
            except IOError:
                log.error('Cannot open %s', fname)
                name2module[name] = None
                continue
            name2module[name] = pe_obj

        todo += [(dep, os.path.join(lib_path_base, dep), weight - 1)
                 for dep in get_pe_dependencies(pe_obj)]

    # Lowest weights first: modules reached deeper in the walk are
    # registered before the modules importing them
    for _, modules in sorted(weight2name.items()):
        for name in modules:
            pe_obj = name2module[name]
            if pe_obj is None:
                continue
            # Fix imports
            if pe_obj.DirExport:
                runtime_lib.add_export_lib(pe_obj, name)

    for pe_obj in name2module.values():
        if pe_obj is None:
            continue
        preload_pe(vm, pe_obj, runtime_lib, patch_vm_imp=True)

    return name2module
def vm_load_pe_lib(
vm, fname_in, libs, lib_path_base, **kargs)
Call vm_load_pe on @fname_in and update @libs accordingly @vm: VmMngr instance @fname_in: library name @libs: libimp_pe instance @lib_path_base: DLLs relative path Return the corresponding PE instance Extra arguments are passed to vm_load_pe
def vm_load_pe_lib(vm, fname_in, libs, lib_path_base, **kargs):
    """Call vm_load_pe on @fname_in and update @libs accordingly
    @vm: VmMngr instance
    @fname_in: library name
    @libs: libimp_pe instance
    @lib_path_base: DLLs relative path
    Return the corresponding PE instance
    Extra arguments are passed to vm_load_pe
    """
    log.info('Loading module %r', fname_in)
    lib_path = os.path.join(lib_path_base, fname_in)
    with open(lib_path, "rb") as fstream:
        raw = fstream.read()
    loaded = vm_load_pe(vm, raw, name=fname_in, **kargs)
    # Register the exports of the freshly mapped library
    libs.add_export_lib(loaded, fname_in)
    return loaded
def vm_load_pe_libs(
vm, libs_name, libs, lib_path_base, **kargs)
Call vm_load_pe_lib on each @libs_name filename @vm: VmMngr instance @libs_name: list of str @libs: libimp_pe instance @lib_path_base: (optional) DLLs relative path Return a dictionary Filename -> PE instances Extra arguments are passed to vm_load_pe_lib
def vm_load_pe_libs(vm, libs_name, libs, lib_path_base, **kargs):
    """Call vm_load_pe_lib on each @libs_name filename
    @vm: VmMngr instance
    @libs_name: list of str
    @libs: libimp_pe instance
    @lib_path_base: (optional) DLLs relative path
    Return a dictionary Filename -> PE instances
    Extra arguments are passed to vm_load_pe_lib
    """
    loaded = {}
    for lib_fname in libs_name:
        loaded[lib_fname] = vm_load_pe_lib(
            vm, lib_fname, libs, lib_path_base, **kargs)
    return loaded
Classes
class libimp_pe
class libimp_pe(libimp):
    """libimp specialisation which fills its tables from PE export
    directories and can rebuild an import description from them"""

    def __init__(self, *args, **kwargs):
        super(libimp_pe, self).__init__(*args, **kwargs)
        # dependency -> redirector
        self.created_redirected_imports = {}

    def add_export_lib(self, e, name):
        """Register the exports of the PE @e under the library name @name

        @e: PE instance of the library
        @name: library name, used as key of self.name2off

        Nothing is registered if @name is already known. Raise RuntimeError
        if @name has been created on behalf of a forwarded export, or if it
        has been faked and @e is not None.
        """
        if name in self.created_redirected_imports:
            log.error("%r has previously been created due to redirect "
                      "imports due to %r. Change the loading order.",
                      name, self.created_redirected_imports[name])
            raise RuntimeError('Bad import: loading previously created import')

        self.all_exported_lib.append(e)
        # will add real lib addresses to database
        if name in self.name2off:
            if e is not None and name in self.fake_libs:
                log.error(
                    "You are trying to load %r but it has been faked previously. Try loading this module earlier.", name)
                raise RuntimeError("Bad import")
            return

        log.debug('new lib %s', name)
        libad = e.NThdr.ImageBase
        self.name2off[name] = libad
        self.libbase2lastad[libad] = libad + 0x1
        self.lib_imp2ad[libad] = {}
        self.lib_imp2dstad[libad] = {}
        self.libbase_ad += 0x1000

        todo = get_export_name_addr_list(e)
        # Number of exports re-queued in a row without any export being
        # registered in between
        stalled = 0
        while todo:
            imp_ord_or_name, ad = todo.pop()

            # if export is a redirection, search redirected dll
            # and get function real addr
            ret = is_redirected_export(e, ad)
            if ret:
                exp_dname, exp_fname = ret
                exp_dname = (exp_dname + '.dll').lower()
                if exp_dname == name:
                    # The dll forwards to itself: the target must be
                    # registered first
                    libad_tmp = self.name2off[exp_dname]
                    if exp_fname not in self.lib_imp2ad[libad_tmp]:
                        if stalled > len(todo):
                            # Every pending export has been re-queued since
                            # the last registration: the targets will never
                            # show up, so stop instead of looping forever
                            log.error(
                                "Cannot resolve exports of %r forwarded to itself: %r",
                                name,
                                [imp_ord_or_name] + [imp for imp, _ in todo])
                            break
                        stalled += 1
                        # schedule func
                        todo = [(imp_ord_or_name, ad)] + todo
                        continue
                else:
                    # import redirected lib from non loaded dll
                    if exp_dname not in self.name2off:
                        self.created_redirected_imports.setdefault(
                            exp_dname, set()).add(name)

                    # Ensure import entry is created
                    new_lib_base = self.lib_get_add_base(exp_dname)
                    # Ensure function entry is created
                    self.lib_get_add_func(new_lib_base, exp_fname)

                libad_tmp = self.name2off[exp_dname]
                ad = self.lib_imp2ad[libad_tmp][exp_fname]

            stalled = 0
            self.lib_imp2ad[libad][imp_ord_or_name] = ad
            name_inv = dict([(x[1], x[0]) for x in self.name2off.items()])
            c_name = canon_libname_libfunc(
                name_inv[libad], imp_ord_or_name)
            self.fad2cname[ad] = c_name
            self.cname2addr[c_name] = ad
            log.debug("Add func %s %s", hex(ad), c_name)
            self.fad2info[ad] = libad, imp_ord_or_name

    def gen_new_lib(self, target_pe, filter_import=lambda peobj, ad: True,
                    **kwargs):
        """Gen a new DirImport description
        @target_pe: PE instance
        @filter_import: (boolean f(pe, address)) restrict addresses to keep

        Return a list of ({"name": library name, "firstthunk": rva},
        [function names]) couples, one per run of contiguous addresses.
        NULL (0 or None) addresses are dropped before @filter_import is
        called.
        """

        new_lib = []
        for lib_name, ad in self.name2off.items():
            # Build an IMAGE_IMPORT_DESCRIPTOR

            # Get fixed addresses
            out_ads = dict()  # addr -> func_name
            for func_name, dst_addresses in self.lib_imp2dstad[ad].items():
                out_ads.update({addr: func_name for addr in dst_addresses})

            # Keep non-NULL elements accepted by @filter_import
            all_ads = sorted(
                addr for addr in out_ads
                if addr not in (0, None) and filter_import(target_pe, addr))
            if not all_ads:
                continue
            log.debug('ads: %s', [hex(addr) for addr in all_ads])

            # Size in bytes of one table entry
            entry_size = target_pe._wsize // 8
            while all_ads:
                # Find libname's Import Address Table
                othunk = all_ads[0]
                i = 0
                while (i + 1 < len(all_ads) and
                       all_ads[i] + entry_size == all_ads[i + 1]):
                    i += 1
                # 'i + 1' is IAT's length

                # Effectively build an IMAGE_IMPORT_DESCRIPTOR
                funcs = [out_ads[addr] for addr in all_ads[:i + 1]]
                try:
                    rva = target_pe.virt2rva(othunk)
                except pe.InvalidOffset:
                    # Run located outside of @target_pe: nothing to describe
                    pass
                else:
                    new_lib.append(({"name": lib_name,
                                     "firstthunk": rva},
                                    funcs)
                                   )

                # Update elements to handle
                all_ads = all_ads[i + 1:]

        return new_lib
Ancestors (in MRO)
- libimp_pe
- miasm2.jitter.loader.utils.libimp
- __builtin__.object
Instance variables
var created_redirected_imports
Methods
def __init__(
self, *args, **kwargs)
def __init__(self, *args, **kwargs):
    """Initialise the parent class with the given arguments, then create the
    (empty) table of libraries created for redirected exports"""
    super(libimp_pe, self).__init__(*args, **kwargs)

    # dependency -> redirector
    self.created_redirected_imports = dict()
def add_export_lib(
self, e, name)
def add_export_lib(self, e, name):
    """Register the exports of the PE @e under the library name @name

    @e: PE instance of the library
    @name: library name, used as key of self.name2off

    Nothing is registered if @name is already known. Raise RuntimeError if
    @name has been created on behalf of a forwarded export, or if it has
    been faked and @e is not None.
    """
    if name in self.created_redirected_imports:
        log.error("%r has previously been created due to redirect "
                  "imports due to %r. Change the loading order.",
                  name, self.created_redirected_imports[name])
        raise RuntimeError('Bad import: loading previously created import')

    self.all_exported_lib.append(e)
    # will add real lib addresses to database
    if name in self.name2off:
        if e is not None and name in self.fake_libs:
            log.error(
                "You are trying to load %r but it has been faked previously. Try loading this module earlier.", name)
            raise RuntimeError("Bad import")
        return

    log.debug('new lib %s', name)
    libad = e.NThdr.ImageBase
    self.name2off[name] = libad
    self.libbase2lastad[libad] = libad + 0x1
    self.lib_imp2ad[libad] = {}
    self.lib_imp2dstad[libad] = {}
    self.libbase_ad += 0x1000

    todo = get_export_name_addr_list(e)
    # Number of exports re-queued in a row without any export being
    # registered in between
    stalled = 0
    while todo:
        imp_ord_or_name, ad = todo.pop()

        # if export is a redirection, search redirected dll
        # and get function real addr
        ret = is_redirected_export(e, ad)
        if ret:
            exp_dname, exp_fname = ret
            exp_dname = (exp_dname + '.dll').lower()
            if exp_dname == name:
                # The dll forwards to itself: the target must be registered
                # first
                libad_tmp = self.name2off[exp_dname]
                if exp_fname not in self.lib_imp2ad[libad_tmp]:
                    if stalled > len(todo):
                        # Every pending export has been re-queued since the
                        # last registration: the targets will never show up,
                        # so stop instead of looping forever
                        log.error(
                            "Cannot resolve exports of %r forwarded to itself: %r",
                            name,
                            [imp_ord_or_name] + [imp for imp, _ in todo])
                        break
                    stalled += 1
                    # schedule func
                    todo = [(imp_ord_or_name, ad)] + todo
                    continue
            else:
                # import redirected lib from non loaded dll
                if exp_dname not in self.name2off:
                    self.created_redirected_imports.setdefault(
                        exp_dname, set()).add(name)

                # Ensure import entry is created
                new_lib_base = self.lib_get_add_base(exp_dname)
                # Ensure function entry is created
                self.lib_get_add_func(new_lib_base, exp_fname)

            libad_tmp = self.name2off[exp_dname]
            ad = self.lib_imp2ad[libad_tmp][exp_fname]

        stalled = 0
        self.lib_imp2ad[libad][imp_ord_or_name] = ad
        name_inv = dict([(x[1], x[0]) for x in self.name2off.items()])
        c_name = canon_libname_libfunc(
            name_inv[libad], imp_ord_or_name)
        self.fad2cname[ad] = c_name
        self.cname2addr[c_name] = ad
        log.debug("Add func %s %s", hex(ad), c_name)
        self.fad2info[ad] = libad, imp_ord_or_name
def check_dst_ad(
self)
def check_dst_ad(self, step=4):
    """Test if, for every library, the destination addresses of its imports
    are contiguous

    @step: (optional) expected distance between two consecutive destination
    addresses; the default value, 4, is the distance previously hard-coded

    Return False as soon as a library with several destination addresses
    has an unknown (None) one or two consecutive addresses which are not
    @step apart, True otherwise.
    """
    for dst_by_func in self.lib_imp2dstad.values():
        # An entry is either a single address or a collection of addresses
        # (lib_get_add_func stores a set per function)
        all_ads = []
        for dst in dst_by_func.values():
            if isinstance(dst, (set, frozenset, list, tuple)):
                all_ads.extend(dst)
            else:
                all_ads.append(dst)

        if len(all_ads) < 2:
            # No couple of addresses to compare
            continue
        # Tested before sorting: None cannot be ordered against integers
        # on every Python version
        if any(addr is None for addr in all_ads):
            return False

        all_ads.sort()
        for low, high in zip(all_ads, all_ads[1:]):
            if low + step != high:
                return False
    return True
def gen_new_lib(
self, target_pe, filter_import=lambda peobj, ad: True, **kwargs)
Gen a new DirImport description @target_pe: PE instance @filter_import: (boolean f(pe, address)) restrict addresses to keep
def gen_new_lib(self, target_pe, filter_import=lambda peobj, ad: True,
                **kwargs):
    """Gen a new DirImport description
    @target_pe: PE instance
    @filter_import: (boolean f(pe, address)) restrict addresses to keep

    Return a list of ({"name": library name, "firstthunk": rva},
    [function names]) couples, one per run of contiguous addresses.
    NULL (0 or None) addresses are dropped before @filter_import is called.
    """

    new_lib = []
    for lib_name, ad in self.name2off.items():
        # Build an IMAGE_IMPORT_DESCRIPTOR

        # Get fixed addresses
        out_ads = dict()  # addr -> func_name
        for func_name, dst_addresses in self.lib_imp2dstad[ad].items():
            out_ads.update({addr: func_name for addr in dst_addresses})

        # Keep non-NULL elements accepted by @filter_import. NULL entries
        # are removed explicitly rather than by skipping the head of the
        # sorted list, which kept one of them when nothing else was left
        all_ads = sorted(
            addr for addr in out_ads
            if addr not in (0, None) and filter_import(target_pe, addr))
        if not all_ads:
            continue
        log.debug('ads: %s', [hex(addr) for addr in all_ads])

        # Size in bytes of one table entry
        entry_size = target_pe._wsize // 8
        while all_ads:
            # Find libname's Import Address Table
            othunk = all_ads[0]
            i = 0
            while (i + 1 < len(all_ads) and
                   all_ads[i] + entry_size == all_ads[i + 1]):
                i += 1
            # 'i + 1' is IAT's length

            # Effectively build an IMAGE_IMPORT_DESCRIPTOR
            funcs = [out_ads[addr] for addr in all_ads[:i + 1]]
            try:
                rva = target_pe.virt2rva(othunk)
            except pe.InvalidOffset:
                # Run located outside of @target_pe: nothing to describe
                pass
            else:
                new_lib.append(({"name": lib_name,
                                 "firstthunk": rva},
                                funcs)
                               )

            # Update elements to handle
            all_ads = all_ads[i + 1:]

    return new_lib
def lib_get_add_base(
self, name)
def lib_get_add_base(self, name):
    """Return the base address registered for the library @name, creating a
    dummy entry if the library is unknown

    @name: library name; it is lowercased, stripped of surrounding spaces
    and suffixed with '.dll' if it contains no '.'
    """
    libname = name.lower().strip(' ')
    if "." not in libname:
        log.debug('warning adding .dll to modulename')
        libname += '.dll'
        log.debug(libname)

    if libname in self.name2off:
        return self.name2off[libname]

    # Unknown library: fake it at the next free base address
    base = self.libbase_ad
    log.warning("Create dummy entry for %r", libname)
    self.fake_libs.add(libname)
    self.name2off[libname] = base
    self.libbase2lastad[base] = base + 0x4
    self.lib_imp2ad[base] = {}
    self.lib_imp2dstad[base] = {}
    self.libbase_ad += 0x1000
    return base
def lib_get_add_func(
self, libad, imp_ord_or_name, dst_ad=None)
def lib_get_add_func(self, libad, imp_ord_or_name, dst_ad=None):
    """Return the address associated with the function @imp_ord_or_name of
    the library based at @libad, allocating a new one if needed

    @libad: base address of a registered library
    @imp_ord_or_name: function identifier (ordinal or name)
    @dst_ad: (optional) destination address to remember for this function

    Raise ValueError if @libad is not the base of a registered library.
    """
    if libad not in self.name2off.values():
        raise ValueError('unknown lib base!', hex(libad))

    # /!\ can have multiple dst ad
    self.lib_imp2dstad[libad].setdefault(imp_ord_or_name, set()).add(dst_ad)

    known_funcs = self.lib_imp2ad[libad]
    if imp_ord_or_name in known_funcs:
        return known_funcs[imp_ord_or_name]

    # New function: take the next free address of the library
    func_ad = self.libbase2lastad[libad]
    self.libbase2lastad[libad] = func_ad + 0x10  # arbitrary
    known_funcs[imp_ord_or_name] = func_ad

    # base address -> library name
    off2name = dict((off, libname) for libname, off in self.name2off.items())
    c_name = canon_libname_libfunc(off2name[libad], imp_ord_or_name)
    self.fad2cname[func_ad] = c_name
    self.cname2addr[c_name] = func_ad
    self.fad2info[func_ad] = libad, imp_ord_or_name
    return func_ad