Top

miasm2.jitter.loader.pe module

import os
import struct
import logging
from collections import defaultdict

from elfesteem import pe
from elfesteem import cstruct
from elfesteem import *

from miasm2.jitter.csts import *
from miasm2.jitter.loader.utils import canon_libname_libfunc, libimp

# Module-level logger of the PE loader: messages go through a stream handler
# using the "[LEVEL]: message" format, and the threshold is set to INFO.
log = logging.getLogger('loader_pe')
hnd = logging.StreamHandler()
hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s"))
log.addHandler(hnd)
log.setLevel(logging.INFO)


def get_pe_dependencies(pe_obj):
    """Collect the names of the libraries imported by a PE
    @pe_obj: pe object
    Return a set of lower-cased library names; the set is empty when the PE
    has no import descriptor."""

    descriptors = pe_obj.DirImport.impdesc
    if descriptors is None:
        return set()
    return {desc.dlldescname.name.lower() for desc in descriptors}


def get_import_address_pe(e):
    """Map each imported symbol of a PE to the addresses of its import slots
    @e: PE instance
    Return a defaultdict(set) indexed by (lower-cased library name, function)
    couples; each value holds the virtual addresses of the matching thunk
    entries. The function part is the import name for pe.ImportByName
    entries and the raw entry otherwise (presumably an ordinal -- TODO
    confirm).
    """
    import2addr = defaultdict(set)
    if e.DirImport.impdesc is None:
        return import2addr
    for desc in e.DirImport.impdesc:
        libname = desc.dlldescname.name.lower()
        for index, imp in enumerate(desc.impbynames):
            if isinstance(imp, pe.ImportByName):
                funcname = imp.name
            else:
                funcname = imp
            # Floor division keeps the slot offset an integer even when "/"
            # is a true division (Python 3, "from __future__ import division")
            slot_rva = desc.firstthunk + (e._wsize * index) // 8
            import2addr[(libname, funcname)].add(e.rva2virt(slot_rva))
    return import2addr


def preload_pe(vm, e, runtime_lib, patch_vm_imp=True):
    """Resolve the imports of the PE @e through @runtime_lib
    @vm: VmMngr instance, only written when @patch_vm_imp is True
    @e: PE instance
    @runtime_lib: object providing lib_get_add_base and lib_get_add_func
    @patch_vm_imp: (optional) If True, write each resolved address in the
    memory of @vm, at the address of the corresponding import slot
    Return a dictionary: canonical function name -> resolved address
    """
    resolved = {}
    for (libname, libfunc), slots in get_import_address_pe(e).items():
        for slot in slots:
            lib_base = runtime_lib.lib_get_add_base(libname)
            func_addr = runtime_lib.lib_get_add_func(lib_base, libfunc, slot)

            resolved[canon_libname_libfunc(libname, libfunc)] = func_addr
            if not patch_vm_imp:
                continue
            # The slot is as wide as a word of the PE
            packed = struct.pack(cstruct.size2type[e._wsize], func_addr)
            vm.set_mem(slot, packed)
    return resolved


def is_redirected_export(pe_obj, addr):
    """Tell whether the export located at @addr is a forwarded export.

    An export is considered forwarded when its rva lies inside the export
    directory of the pe. In that case, the null-terminated string found at
    @addr is split on its first '.' into a dll name and a function part.

    @pe_obj: PE instance
    @addr: virtual address of the function to test

    Return False if the export is not forwarded. Otherwise return a
    (lower-cased dll name, function) couple, where function is an int when
    the string uses the '#ordinal' form, and the function name otherwise.
    """

    export_entry = pe_obj.NThdr.optentries[pe.DIRECTORY_ENTRY_EXPORT]
    rva = pe_obj.virt2rva(addr)
    in_export_dir = (
        export_entry.rva <= rva < export_entry.rva + export_entry.size)
    if not in_export_dir:
        return False

    # Read the forwarder string, up to its null terminator
    terminator = pe_obj.virt.find('\x00', addr)
    forward = pe_obj.virt.get(addr, terminator)
    dllname, func_info = forward.split('.', 1)

    # Forward by ordinal
    if func_info.startswith('#'):
        func_info = int(func_info[1:])
    return dllname.lower(), func_info


def get_export_name_addr_list(e):
    """List the exports of the PE @e
    @e: PE instance
    Return a list of (name or ordinal, virtual address) couples built in
    three passes: exports by name, ordinals of the named exports, then
    every entry of the address table having a non-null rva.
    """
    exports = e.DirExport
    result = []

    # Exports by name
    for index, name_entry in enumerate(exports.f_names):
        target = exports.f_address[exports.f_nameordinals[index].ordinal]
        result.append((name_entry.name.name, e.rva2virt(target.rva)))

    # Ordinals of the named exports
    result.extend(
        (name_ord.ordinal + exports.expdesc.base,
         e.rva2virt(exports.f_address[name_ord.ordinal].rva))
        for name_ord in exports.f_nameordinals)

    # Whole address table, empty entries excepted
    result.extend(
        (index + exports.expdesc.base, e.rva2virt(entry.rva))
        for index, entry in enumerate(exports.f_address)
        if entry.rva)

    return result


def vm_load_pe(vm, fdata, align_s=True, load_hdr=True, name="", **kargs):
    """Parse the buffer @fdata as a PE and map it in the memory of @vm
    @vm: VmMngr instance
    @fdata: data buffer to parse
    @align_s: (optional) If True (aligned layout only), each section is
    resized up to the next section address and the size of the last one is
    rounded up to a multiple of 0x1000
    @load_hdr: (optional) If False, do not load the NThdr in memory
    @name: (optional) label used in the names of the created memory pages
    Return the corresponding PE instance.

    Extra arguments are passed to PE instantiation.
    If every section address is a multiple of 0x1000, the header and each
    section get their own memory page. Otherwise, a single big page holding
    all the sections is created.
    """

    pe_obj = pe_init.PE(fdata, **kargs)

    # One section address off a 0x1000 boundary is enough to give up on the
    # one-page-per-section layout
    misaligned = any(section.addr & 0xFFF for section in pe_obj.SHList)

    if not misaligned:
        if load_hdr:
            # At least 0x200 bytes of header are loaded
            hdr_len = max(0x200, pe_obj.NThdr.sizeofheaders)
            # Pad up to the first section address, 0x1000 at most
            min_len = min(pe_obj.SHList[0].addr, 0x1000)
            padding = "\x00" * max(0, min_len - hdr_len)
            vm.add_memory_page(pe_obj.NThdr.ImageBase,
                               PAGE_READ | PAGE_WRITE,
                               pe_obj.content[:hdr_len] + padding,
                               "%r: PE Header" % name)

        if align_s:
            # Each section but the last one ends where the next one starts
            for index, section in enumerate(pe_obj.SHList[:-1]):
                new_size = pe_obj.SHList[index + 1].addr - section.addr
                section.size = new_size
                section.rawsize = new_size
                section.data = strpatchwork.StrPatchwork(
                    section.data[:new_size])
                section.offset = section.addr

            # Round the last section size up to a multiple of 0x1000
            tail = pe_obj.SHList[-1]
            tail.size = (tail.size + 0xfff) & 0xfffff000

        # One page per section, null padded up to the section size
        for section in pe_obj.SHList:
            raw = str(section.data)
            content = raw + "\x00" * (section.size - len(raw))
            if section.flags & 0x80000000:
                access = PAGE_READ | PAGE_WRITE
            else:
                access = PAGE_READ
            vm.add_memory_page(pe_obj.rva2virt(section.addr), access, content,
                               "%r: %r" % (name, section.name))

        return pe_obj

    # At least one section is not aligned
    log.warning('PE is not aligned, creating big section')
    low = 0 if load_hdr else None
    high = None
    last_index = len(pe_obj.SHList) - 1

    for index, section in enumerate(pe_obj.SHList):
        if index < last_index:
            # Not the last section: it ends where the next one starts
            section.size = pe_obj.SHList[index + 1].addr - section.addr
        section.rawsize = section.size
        section.offset = section.addr

        # Track the bounds of the area to map
        if low is None or section.addr < low:
            low = section.addr
        section_end = section.addr + max(section.size, len(section.data))
        if high is None or section_end > high:
            high = section_end

    min_addr = pe_obj.rva2virt(low)
    max_addr = pe_obj.rva2virt(high)
    span = max_addr - min_addr
    log.debug('Min: 0x%x, Max: 0x%x, Size: 0x%x', min_addr, max_addr, span)

    # A single zero-filled page covers the whole PE
    vm.add_memory_page(min_addr, PAGE_READ | PAGE_WRITE, span * "\x00")

    # Then each section content is written at its own address
    for section in pe_obj.SHList:
        virt_addr = pe_obj.rva2virt(section.addr)
        log.debug('Map 0x%x bytes to 0x%x', len(section.data), virt_addr)
        vm.set_mem(virt_addr, str(section.data))

    return pe_obj


def vm_load_pe_lib(vm, fname_in, libs, lib_path_base, **kargs):
    """Load the library @fname_in in @vm and register its exports in @libs
    @vm: VmMngr instance
    @fname_in: library name, relative to @lib_path_base
    @libs: libimp_pe instance
    @lib_path_base: DLLs relative path
    Return the corresponding PE instance
    Extra arguments are passed to vm_load_pe
    """

    log.info('Loading module %r', fname_in)

    lib_path = os.path.join(lib_path_base, fname_in)
    with open(lib_path, "rb") as fstream:
        content = fstream.read()
        loaded = vm_load_pe(vm, content, name=fname_in, **kargs)
    libs.add_export_lib(loaded, fname_in)
    return loaded


def vm_load_pe_libs(vm, libs_name, libs, lib_path_base, **kargs):
    """Load every library of @libs_name through vm_load_pe_lib
    @vm: VmMngr instance
    @libs_name: list of str (library filenames)
    @libs: libimp_pe instance
    @lib_path_base: DLLs relative path
    Return a dictionary Filename -> PE instances
    Extra arguments are passed to vm_load_pe_lib
    """
    loaded = {}
    for fname in libs_name:
        loaded[fname] = vm_load_pe_lib(vm, fname, libs, lib_path_base, **kargs)
    return loaded


def vm_fix_imports_pe_libs(lib_imgs, libs, lib_path_base,
                           patch_vm_imp=True, vm=None, **kargs):
    """Resolve the imports of every PE of @lib_imgs through @libs
    @lib_imgs: dictionary whose values are PE instances (e.g. as returned by
    vm_load_pe_libs)
    @libs: libimp_pe instance
    @lib_path_base: unused, kept for interface compatibility
    @patch_vm_imp: (optional) If True, write the resolved addresses in @vm
    @vm: (optional) VmMngr instance; mandatory when @patch_vm_imp is True and
    @lib_imgs is not empty
    Extra arguments are ignored.
    Raise ValueError if import slots must be patched but no @vm is given.
    """
    images = list(lib_imgs.values())
    # preload_pe expects the VmMngr as first argument; the previous call
    # omitted it, which shifted every argument by one position
    if images and patch_vm_imp and vm is None:
        raise ValueError("a 'vm' is required to patch import slots")
    for e in images:
        preload_pe(vm, e, libs, patch_vm_imp)


def vm2pe(myjit, fname, libs=None, e_orig=None,
          min_addr=None, max_addr=None,
          min_section_offset=0x1000, img_base=None,
          added_funcs=None, **kwargs):
    """Rebuild a PE from the memory of @myjit and write it to @fname
    @myjit: jitter-like object; its vm.get_all_memory() and pc are read
    @fname: path of the PE file to write
    @libs: (optional) libimp_pe instance used to rebuild the import directory
    @e_orig: (optional) original PE instance; provides the word size, the
    default address bounds, the default image base and the resource directory
    @min_addr: (optional) lowest memory page address to dump (inclusive);
    defaults to the lowest section address of @e_orig
    @max_addr: (optional) upper bound of the dumped page addresses
    (exclusive); defaults to the highest section end of @e_orig
    @min_section_offset: (optional) file offset given to the first section
    @img_base: (optional) image base of the new PE; when None, it is read
    from @e_orig, which must then be provided
    @added_funcs: (optional) iterable of (addr, funcaddr) couples registered
    in @libs before the import directory is generated
    Extra argument 'filter_import' (boolean f(pe, address)) replaces the
    default import filter, which keeps the addresses accepted by
    virt.is_addr_in() of the new PE.
    Return the generated PE instance.
    """
    if e_orig:
        wsize = e_orig._wsize
    else:
        wsize = 32
    mye = pe_init.PE(wsize=wsize)

    if min_addr is None and e_orig is not None:
        min_addr = min([e_orig.rva2virt(s.addr) for s in e_orig.SHList])
    if max_addr is None and e_orig is not None:
        max_addr = max([e_orig.rva2virt(s.addr + s.size)
                       for s in e_orig.SHList])

    if img_base is None:
        img_base = e_orig.NThdr.ImageBase

    mye.NThdr.ImageBase = img_base
    all_mem = myjit.vm.get_all_memory()
    # sorted() works whether keys() is a list or a view
    addrs = sorted(all_mem)
    mye.Opthdr.AddressOfEntryPoint = mye.virt2rva(myjit.pc)

    # One section per memory page lying in [min_addr, max_addr[
    first = True
    for ad in addrs:
        if not min_addr <= ad < max_addr:
            continue
        log.debug("0x%x", ad)
        if first:
            # Only the first section gets an explicit file offset
            mye.SHList.add_section(
                "%.8X" % ad,
                addr=ad - mye.NThdr.ImageBase,
                data=all_mem[ad]['data'],
                offset=min_section_offset)
        else:
            mye.SHList.add_section(
                "%.8X" % ad,
                addr=ad - mye.NThdr.ImageBase,
                data=all_mem[ad]['data'])
        first = False

    if libs:
        if added_funcs is not None:
            for addr, funcaddr in added_funcs:
                libbase, dllname = libs.fad2info[funcaddr]
                libs.lib_get_add_func(libbase, dllname, addr)

        filter_import = kwargs.get(
            'filter_import', lambda _, ad: mye.virt.is_addr_in(ad))
        new_dll = libs.gen_new_lib(mye, filter_import)
    else:
        new_dll = {}

    log.debug('%s', new_dll)

    # Import directory, stored in its own section
    mye.DirImport.add_dlldesc(new_dll)
    s_imp = mye.SHList.add_section("import", rawsize=len(mye.DirImport))
    mye.DirImport.set_rva(s_imp.addr)
    log.debug('%r', mye.SHList)

    if e_orig:
        # Resource directory: re-parsed from the rebuilt image at the rva
        # used by the original PE, then moved to a new section
        mye.content = str(mye)
        res_entry = e_orig.NThdr.optentries[pe.DIRECTORY_ENTRY_RESOURCE]
        ad = res_entry.rva
        res_size = res_entry.size
        log.debug('dirres 0x%x', ad)
        if ad != 0:
            mye.NThdr.optentries[pe.DIRECTORY_ENTRY_RESOURCE].rva = ad
            mye.NThdr.optentries[pe.DIRECTORY_ENTRY_RESOURCE].size = res_size
            mye.DirRes = pe.DirRes.unpack(mye.img_rva, ad, mye)
            log.debug('%r', mye.DirRes)
            s_res = mye.SHList.add_section(name="myres", rawsize=len(mye.DirRes))
            mye.DirRes.set_rva(s_res.addr)

    # Generation; the context manager closes the file even if writing fails
    with open(fname, 'wb') as fdesc:
        fdesc.write(str(mye))
    return mye


class libimp_pe(libimp):
    """libimp specialization for PE modules: registers the exports of loaded
    PE (forwarded exports included) and generates import descriptions."""

    def __init__(self, *args, **kwargs):
        super(libimp_pe, self).__init__(*args, **kwargs)
        # dependency -> redirector
        self.created_redirected_imports = {}

    def add_export_lib(self, e, name):
        """Register the exports of the PE @e under the library name @name
        @e: PE instance
        @name: library name
        Raise RuntimeError if @name has already been created as the target
        of a forwarded export, or if it has previously been faked.
        If @name is already known, its exports are not registered again.
        """
        if name in self.created_redirected_imports:
            # Adjacent literals are used instead of a backslash continuation,
            # which embedded the indentation of the source in the message
            log.error("%r has previously been created due to redirect "
                      "imports due to %r. Change the loading order.",
                      name, self.created_redirected_imports[name])
            raise RuntimeError('Bad import: loading previously created import')

        self.all_exported_lib.append(e)
        # will add real lib addresses to database
        if name in self.name2off:
            ad = self.name2off[name]
            if e is not None and name in self.fake_libs:
                log.error(
                    "You are trying to load %r but it has been faked previously. Try loading this module earlier.", name)
                raise RuntimeError("Bad import")
        else:
            log.debug('new lib %s', name)
            ad = e.NThdr.ImageBase
            libad = ad
            self.name2off[name] = ad
            self.libbase2lastad[ad] = ad + 0x1
            self.lib_imp2ad[ad] = {}
            self.lib_imp2dstad[ad] = {}
            self.libbase_ad += 0x1000

            ads = get_export_name_addr_list(e)
            todo = ads
            while todo:
                imp_ord_or_name, ad = todo.pop()

                # if export is a redirection, search redirected dll
                # and get function real addr
                ret = is_redirected_export(e, ad)
                if ret:
                    exp_dname, exp_fname = ret
                    exp_dname = exp_dname + '.dll'
                    exp_dname = exp_dname.lower()
                    # the dll forwards to one of its own exports
                    if exp_dname == name:
                        libad_tmp = self.name2off[exp_dname]
                        if exp_fname not in self.lib_imp2ad[libad_tmp]:
                            # target not registered yet: reschedule this
                            # export behind the pending ones
                            todo = [(imp_ord_or_name, ad)] + todo
                            continue
                    else:
                        # import redirected lib from non loaded dll
                        if exp_dname not in self.name2off:
                            self.created_redirected_imports.setdefault(
                                exp_dname, set()).add(name)

                        # Ensure import entry is created
                        new_lib_base = self.lib_get_add_base(exp_dname)
                        # Ensure function entry is created
                        _ = self.lib_get_add_func(new_lib_base, exp_fname)

                    c_name = canon_libname_libfunc(exp_dname, exp_fname)
                    libad_tmp = self.name2off[exp_dname]
                    # the export takes the address of its forward target
                    ad = self.lib_imp2ad[libad_tmp][exp_fname]

                self.lib_imp2ad[libad][imp_ord_or_name] = ad
                name_inv = dict([(x[1], x[0]) for x in self.name2off.items()])
                c_name = canon_libname_libfunc(
                    name_inv[libad], imp_ord_or_name)
                self.fad2cname[ad] = c_name
                self.cname2addr[c_name] = ad
                log.debug("Add func %s %s", hex(ad), c_name)
                self.fad2info[ad] = libad, imp_ord_or_name

    def gen_new_lib(self, target_pe, filter_import=lambda peobj, ad: True, **kwargs):
        """Gen a new DirImport description
        @target_pe: PE instance
        @filter_import: (boolean f(pe, address)) restrict addresses to keep
        Return a list of ({"name": libname, "firstthunk": rva}, functions)
        couples, one for each run of contiguous import slots whose first
        address can be converted to an rva of @target_pe.
        Extra arguments are ignored.
        """

        new_lib = []
        for lib_name, ad in self.name2off.items():
            # Build an IMAGE_IMPORT_DESCRIPTOR

            # Get fixed addresses
            out_ads = dict()  # addr -> func_name
            for func_name, dst_addresses in self.lib_imp2dstad[ad].items():
                out_ads.update({addr: func_name for addr in dst_addresses})

            # Filter available addresses according to @filter_import
            all_ads = [
                addr for addr in out_ads.keys() if filter_import(target_pe, addr)]
            if not all_ads:
                continue

            # Keep non-NULL elements. Filtering (instead of skipping the
            # leading NULL entries of the sorted list) also drops the last
            # entry when every address is NULL.
            all_ads = sorted(addr for addr in all_ads if addr not in (0, None))
            log.debug('ads: %s', [hex(addr) for addr in all_ads])

            while all_ads:
                # Find libname's Import Address Table
                othunk = all_ads[0]
                i = 0
                # Slots are contiguous when one word apart; floor division
                # keeps the word size in bytes an integer
                while (i + 1 < len(all_ads) and
                       all_ads[i] + target_pe._wsize // 8 == all_ads[i + 1]):
                    i += 1
                # 'i + 1' is IAT's length

                # Effectively build an IMAGE_IMPORT_DESCRIPTOR
                funcs = [out_ads[addr] for addr in all_ads[:i + 1]]
                try:
                    rva = target_pe.virt2rva(othunk)
                except pe.InvalidOffset:
                    # Slots outside of @target_pe are deliberately skipped
                    pass
                else:
                    new_lib.append(({"name": lib_name,
                                     "firstthunk": rva},
                                    funcs)
                                   )

                # Update elements to handle
                all_ads = all_ads[i + 1:]

        return new_lib


def vm_load_pe_and_dependencies(vm, fname, name2module, runtime_lib,
                                lib_path_base, **kwargs):
    """Load a binary and all its dependencies. Returns a dictionary mapping
    each binary name to its pe object (None if it could not be opened)

    @vm: virtual memory manager instance
    @fname: full path of the binary
    @name2module: dict containing association between name and pe
    object. Updated. Modules already present are not loaded again; entries
    set to None are considered unavailable.
    @runtime_lib: libimp instance
    @lib_path_base: directory of the libraries containing dependencies

    Extra arguments are passed to vm_load_pe.
    """

    todo = [(fname, fname, 0)]
    weight2name = {}
    done = set()

    # Walk dependencies; each dependency weighs one less than its importer
    while todo:
        name, fname, weight = todo.pop()
        if name in done:
            continue
        done.add(name)
        weight2name.setdefault(weight, set()).add(name)
        if name in name2module:
            pe_obj = name2module[name]
        else:
            try:
                with open(fname, "rb") as fstream:
                    log.info('Loading module name %r', fname)
                    pe_obj = vm_load_pe(
                        vm, fstream.read(), name=fname, **kwargs)
            except IOError:
                log.error('Cannot open %s', fname)
                pe_obj = None
            name2module[name] = pe_obj

        if pe_obj is None:
            # Module unavailable, either from this call or from a previous
            # one sharing @name2module: it has no dependency to walk
            continue

        new_dependencies = get_pe_dependencies(pe_obj)
        todo += [(dep, os.path.join(lib_path_base, dep), weight - 1)
                 for dep in new_dependencies]

    # Register exports by increasing weight, so that the most deeply nested
    # dependencies come first
    ordered_modules = sorted(weight2name.items())
    for _, modules in ordered_modules:
        for name in modules:
            pe_obj = name2module[name]
            if pe_obj is None:
                continue
            if pe_obj.DirExport:
                runtime_lib.add_export_lib(pe_obj, name)

    # Fix imports of every available module
    for pe_obj in name2module.values():
        if pe_obj is None:
            continue
        preload_pe(vm, pe_obj, runtime_lib, patch_vm_imp=True)

    return name2module

# machine -> arch
PE_machine = {
    0x14c: "x86_32",
    0x8664: "x86_64",
}


def guess_arch(pe):
    """Look up the architecture name matching the machine field of the COFF
    header of the PE container @pe.
    Return None for an unknown machine"""
    machine = pe.Coffhdr.machine
    return PE_machine.get(machine)

Module variables

var BREAKPOINT_READ

var BREAKPOINT_WRITE

var EXCEPT_ACCESS_VIOL

var EXCEPT_BREAKPOINT_INTERN

var EXCEPT_BREAKPOINT_MEMORY

var EXCEPT_CODE_AUTOMOD

var EXCEPT_DIV_BY_ZERO

var EXCEPT_DO_NOT_UPDATE_PC

var EXCEPT_ILLEGAL_INSN

var EXCEPT_INT_XX

var EXCEPT_NUM_UPDT_EIP

var EXCEPT_PRIV_INSN

var EXCEPT_SOFT_BP

var EXCEPT_SPR_ACCESS

var EXCEPT_UNK_MNEMO

var PAGE_EXEC

var PAGE_READ

var PAGE_WRITE

var PE_machine

var hnd

var log

Functions

def get_export_name_addr_list(

e)

def get_export_name_addr_list(e):
    """List the exports of the PE @e
    @e: PE instance
    Return a list of (name or ordinal, virtual address) couples built in
    three passes: exports by name, ordinals of the named exports, then
    every entry of the address table having a non-null rva.
    """
    exports = e.DirExport
    result = []

    # Exports by name
    for index, name_entry in enumerate(exports.f_names):
        target = exports.f_address[exports.f_nameordinals[index].ordinal]
        result.append((name_entry.name.name, e.rva2virt(target.rva)))

    # Ordinals of the named exports
    result.extend(
        (name_ord.ordinal + exports.expdesc.base,
         e.rva2virt(exports.f_address[name_ord.ordinal].rva))
        for name_ord in exports.f_nameordinals)

    # Whole address table, empty entries excepted
    result.extend(
        (index + exports.expdesc.base, e.rva2virt(entry.rva))
        for index, entry in enumerate(exports.f_address)
        if entry.rva)

    return result

def get_import_address_pe(

e)

def get_import_address_pe(e):
    """Map each imported symbol of a PE to the addresses of its import slots
    @e: PE instance
    Return a defaultdict(set) indexed by (lower-cased library name, function)
    couples; each value holds the virtual addresses of the matching thunk
    entries. The function part is the import name for pe.ImportByName
    entries and the raw entry otherwise (presumably an ordinal -- TODO
    confirm).
    """
    import2addr = defaultdict(set)
    if e.DirImport.impdesc is None:
        return import2addr
    for desc in e.DirImport.impdesc:
        libname = desc.dlldescname.name.lower()
        for index, imp in enumerate(desc.impbynames):
            if isinstance(imp, pe.ImportByName):
                funcname = imp.name
            else:
                funcname = imp
            # Floor division keeps the slot offset an integer even when "/"
            # is a true division (Python 3, "from __future__ import division")
            slot_rva = desc.firstthunk + (e._wsize * index) // 8
            import2addr[(libname, funcname)].add(e.rva2virt(slot_rva))
    return import2addr

def get_pe_dependencies(

pe_obj)

Return dependency set @pe_obj: pe object

def get_pe_dependencies(pe_obj):
    """Collect the names of the libraries imported by a PE
    @pe_obj: pe object
    Return a set of lower-cased library names; the set is empty when the PE
    has no import descriptor."""

    descriptors = pe_obj.DirImport.impdesc
    if descriptors is None:
        return set()
    return {desc.dlldescname.name.lower() for desc in descriptors}

def guess_arch(

pe)

Return the architecture specified by the PE container @pe. If unknown, return None

def guess_arch(pe):
    """Look up, in PE_machine, the architecture name matching the machine
    field of the COFF header of the PE container @pe.
    Return None for an unknown machine"""
    machine = pe.Coffhdr.machine
    return PE_machine.get(machine)

def is_redirected_export(

pe_obj, addr)

Test if the @addr is a forwarded export address. If so, return dllname/function name couple. If not, return False.

An export address is a forwarded export if the rva is in the export directory of the pe.

@pe_obj: PE instance @addr: virtual address of the function to test

def is_redirected_export(pe_obj, addr):
    """Tell whether the export located at @addr is a forwarded export.

    An export is considered forwarded when its rva lies inside the export
    directory of the pe. In that case, the null-terminated string found at
    @addr is split on its first '.' into a dll name and a function part.

    @pe_obj: PE instance
    @addr: virtual address of the function to test

    Return False if the export is not forwarded. Otherwise return a
    (lower-cased dll name, function) couple, where function is an int when
    the string uses the '#ordinal' form, and the function name otherwise.
    """

    export_entry = pe_obj.NThdr.optentries[pe.DIRECTORY_ENTRY_EXPORT]
    rva = pe_obj.virt2rva(addr)
    in_export_dir = (
        export_entry.rva <= rva < export_entry.rva + export_entry.size)
    if not in_export_dir:
        return False

    # Read the forwarder string, up to its null terminator
    terminator = pe_obj.virt.find('\x00', addr)
    forward = pe_obj.virt.get(addr, terminator)
    dllname, func_info = forward.split('.', 1)

    # Forward by ordinal
    if func_info.startswith('#'):
        func_info = int(func_info[1:])
    return dllname.lower(), func_info

def preload_pe(

vm, e, runtime_lib, patch_vm_imp=True)

def preload_pe(vm, e, runtime_lib, patch_vm_imp=True):
    """Resolve the imports of the PE @e through @runtime_lib
    @vm: VmMngr instance, only written when @patch_vm_imp is True
    @e: PE instance
    @runtime_lib: object providing lib_get_add_base and lib_get_add_func
    @patch_vm_imp: (optional) If True, write each resolved address in the
    memory of @vm, at the address of the corresponding import slot
    Return a dictionary: canonical function name -> resolved address
    """
    resolved = {}
    for (libname, libfunc), slots in get_import_address_pe(e).items():
        for slot in slots:
            lib_base = runtime_lib.lib_get_add_base(libname)
            func_addr = runtime_lib.lib_get_add_func(lib_base, libfunc, slot)

            resolved[canon_libname_libfunc(libname, libfunc)] = func_addr
            if not patch_vm_imp:
                continue
            # The slot is as wide as a word of the PE
            packed = struct.pack(cstruct.size2type[e._wsize], func_addr)
            vm.set_mem(slot, packed)
    return resolved

def vm2pe(

myjit, fname, libs=None, e_orig=None, min_addr=None, max_addr=None, min_section_offset=4096, img_base=None, added_funcs=None, **kwargs)

def vm2pe(myjit, fname, libs=None, e_orig=None,
          min_addr=None, max_addr=None,
          min_section_offset=0x1000, img_base=None,
          added_funcs=None, **kwargs):
    """Rebuild a PE from the memory of @myjit and write it to @fname
    @myjit: jitter-like object; its vm.get_all_memory() and pc are read
    @fname: path of the PE file to write
    @libs: (optional) libimp_pe instance used to rebuild the import directory
    @e_orig: (optional) original PE instance; provides the word size, the
    default address bounds, the default image base and the resource directory
    @min_addr: (optional) lowest memory page address to dump (inclusive);
    defaults to the lowest section address of @e_orig
    @max_addr: (optional) upper bound of the dumped page addresses
    (exclusive); defaults to the highest section end of @e_orig
    @min_section_offset: (optional) file offset given to the first section
    @img_base: (optional) image base of the new PE; when None, it is read
    from @e_orig, which must then be provided
    @added_funcs: (optional) iterable of (addr, funcaddr) couples registered
    in @libs before the import directory is generated
    Extra argument 'filter_import' (boolean f(pe, address)) replaces the
    default import filter, which keeps the addresses accepted by
    virt.is_addr_in() of the new PE.
    Return the generated PE instance.
    """
    if e_orig:
        wsize = e_orig._wsize
    else:
        wsize = 32
    mye = pe_init.PE(wsize=wsize)

    if min_addr is None and e_orig is not None:
        min_addr = min([e_orig.rva2virt(s.addr) for s in e_orig.SHList])
    if max_addr is None and e_orig is not None:
        max_addr = max([e_orig.rva2virt(s.addr + s.size)
                       for s in e_orig.SHList])

    if img_base is None:
        img_base = e_orig.NThdr.ImageBase

    mye.NThdr.ImageBase = img_base
    all_mem = myjit.vm.get_all_memory()
    # sorted() works whether keys() is a list or a view
    addrs = sorted(all_mem)
    mye.Opthdr.AddressOfEntryPoint = mye.virt2rva(myjit.pc)

    # One section per memory page lying in [min_addr, max_addr[
    first = True
    for ad in addrs:
        if not min_addr <= ad < max_addr:
            continue
        log.debug("0x%x", ad)
        if first:
            # Only the first section gets an explicit file offset
            mye.SHList.add_section(
                "%.8X" % ad,
                addr=ad - mye.NThdr.ImageBase,
                data=all_mem[ad]['data'],
                offset=min_section_offset)
        else:
            mye.SHList.add_section(
                "%.8X" % ad,
                addr=ad - mye.NThdr.ImageBase,
                data=all_mem[ad]['data'])
        first = False

    if libs:
        if added_funcs is not None:
            for addr, funcaddr in added_funcs:
                libbase, dllname = libs.fad2info[funcaddr]
                libs.lib_get_add_func(libbase, dllname, addr)

        filter_import = kwargs.get(
            'filter_import', lambda _, ad: mye.virt.is_addr_in(ad))
        new_dll = libs.gen_new_lib(mye, filter_import)
    else:
        new_dll = {}

    log.debug('%s', new_dll)

    # Import directory, stored in its own section
    mye.DirImport.add_dlldesc(new_dll)
    s_imp = mye.SHList.add_section("import", rawsize=len(mye.DirImport))
    mye.DirImport.set_rva(s_imp.addr)
    log.debug('%r', mye.SHList)

    if e_orig:
        # Resource directory: re-parsed from the rebuilt image at the rva
        # used by the original PE, then moved to a new section
        mye.content = str(mye)
        res_entry = e_orig.NThdr.optentries[pe.DIRECTORY_ENTRY_RESOURCE]
        ad = res_entry.rva
        res_size = res_entry.size
        log.debug('dirres 0x%x', ad)
        if ad != 0:
            mye.NThdr.optentries[pe.DIRECTORY_ENTRY_RESOURCE].rva = ad
            mye.NThdr.optentries[pe.DIRECTORY_ENTRY_RESOURCE].size = res_size
            mye.DirRes = pe.DirRes.unpack(mye.img_rva, ad, mye)
            log.debug('%r', mye.DirRes)
            s_res = mye.SHList.add_section(name="myres", rawsize=len(mye.DirRes))
            mye.DirRes.set_rva(s_res.addr)

    # Generation; the context manager closes the file even if writing fails
    with open(fname, 'wb') as fdesc:
        fdesc.write(str(mye))
    return mye

def vm_fix_imports_pe_libs(

lib_imgs, libs, lib_path_base, patch_vm_imp=True, **kargs)

def vm_fix_imports_pe_libs(lib_imgs, libs, lib_path_base,
                           patch_vm_imp=True, vm=None, **kargs):
    """Resolve the imports of every PE of @lib_imgs through @libs
    @lib_imgs: dictionary whose values are PE instances (e.g. as returned by
    vm_load_pe_libs)
    @libs: libimp_pe instance
    @lib_path_base: unused, kept for interface compatibility
    @patch_vm_imp: (optional) If True, write the resolved addresses in @vm
    @vm: (optional) VmMngr instance; mandatory when @patch_vm_imp is True and
    @lib_imgs is not empty
    Extra arguments are ignored.
    Raise ValueError if import slots must be patched but no @vm is given.
    """
    images = list(lib_imgs.values())
    # preload_pe expects the VmMngr as first argument; the previous call
    # omitted it, which shifted every argument by one position
    if images and patch_vm_imp and vm is None:
        raise ValueError("a 'vm' is required to patch import slots")
    for e in images:
        preload_pe(vm, e, libs, patch_vm_imp)

def vm_load_pe(

vm, fdata, align_s=True, load_hdr=True, name='', **kargs)

Load a PE in memory (@vm) from a data buffer @fdata @vm: VmMngr instance @fdata: data buffer to parse @align_s: (optional) If False, keep gaps between section @load_hdr: (optional) If False, do not load the NThdr in memory Return the corresponding PE instance.

Extra arguments are passed to PE instantiation. If all sections are aligned, they are mapped on several different pages; otherwise, one big page containing all sections is created.

def vm_load_pe(vm, fdata, align_s=True, load_hdr=True, name="", **kargs):
    """Parse the buffer @fdata as a PE and map it in the memory of @vm
    @vm: VmMngr instance
    @fdata: data buffer to parse
    @align_s: (optional) If True (aligned layout only), each section is
    resized up to the next section address and the size of the last one is
    rounded up to a multiple of 0x1000
    @load_hdr: (optional) If False, do not load the NThdr in memory
    @name: (optional) label used in the names of the created memory pages
    Return the corresponding PE instance.

    Extra arguments are passed to PE instantiation.
    If every section address is a multiple of 0x1000, the header and each
    section get their own memory page. Otherwise, a single big page holding
    all the sections is created.
    """

    pe_obj = pe_init.PE(fdata, **kargs)

    # One section address off a 0x1000 boundary is enough to give up on the
    # one-page-per-section layout
    misaligned = any(section.addr & 0xFFF for section in pe_obj.SHList)

    if not misaligned:
        if load_hdr:
            # At least 0x200 bytes of header are loaded
            hdr_len = max(0x200, pe_obj.NThdr.sizeofheaders)
            # Pad up to the first section address, 0x1000 at most
            min_len = min(pe_obj.SHList[0].addr, 0x1000)
            padding = "\x00" * max(0, min_len - hdr_len)
            vm.add_memory_page(pe_obj.NThdr.ImageBase,
                               PAGE_READ | PAGE_WRITE,
                               pe_obj.content[:hdr_len] + padding,
                               "%r: PE Header" % name)

        if align_s:
            # Each section but the last one ends where the next one starts
            for index, section in enumerate(pe_obj.SHList[:-1]):
                new_size = pe_obj.SHList[index + 1].addr - section.addr
                section.size = new_size
                section.rawsize = new_size
                section.data = strpatchwork.StrPatchwork(
                    section.data[:new_size])
                section.offset = section.addr

            # Round the last section size up to a multiple of 0x1000
            tail = pe_obj.SHList[-1]
            tail.size = (tail.size + 0xfff) & 0xfffff000

        # One page per section, null padded up to the section size
        for section in pe_obj.SHList:
            raw = str(section.data)
            content = raw + "\x00" * (section.size - len(raw))
            if section.flags & 0x80000000:
                access = PAGE_READ | PAGE_WRITE
            else:
                access = PAGE_READ
            vm.add_memory_page(pe_obj.rva2virt(section.addr), access, content,
                               "%r: %r" % (name, section.name))

        return pe_obj

    # At least one section is not aligned
    log.warning('PE is not aligned, creating big section')
    low = 0 if load_hdr else None
    high = None
    last_index = len(pe_obj.SHList) - 1

    for index, section in enumerate(pe_obj.SHList):
        if index < last_index:
            # Not the last section: it ends where the next one starts
            section.size = pe_obj.SHList[index + 1].addr - section.addr
        section.rawsize = section.size
        section.offset = section.addr

        # Track the bounds of the area to map
        if low is None or section.addr < low:
            low = section.addr
        section_end = section.addr + max(section.size, len(section.data))
        if high is None or section_end > high:
            high = section_end

    min_addr = pe_obj.rva2virt(low)
    max_addr = pe_obj.rva2virt(high)
    span = max_addr - min_addr
    log.debug('Min: 0x%x, Max: 0x%x, Size: 0x%x', min_addr, max_addr, span)

    # A single zero-filled page covers the whole PE
    vm.add_memory_page(min_addr, PAGE_READ | PAGE_WRITE, span * "\x00")

    # Then each section content is written at its own address
    for section in pe_obj.SHList:
        virt_addr = pe_obj.rva2virt(section.addr)
        log.debug('Map 0x%x bytes to 0x%x', len(section.data), virt_addr)
        vm.set_mem(virt_addr, str(section.data))

    return pe_obj

def vm_load_pe_and_dependencies(

vm, fname, name2module, runtime_lib, lib_path_base, **kwargs)

Load a binary and all its dependencies. Returns a dictionary mapping each binary name to its PE object.

@vm: virtual memory manager instance @fname: full path of the binary @name2module: dict containing association between name and pe object. Updated. @runtime_lib: libimp instance @lib_path_base: directory of the libraries containing dependencies

def vm_load_pe_and_dependencies(vm, fname, name2module, runtime_lib,
                                lib_path_base, **kwargs):
    """Load a binary and all its dependencies. Returns a dictionary containing
    the association between binary names and their pe object

    @vm: virtual memory manager instance
    @fname: full path of the binary
    @name2module: dict containing association between name and pe
    object. Updated. A module which cannot be opened is stored as None.
    @runtime_lib: libimp instance
    @lib_path_base: directory of the libraries containing dependencies

    Extra keyword arguments are passed to vm_load_pe
    """

    # Work list of (module name, path on disk, weight); each dependency gets
    # a weight one below the module importing it
    todo = [(fname, fname, 0)]
    weight2name = {}
    done = set()

    # Walk dependencies recursively
    while todo:
        name, path, weight = todo.pop()
        if name in done:
            continue
        done.add(name)
        weight2name.setdefault(weight, set()).add(name)

        if name in name2module:
            pe_obj = name2module[name]
            if pe_obj is None:
                # Recorded as unloadable (see the IOError handler below):
                # there is no PE to inspect for dependencies
                continue
        else:
            try:
                with open(path, "rb") as fstream:
                    log.info('Loading module name %r', path)
                    pe_obj = vm_load_pe(
                        vm, fstream.read(), name=path, **kwargs)
            except IOError:
                log.error('Cannot open %s', path)
                name2module[name] = None
                continue
            name2module[name] = pe_obj

        todo += [(dep, os.path.join(lib_path_base, dep), weight - 1)
                 for dep in get_pe_dependencies(pe_obj)]

    # Register exports by increasing weight, i.e. the deepest dependencies
    # first
    for _, modules in sorted(weight2name.items()):
        for name in modules:
            pe_obj = name2module[name]
            if pe_obj is None:
                continue
            if pe_obj.DirExport:
                runtime_lib.add_export_lib(pe_obj, name)

    # Resolve the imports of every loaded module
    for pe_obj in name2module.values():
        if pe_obj is None:
            continue
        preload_pe(vm, pe_obj, runtime_lib, patch_vm_imp=True)

    return name2module

def vm_load_pe_lib(

vm, fname_in, libs, lib_path_base, **kargs)

Call vm_load_pe on @fname_in and update @libs accordingly @vm: VmMngr instance @fname_in: library name @libs: libimp_pe instance @lib_path_base: DLLs relative path Return the corresponding PE instance Extra arguments are passed to vm_load_pe

def vm_load_pe_lib(vm, fname_in, libs, lib_path_base, **kargs):
    """Load the library @fname_in with vm_load_pe and register its exports
    in @libs
    @vm: VmMngr instance
    @fname_in: library name, also used as the module name
    @libs: libimp_pe instance
    @lib_path_base: directory joined with @fname_in to locate the file
    Return the corresponding PE instance
    Extra arguments are passed to vm_load_pe
    """

    log.info('Loading module %r', fname_in)

    lib_path = os.path.join(lib_path_base, fname_in)
    with open(lib_path, "rb") as fstream:
        raw_data = fstream.read()

    pe_obj = vm_load_pe(vm, raw_data, name=fname_in, **kargs)
    libs.add_export_lib(pe_obj, fname_in)
    return pe_obj

def vm_load_pe_libs(

vm, libs_name, libs, lib_path_base, **kargs)

Call vm_load_pe_lib on each @libs_name filename @vm: VmMngr instance @libs_name: list of str @libs: libimp_pe instance @lib_path_base: (optional) DLLs relative path Return a dictionary Filename -> PE instances Extra arguments are passed to vm_load_pe_lib

def vm_load_pe_libs(vm, libs_name, libs, lib_path_base, **kargs):
    """Load every library listed in @libs_name through vm_load_pe_lib
    @vm: VmMngr instance
    @libs_name: list of str
    @libs: libimp_pe instance
    @lib_path_base: DLLs relative path
    Return a dictionary Filename -> PE instances
    Extra arguments are passed to vm_load_pe_lib
    """
    loaded = {}
    for lib_fname in libs_name:
        loaded[lib_fname] = vm_load_pe_lib(
            vm, lib_fname, libs, lib_path_base, **kargs)
    return loaded

Classes

class libimp_pe

class libimp_pe(libimp):
    """libimp specialisation for PE modules

    On top of the libimp tables, it remembers which library entries were
    created only because another library redirects an export to them.
    """

    def __init__(self, *args, **kwargs):
        super(libimp_pe, self).__init__(*args, **kwargs)
        # dependency -> set of libraries redirecting an export to it
        self.created_redirected_imports = {}

    def add_export_lib(self, e, name):
        """Register the exports of the PE @e under the library name @name
        @e: PE instance; its NThdr.ImageBase is used as library base
        @name: library name, used as key of the import database

        If @name is already known in name2off, no export is registered.

        Raise RuntimeError if @name has been created earlier as the target
        of a redirected export, or if @name has been faked previously.
        """
        if name in self.created_redirected_imports:
            log.error("%r has previously been created due to redirect\
            imports due to %r. Change the loading order.",
                      name, self.created_redirected_imports[name])
            raise RuntimeError('Bad import: loading previously created import')

        self.all_exported_lib.append(e)
        # will add real lib addresses to database
        if name in self.name2off:
            ad = self.name2off[name]
            if e is not None and name in self.fake_libs:
                log.error(
                    "You are trying to load %r but it has been faked previously. Try loading this module earlier.", name)
                raise RuntimeError("Bad import")
        else:
            log.debug('new lib %s', name)
            ad = e.NThdr.ImageBase
            # 'ad' is reused below for each export; 'libad' keeps the base
            libad = ad
            self.name2off[name] = ad
            self.libbase2lastad[ad] = ad + 0x1
            self.lib_imp2ad[ad] = {}
            self.lib_imp2dstad[ad] = {}
            self.libbase_ad += 0x1000

            ads = get_export_name_addr_list(e)
            # Work list of (ordinal or name, address), consumed from its end
            todo = ads
            while todo:
                imp_ord_or_name, ad = todo.pop()

                # if export is a redirection, search redirected dll
                # and get function real addr
                ret = is_redirected_export(e, ad)
                if ret:
                    exp_dname, exp_fname = ret
                    exp_dname = exp_dname + '.dll'
                    exp_dname = exp_dname.lower()
                    # the dll redirects to one of its own exports
                    if exp_dname == name:
                        libad_tmp = self.name2off[exp_dname]
                        if not exp_fname in self.lib_imp2ad[libad_tmp]:
                            # Target not registered yet: put the export back
                            # at the front of the work list.
                            # NOTE(review): this never terminates if the
                            # target is never registered - confirm inputs
                            todo = [(imp_ord_or_name, ad)] + todo
                            continue
                    else:
                        # import redirected lib from non loaded dll
                        if not exp_dname in self.name2off:
                            self.created_redirected_imports.setdefault(
                                exp_dname, set()).add(name)

                        # Ensure import entry is created
                        new_lib_base = self.lib_get_add_base(exp_dname)
                        # Ensure function entry is created
                        _ = self.lib_get_add_func(new_lib_base, exp_fname)

                    # NOTE(review): this value is overwritten below before
                    # being used
                    c_name = canon_libname_libfunc(exp_dname, exp_fname)
                    libad_tmp = self.name2off[exp_dname]
                    # Use the address recorded for the redirection target
                    ad = self.lib_imp2ad[libad_tmp][exp_fname]

                self.lib_imp2ad[libad][imp_ord_or_name] = ad
                # Reverse mapping, base address -> library name
                name_inv = dict([(x[1], x[0]) for x in self.name2off.items()])
                c_name = canon_libname_libfunc(
                    name_inv[libad], imp_ord_or_name)
                self.fad2cname[ad] = c_name
                self.cname2addr[c_name] = ad
                log.debug("Add func %s %s", hex(ad), c_name)
                self.fad2info[ad] = libad, imp_ord_or_name

    def gen_new_lib(self, target_pe, filter_import=lambda peobj, ad: True, **kwargs):
        """Gen a new DirImport description
        @target_pe: PE instance
        @filter_import: (boolean f(pe, address)) restrict addresses to keep

        Return a list of ({"name": lib_name, "firstthunk": rva}, funcs)
        tuples, one for each run of contiguous destination addresses; funcs
        lists the function names of the run in address order.
        """

        new_lib = []
        for lib_name, ad in self.name2off.items():
            # Build an IMAGE_IMPORT_DESCRIPTOR

            # Get fixed addresses
            out_ads = dict()  # addr -> func_name
            for func_name, dst_addresses in self.lib_imp2dstad[ad].items():
                out_ads.update({addr: func_name for addr in dst_addresses})

            # Filter available addresses according to @filter_import and keep
            # non-NULL elements only. NULL entries are removed before sorting
            # so that a list made only of 0 / None yields nothing instead of
            # keeping its last element.
            all_ads = sorted(
                addr for addr in out_ads
                if filter_import(target_pe, addr) and addr not in (0, None))
            if not all_ads:
                continue
            log.debug('ads: %s', [hex(addr) for addr in all_ads])

            while all_ads:
                # Find libname's Import Address Table
                othunk = all_ads[0]
                i = 0
                while (i + 1 < len(all_ads) and
                       all_ads[i] + target_pe._wsize // 8 == all_ads[i + 1]):
                    i += 1
                # 'i + 1' is IAT's length

                # Effectively build an IMAGE_IMPORT_DESCRIPTOR
                funcs = [out_ads[addr] for addr in all_ads[:i + 1]]
                try:
                    rva = target_pe.virt2rva(othunk)
                except pe.InvalidOffset:
                    # Thunk address rejected by @target_pe: skip this run
                    pass
                else:
                    new_lib.append(({"name": lib_name,
                                     "firstthunk": rva},
                                    funcs)
                                   )

                # Update elements to handle
                all_ads = all_ads[i + 1:]

        return new_lib

Ancestors (in MRO)

  • libimp_pe
  • miasm2.jitter.loader.utils.libimp
  • __builtin__.object

Instance variables

var created_redirected_imports

Methods

def __init__(

self, *args, **kwargs)

def __init__(self, *args, **kwargs):
    """Initialise the libimp state, then the redirection bookkeeping"""
    super(libimp_pe, self).__init__(*args, **kwargs)
    # Maps a dependency name to the set of libraries whose redirected
    # exports caused its creation
    self.created_redirected_imports = dict()

def add_export_lib(

self, e, name)

def add_export_lib(self, e, name):
    """Register the exports of the PE @e under the library name @name
    @e: PE instance; its NThdr.ImageBase is used as library base
    @name: library name, used as key of the import database

    If @name is already known in name2off, no export is registered.

    Raise RuntimeError if @name has been created earlier as the target of a
    redirected export, or if @name has been faked previously.
    """
    if name in self.created_redirected_imports:
        log.error("%r has previously been created due to redirect\
        imports due to %r. Change the loading order.",
                  name, self.created_redirected_imports[name])
        raise RuntimeError('Bad import: loading previously created import')
    self.all_exported_lib.append(e)
    # will add real lib addresses to database
    if name in self.name2off:
        ad = self.name2off[name]
        if e is not None and name in self.fake_libs:
            log.error(
                "You are trying to load %r but it has been faked previously. Try loading this module earlier.", name)
            raise RuntimeError("Bad import")
    else:
        log.debug('new lib %s', name)
        ad = e.NThdr.ImageBase
        # 'ad' is reused below for each export; 'libad' keeps the base
        libad = ad
        self.name2off[name] = ad
        self.libbase2lastad[ad] = ad + 0x1
        self.lib_imp2ad[ad] = {}
        self.lib_imp2dstad[ad] = {}
        self.libbase_ad += 0x1000
        ads = get_export_name_addr_list(e)
        # Work list of (ordinal or name, address), consumed from its end
        todo = ads
        while todo:
            imp_ord_or_name, ad = todo.pop()
            # if export is a redirection, search redirected dll
            # and get function real addr
            ret = is_redirected_export(e, ad)
            if ret:
                exp_dname, exp_fname = ret
                exp_dname = exp_dname + '.dll'
                exp_dname = exp_dname.lower()
                # the dll redirects to one of its own exports
                if exp_dname == name:
                    libad_tmp = self.name2off[exp_dname]
                    if not exp_fname in self.lib_imp2ad[libad_tmp]:
                        # Target not registered yet: put the export back at
                        # the front of the work list.
                        # NOTE(review): this never terminates if the target
                        # is never registered - confirm inputs
                        todo = [(imp_ord_or_name, ad)] + todo
                        continue
                else:
                    # import redirected lib from non loaded dll
                    if not exp_dname in self.name2off:
                        self.created_redirected_imports.setdefault(
                            exp_dname, set()).add(name)
                    # Ensure import entry is created
                    new_lib_base = self.lib_get_add_base(exp_dname)
                    # Ensure function entry is created
                    _ = self.lib_get_add_func(new_lib_base, exp_fname)
                # NOTE(review): this value is overwritten below before being
                # used
                c_name = canon_libname_libfunc(exp_dname, exp_fname)
                libad_tmp = self.name2off[exp_dname]
                # Use the address recorded for the redirection target
                ad = self.lib_imp2ad[libad_tmp][exp_fname]
            self.lib_imp2ad[libad][imp_ord_or_name] = ad
            # Reverse mapping, base address -> library name
            name_inv = dict([(x[1], x[0]) for x in self.name2off.items()])
            c_name = canon_libname_libfunc(
                name_inv[libad], imp_ord_or_name)
            self.fad2cname[ad] = c_name
            self.cname2addr[c_name] = ad
            log.debug("Add func %s %s", hex(ad), c_name)
            self.fad2info[ad] = libad, imp_ord_or_name

def check_dst_ad(

self)

def check_dst_ad(self):
    """Check that, for every library, the recorded destination addresses
    are 4 bytes apart from one another

    Return False as soon as a library with several destination addresses
    has an unset (None) one, or two neighbours which are not 4 bytes apart;
    return True otherwise. The 4-byte stride is hard-coded.
    """
    for functions in self.lib_imp2dstad.values():
        all_ads = []
        for dst in functions.values():
            # lib_get_add_func stores a set of destinations per function;
            # a bare address is accepted as well
            if isinstance(dst, (set, frozenset, list, tuple)):
                all_ads.extend(dst)
            else:
                all_ads.append(dst)

        # Nothing to compare with fewer than two addresses
        if len(all_ads) < 2:
            continue
        # Checked before sorting: None cannot be ordered against integers
        # on every Python version
        if None in all_ads:
            return False

        all_ads.sort()
        for current, following in zip(all_ads, all_ads[1:]):
            if current + 4 != following:
                return False
    return True

def gen_new_lib(

self, target_pe, filter_import=lambda peobj, ad: True, **kwargs)

Gen a new DirImport description @target_pe: PE instance @filter_import: (boolean f(pe, address)) restrict addresses to keep

def gen_new_lib(self, target_pe, filter_import=lambda peobj, ad: True, **kwargs):
    """Gen a new DirImport description
    @target_pe: PE instance
    @filter_import: (boolean f(pe, address)) restrict addresses to keep

    Return a list of ({"name": lib_name, "firstthunk": rva}, funcs) tuples,
    one for each run of contiguous destination addresses; funcs lists the
    function names of the run in address order.
    """
    new_lib = []
    for lib_name, ad in self.name2off.items():
        # Build an IMAGE_IMPORT_DESCRIPTOR
        # Get fixed addresses
        out_ads = dict()  # addr -> func_name
        for func_name, dst_addresses in self.lib_imp2dstad[ad].items():
            out_ads.update({addr: func_name for addr in dst_addresses})

        # Filter available addresses according to @filter_import and keep
        # non-NULL elements only. NULL entries are removed before sorting so
        # that a list made only of 0 / None yields nothing instead of keeping
        # its last element.
        all_ads = sorted(
            addr for addr in out_ads
            if filter_import(target_pe, addr) and addr not in (0, None))
        if not all_ads:
            continue
        log.debug('ads: %s', [hex(addr) for addr in all_ads])

        while all_ads:
            # Find libname's Import Address Table
            othunk = all_ads[0]
            i = 0
            while (i + 1 < len(all_ads) and
                   all_ads[i] + target_pe._wsize // 8 == all_ads[i + 1]):
                i += 1
            # 'i + 1' is IAT's length
            # Effectively build an IMAGE_IMPORT_DESCRIPTOR
            funcs = [out_ads[addr] for addr in all_ads[:i + 1]]
            try:
                rva = target_pe.virt2rva(othunk)
            except pe.InvalidOffset:
                # Thunk address rejected by @target_pe: skip this run
                pass
            else:
                new_lib.append(({"name": lib_name,
                                 "firstthunk": rva},
                                funcs)
                               )
            # Update elements to handle
            all_ads = all_ads[i + 1:]
    return new_lib

def lib_get_add_base(

self, name)

def lib_get_add_base(self, name):
    """Return the base address registered for the library @name, creating
    a dummy entry when the library is unknown
    @name: library name; it is lowercased, stripped of surrounding spaces
    and suffixed with '.dll' when it contains no dot
    """
    libname = name.lower().strip(' ')
    if "." not in libname:
        log.debug('warning adding .dll to modulename')
        libname += '.dll'
        log.debug(libname)

    if libname in self.name2off:
        return self.name2off[libname]

    # Unknown library: mark it as fake and reserve a base with empty tables
    base = self.libbase_ad
    log.warning("Create dummy entry for %r", libname)
    self.fake_libs.add(libname)
    self.name2off[libname] = base
    self.libbase2lastad[base] = base + 0x4
    self.lib_imp2ad[base] = {}
    self.lib_imp2dstad[base] = {}
    self.libbase_ad += 0x1000
    return base

def lib_get_add_func(

self, libad, imp_ord_or_name, dst_ad=None)

def lib_get_add_func(self, libad, imp_ord_or_name, dst_ad=None):
    """Return the address associated with the function @imp_ord_or_name of
    the library based at @libad, allocating one if needed
    @libad: library base address, as found in name2off
    @imp_ord_or_name: function ordinal or name
    @dst_ad: (optional) destination address recorded for this function

    Raise ValueError if @libad is not a known library base.
    """
    if libad not in self.name2off.values():
        raise ValueError('unknown lib base!', hex(libad))

    # A function can have multiple destination addresses
    dst_by_func = self.lib_imp2dstad[libad]
    dst_by_func.setdefault(imp_ord_or_name, set()).add(dst_ad)

    ad_by_func = self.lib_imp2ad[libad]
    if imp_ord_or_name in ad_by_func:
        return ad_by_func[imp_ord_or_name]

    # New function: take the next free address of the library
    func_ad = self.libbase2lastad[libad]
    self.libbase2lastad[libad] = func_ad + 0x10  # arbitrary
    ad_by_func[imp_ord_or_name] = func_ad

    # Reverse mapping, base address -> library name
    base2name = {base: libname for libname, base in self.name2off.items()}
    c_name = canon_libname_libfunc(base2name[libad], imp_ord_or_name)
    self.fad2cname[func_ad] = c_name
    self.cname2addr[c_name] = func_ad
    self.fad2info[func_ad] = libad, imp_ord_or_name
    return func_ad