Miasm2
 All Classes Namespaces Files Functions Variables Typedefs Properties Macros
pe.py
Go to the documentation of this file.
1 import os
2 import struct
3 import logging
4 from collections import defaultdict
5 
6 from elfesteem import pe
7 from elfesteem import cstruct
8 from elfesteem import *
9 
10 from miasm2.jitter.csts import *
11 from miasm2.jitter.loader.utils import canon_libname_libfunc, libimp
12 
13 
# Loader logging: a dedicated handler with a "[LEVEL]: message" layout is
# attached to the 'loader_pe' logger, whose threshold is set to CRITICAL.
hnd = logging.StreamHandler()
hnd.setFormatter(logging.Formatter("[%(levelname)s]: %(message)s"))

log = logging.getLogger('loader_pe')
log.addHandler(hnd)
log.setLevel(logging.CRITICAL)
19 
20 
def get_import_address_pe(e):
    """Return the import slots of every function imported by the PE @e
    @e: PE instance
    Return a dictionary (library name, function) -> set of virtual addresses.
    The library name is lowercased. The function is the import name when the
    entry is a pe.ImportByName, otherwise the entry itself (presumably an
    ordinal -- confirm against elfesteem).
    """
    import2addr = defaultdict(set)
    if e.DirImport.impdesc is None:
        return import2addr

    for desc in e.DirImport.impdesc:
        libname = desc.dlldescname.name.lower()
        for index, imp in enumerate(desc.impbynames):
            funcname = imp.name if isinstance(imp, pe.ImportByName) else imp
            # Slot number @index of the table starting at firstthunk; one slot
            # is _wsize bits wide. Floor division keeps the offset an integer.
            slot_rva = desc.firstthunk + e._wsize * index // 8
            import2addr[(libname, funcname)].add(e.rva2virt(slot_rva))
    return import2addr
38 
39 
def preload_pe(vm, e, runtime_lib, patch_vm_imp=True):
    """Resolve the imports of the PE @e through @runtime_lib
    @vm: VmMngr instance, only written to when @patch_vm_imp is set
    @e: PE instance
    @runtime_lib: object providing lib_get_add_base and lib_get_add_func
    @patch_vm_imp: (optional) If True, write each resolved address in its
    import slot, in @vm memory
    Return a dictionary canonical function name -> resolved address
    """
    dyn_funcs = {}
    imports = get_import_address_pe(e)
    for (libname, libfunc), slots in imports.items():
        for slot in slots:
            lib_base = runtime_lib.lib_get_add_base(libname)
            func_addr = runtime_lib.lib_get_add_func(lib_base, libfunc, slot)

            dyn_funcs[canon_libname_libfunc(libname, libfunc)] = func_addr
            if not patch_vm_imp:
                continue
            # Store the address using the word size of the PE
            packed = struct.pack(cstruct.size2type[e._wsize], func_addr)
            vm.set_mem(slot, packed)
    return dyn_funcs
55 
56 
def is_redirected_export(e, ad):
    """Test whether @ad points to a redirection string rather than to code
    @e: PE instance; e.virt(address) is read one element at a time
    @ad: virtual address of the export
    Return False if the (at most 0x200 long) NUL-terminated data at @ad
    contains a character which is neither alphanumeric nor one of
    "_.-+*$@&#()[]={}", or if it contains no '.'.
    Otherwise return the tuple (text before the first '.', text after it).
    """
    allowed = "_.-+*$@&#()[]={}"
    chars = []
    for offset in range(0x200):
        c = e.virt(ad + offset)
        if c == "\x00":
            break
        if not (c.isalnum() or c in allowed):
            return False
        chars.append(c)

    out = "".join(chars)
    if "." not in out:
        return False
    dllname, _, funcname = out.partition(".")
    return dllname, funcname
71 
72 
def get_export_name_addr_list(e):
    """Return the exports of the PE @e as a list of (key, virtual address)
    @e: PE instance
    Named exports come first, keyed by their name. They are followed by one
    entry per non-empty slot of the export address table, keyed by ordinal
    (slot index + ordinal base), so that exports without a name are listed
    too.
    """
    out = []
    # add func name
    for i, name_entry in enumerate(e.DirExport.f_names):
        addr = e.DirExport.f_address[e.DirExport.f_nameordinals[i].ordinal]
        out.append((name_entry.name.name, e.rva2virt(addr.rva)))

    # add func ordinal: walk the whole address table, not only the slots
    # referenced by a name
    for index, addr in enumerate(e.DirExport.f_address):
        if not addr.rva:
            # Empty slot, no function exported with this ordinal
            continue
        out.append(
            (index + e.DirExport.expdesc.base, e.rva2virt(addr.rva)))
    return out
90 
91 
def vm_load_pe(vm, fdata, align_s=True, load_hdr=True, **kargs):
    """Load a PE in memory (@vm) from a data buffer @fdata
    @vm: VmMngr instance
    @fdata: data buffer to parse
    @align_s: (optional) If False, keep gaps between sections
    @load_hdr: (optional) If False, do not load the NThdr in memory
    Return the corresponding PE instance.

    Extra arguments are passed to the PE instantiation.
    If all sections are aligned on 0x1000, each one is mapped in its own
    memory page. Otherwise a single big page is created, containing all
    sections.
    """
    # Parse and build a PE instance
    image = pe_init.PE(fdata, **kargs)

    # A single section with a non null low 12 bits address is enough to fall
    # back to the "one big page" layout
    aligned = not any(section.addr & 0xFFF for section in image.SHList)

    if aligned:
        if load_hdr:
            # Map the headers at the image base, padded with null bytes up to
            # the first section (at most one page)
            hdr_len = max(0x200, image.NThdr.sizeofheaders)
            min_len = min(image.SHList[0].addr, 0x1000)
            padding = max(0, (min_len - hdr_len)) * "\x00"
            vm.add_memory_page(image.NThdr.ImageBase,
                               PAGE_READ | PAGE_WRITE,
                               image.content[:hdr_len] + padding)

        if align_s:
            # Each section but the last one is resized to end where the next
            # one starts
            for index, section in enumerate(image.SHList[:-1]):
                span = image.SHList[index + 1].addr - section.addr
                section.size = span
                section.rawsize = span
                section.data = strpatchwork.StrPatchwork(section.data[:span])
                section.offset = section.addr

            # The last section size is rounded up to a multiple of 0x1000
            tail = image.SHList[-1]
            tail.size = (tail.size + 0xfff) & 0xfffff000

        # Pad sections with null bytes and map them
        for section in image.SHList:
            content = str(section.data)
            content += "\x00" * (section.size - len(content))
            vm.add_memory_page(image.rva2virt(section.addr),
                               PAGE_READ | PAGE_WRITE, content)

        return image

    # At least one section is not aligned
    log.warning('PE is not aligned, creating big section')
    # Starting from RVA 0 keeps room for the headers in the big page
    low = 0 if load_hdr else None
    high = None

    section_count = len(image.SHList)
    for index, section in enumerate(image.SHList):
        if index < section_count - 1:
            # If it is not the last section, use next section address
            section.size = image.SHList[index + 1].addr - section.addr
        section.rawsize = section.size
        section.offset = section.addr

        # Update lowest and highest RVA covered so far
        if low is None or section.addr < low:
            low = section.addr
        section_end = section.addr + max(section.size, len(section.data))
        if high is None or section_end > high:
            high = section_end

    min_addr = image.rva2virt(low)
    max_addr = image.rva2virt(high)
    log.debug('Min: 0x%x, Max: 0x%x, Size: 0x%x', min_addr, max_addr,
              (max_addr - min_addr))

    # Create only one big section containing the whole PE
    vm.add_memory_page(min_addr,
                       PAGE_READ | PAGE_WRITE,
                       (max_addr - min_addr) * "\x00")

    # Copy each section content in memory
    for section in image.SHList:
        section_virt = image.rva2virt(section.addr)
        log.debug('Map 0x%x bytes to 0x%x', len(section.data), section_virt)
        vm.set_mem(image.rva2virt(section.addr), str(section.data))

    return image
189 
190 
def vm_load_pe_lib(vm, fname_in, libs, lib_path_base, **kargs):
    """Call vm_load_pe on @fname_in and update @libs accordingly
    @vm: VmMngr instance
    @fname_in: library name
    @libs: libimp_pe instance
    @lib_path_base: DLLs relative path
    Return the corresponding PE instance
    Extra arguments are passed to vm_load_pe
    """
    fname = os.path.join(lib_path_base, fname_in)
    # A PE is binary data: text mode may alter it (newline translation)
    with open(fname, 'rb') as fstream:
        pe = vm_load_pe(vm, fstream.read(), **kargs)
    libs.add_export_lib(pe, fname_in)
    return pe
205 
206 
def vm_load_pe_libs(vm, libs_name, libs, lib_path_base="win_dll", **kargs):
    """Load every library of @libs_name through vm_load_pe_lib
    @vm: VmMngr instance
    @libs_name: list of str (library filenames)
    @libs: libimp_pe instance
    @lib_path_base: (optional) DLLs relative path
    Return a dictionary filename -> PE instance
    Extra arguments are passed to vm_load_pe_lib
    """
    loaded = {}
    for fname in libs_name:
        loaded[fname] = vm_load_pe_lib(vm, fname, libs, lib_path_base,
                                       **kargs)
    return loaded
218 
219 
def vm_fix_imports_pe_libs(lib_imgs, libs, lib_path_base="win_dll",
                           patch_vm_imp=True, vm=None, **kargs):
    """Call preload_pe on each PE of @lib_imgs
    @lib_imgs: dictionary filename -> PE instance
    @libs: libimp_pe instance
    @lib_path_base: (optional) unused here
    @patch_vm_imp: (optional) If True, patch the import slots in @vm memory
    @vm: (optional) VmMngr instance, forwarded to preload_pe; mandatory when
    @patch_vm_imp is set, as preload_pe then writes to it
    Extra arguments are ignored.
    Raise TypeError if memory has to be patched and @vm is not given.
    """
    images = list(lib_imgs.values())
    if images and patch_vm_imp and vm is None:
        raise TypeError("vm_fix_imports_pe_libs: 'vm' is required when "
                        "patch_vm_imp is set")
    for e in images:
        # preload_pe expects the VmMngr as its first argument
        preload_pe(vm, e, libs, patch_vm_imp)
224 
225 
def vm2pe(myjit, fname, libs=None, e_orig=None,
          min_addr=None, max_addr=None,
          min_section_offset=0x1000, img_base=None,
          added_funcs=None):
    """Build a PE from the memory of @myjit and write it to @fname
    @myjit: jitter instance; the pages of myjit.vm become sections and
    myjit.pc becomes the entry point
    @fname: path of the generated file
    @libs: (optional) libimp_pe instance, used to generate the imports
    @e_orig: (optional) original PE instance; gives the word size (32 if
    absent), the default address range, the default image base and the
    resource directory RVA
    @min_addr, @max_addr: (optional) only the pages whose address is in
    [@min_addr, @max_addr) are dumped; default to the range covered by the
    sections of @e_orig
    @min_section_offset: (optional) file offset of the first dumped section
    @img_base: (optional) image base of the generated PE; defaults to the one
    of @e_orig
    @added_funcs: (optional) iterable of (address, function address); each
    one is registered in @libs before the imports are generated
    Return the generated PE instance
    """
    size = e_orig._wsize if e_orig else 32
    mye = pe_init.PE(wsize=size)

    if min_addr is None and e_orig is not None:
        min_addr = min([e_orig.rva2virt(s.addr) for s in e_orig.SHList])
    if max_addr is None and e_orig is not None:
        max_addr = max([e_orig.rva2virt(s.addr + s.size)
                        for s in e_orig.SHList])

    if img_base is None:
        # NOTE(review): either @img_base or @e_orig has to be provided
        img_base = e_orig.NThdr.ImageBase

    mye.NThdr.ImageBase = img_base
    all_mem = myjit.vm.get_all_memory()
    addrs = sorted(all_mem.keys())
    mye.Opthdr.AddressOfEntryPoint = mye.virt2rva(myjit.pc)

    # One section per memory page; only the first one gets an explicit file
    # offset
    first = True
    for ad in addrs:
        if not min_addr <= ad < max_addr:
            continue
        log.debug("0x%x", ad)
        if first:
            mye.SHList.add_section(
                "%.8X" % ad,
                addr=ad - mye.NThdr.ImageBase,
                data=all_mem[ad]['data'],
                offset=min_section_offset)
        else:
            mye.SHList.add_section(
                "%.8X" % ad,
                addr=ad - mye.NThdr.ImageBase,
                data=all_mem[ad]['data'])
        first = False

    if libs:
        if added_funcs is not None:
            for addr, funcaddr in added_funcs:
                libbase, dllname = libs.fad2info[funcaddr]
                libs.lib_get_add_func(libbase, dllname, addr)

        new_dll = libs.gen_new_lib(mye, mye.virt.is_addr_in)
    else:
        new_dll = {}

    log.debug('%s', new_dll)

    mye.DirImport.add_dlldesc(new_dll)
    s_imp = mye.SHList.add_section("import", rawsize=len(mye.DirImport))
    mye.DirImport.set_rva(s_imp.addr)
    log.debug('%r', mye.SHList)

    if e_orig:
        # resource: reuse the RVA of the original resource directory
        xx = str(mye)
        mye.content = xx
        ad = e_orig.NThdr.optentries[pe.DIRECTORY_ENTRY_RESOURCE].rva
        log.debug('dirres 0x%x', ad)
        if ad != 0:
            mye.NThdr.optentries[pe.DIRECTORY_ENTRY_RESOURCE].rva = ad
            mye.DirRes = pe.DirRes.unpack(xx, ad, mye)
            s_res = mye.SHList.add_section(
                name="myres", rawsize=len(mye.DirRes))
            mye.DirRes.set_rva(s_res.addr)
            log.debug('%r', mye.DirRes)

    # generation: the PE is binary data, and the file has to be closed
    with open(fname, 'wb') as fdesc:
        fdesc.write(str(mye))
    return mye
302 
303 
305 
306  def add_export_lib(self, e, name):
307  self.all_exported_lib.append(e)
308  # will add real lib addresses to database
309  if name in self.name2off:
310  ad = self.name2off[name]
311  else:
312  log.debug('new lib %s', name)
313  ad = e.NThdr.ImageBase
314  libad = ad
315  self.name2off[name] = ad
316  self.libbase2lastad[ad] = ad + 0x1
317  self.lib_imp2ad[ad] = {}
318  self.lib_imp2dstad[ad] = {}
319  self.libbase_ad += 0x1000
320 
322  todo = ads
323  # done = []
324  while todo:
325  # for imp_ord_or_name, ad in ads:
326  imp_ord_or_name, ad = todo.pop()
327 
328  # if export is a redirection, search redirected dll
329  # and get function real addr
330  ret = is_redirected_export(e, ad)
331  if ret:
332  exp_dname, exp_fname = ret
333  # log.debug('export redirection %s' % imp_ord_or_name)
334  # log.debug('source %s %s' % (exp_dname, exp_fname))
335  exp_dname = exp_dname + '.dll'
336  exp_dname = exp_dname.lower()
337  # if dll auto refes in redirection
338  if exp_dname == name:
339  libad_tmp = self.name2off[exp_dname]
340  if not exp_fname in self.lib_imp2ad[libad_tmp]:
341  # schedule func
342  todo = [(imp_ord_or_name, ad)] + todo
343  continue
344  elif not exp_dname in self.name2off:
345  raise ValueError('load %r first' % exp_dname)
346  c_name = canon_libname_libfunc(exp_dname, exp_fname)
347  libad_tmp = self.name2off[exp_dname]
348  ad = self.lib_imp2ad[libad_tmp][exp_fname]
349  # log.debug('%s' % hex(ad))
350  # if not imp_ord_or_name in self.lib_imp2dstad[libad]:
351  # self.lib_imp2dstad[libad][imp_ord_or_name] = set()
352  # self.lib_imp2dstad[libad][imp_ord_or_name].add(dst_ad)
353 
354  # log.debug('new imp %s %s' % (imp_ord_or_name, hex(ad)))
355  self.lib_imp2ad[libad][imp_ord_or_name] = ad
356 
357  name_inv = dict([(x[1], x[0]) for x in self.name2off.items()])
358  c_name = canon_libname_libfunc(
359  name_inv[libad], imp_ord_or_name)
360  self.fad2cname[ad] = c_name
361  self.fad2info[ad] = libad, imp_ord_or_name
362 
363  def gen_new_lib(self, target_pe, flt=lambda _: True):
364  """Gen a new DirImport description
365  @target_pe: PE instance
366  @flt: (boolean f(address)) restrict addresses to keep
367  """
368 
369  new_lib = []
370  for lib_name, ad in self.name2off.items():
371  # Build an IMAGE_IMPORT_DESCRIPTOR
372 
373  # Get fixed addresses
374  out_ads = dict() # addr -> func_name
375  for func_name, dst_addresses in self.lib_imp2dstad[ad].items():
376  out_ads.update({addr: func_name for addr in dst_addresses})
377 
378  # Filter available addresses according to @flt
379  all_ads = [addr for addr in out_ads.keys() if flt(addr)]
380  log.debug('ads: %s', map(hex, all_ads))
381  if not all_ads:
382  continue
383 
384  # Keep non-NULL elements
385  all_ads.sort()
386  for i, x in enumerate(all_ads):
387  if x not in [0, None]:
388  break
389  all_ads = all_ads[i:]
390 
391  while all_ads:
392  # Find libname's Import Address Table
393  othunk = all_ads[0]
394  i = 0
395  while (i + 1 < len(all_ads) and
396  all_ads[i] + target_pe._wsize / 8 == all_ads[i + 1]):
397  i += 1
398  # 'i + 1' is IAT's length
399 
400  # Effectively build an IMAGE_IMPORT_DESCRIPTOR
401  funcs = [out_ads[addr] for addr in all_ads[:i + 1]]
402  try:
403  rva = target_pe.virt2rva(othunk)
404  except pe.InvalidOffset:
405  pass
406  else:
407  new_lib.append(({"name": lib_name,
408  "firstthunk": rva},
409  funcs)
410  )
411 
412  # Update elements to handle
413  all_ads = all_ads[i + 1:]
414 
415  return new_lib
416 
# COFF header machine value -> architecture name
PE_machine = {
    0x14c: "x86_32",
    0x8664: "x86_64",
}


def guess_arch(pe):
    """Return the architecture name matching the machine field of the COFF
    header of the PE container @pe, or None if this value is not known"""
    machine = pe.Coffhdr.machine
    if machine in PE_machine:
        return PE_machine[machine]
    return None
def is_redirected_export
Definition: pe.py:57
def get_export_name_addr_list
Definition: pe.py:73
def get_import_address_pe
Definition: pe.py:21
def vm_fix_imports_pe_libs
Definition: pe.py:221