Miasm2
 All Classes Namespaces Files Functions Variables Typedefs Properties Macros
asmbloc.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 #-*- coding:utf-8 -*-
3 
4 import logging
5 import inspect
6 import re
7 
8 
9 import miasm2.expression.expression as m2_expr
10 from miasm2.expression.simplifications import expr_simp
11 from miasm2.expression.modint import moduint, modint
12 from miasm2.core.utils import Disasm_Exception, pck
13 from miasm2.core.graph import DiGraph
14 from miasm2.core.interval import interval
15 
# Module logger, registered under the name "asmblock"
log_asmbloc = logging.getLogger("asmblock")
# Records are sent to a stream handler and rendered as "<LEVEL>: <message>"
console_handler = logging.StreamHandler()
console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s"))
log_asmbloc.addHandler(console_handler)
# By default only WARNING and above are emitted
log_asmbloc.setLevel(logging.WARNING)
21 
22 
def is_int(a):
    """Return True if @a is an instance of int, long, moduint or modint"""
    return isinstance(a, (int, long, moduint, modint))
26 
27 
29  return isinstance(e, m2_expr.ExprId) and isinstance(e.name, asm_label)
30 
31 
33  return isinstance(e, m2_expr.ExprInt) or \
34  (isinstance(e, m2_expr.ExprId) and isinstance(e.name, asm_label))
35 
36 
class asm_label:

    "Stand for an assembly label"

    def __init__(self, name="", offset=None):
        """Build a label
        @name: label's name; an integer (see is_int) is turned into a
        "loc_<16 hex digits>" string
        @offset: (optional) label's offset, converted with int() when given
        """
        self.fixedblocs = False
        if is_int(name):
            name = "loc_%.16X" % (int(name) & 0xFFFFFFFFFFFFFFFF)
        self.name = name
        self.attrib = None
        self.offset = None if offset is None else int(offset)

    def __str__(self):
        """Return "<name>:<offset>", the offset being printed in hexadecimal
        when it is an int/long"""
        if isinstance(self.offset, (int, long)):
            return "%s:0x%08x" % (self.name, self.offset)
        return "%s:%s" % (self.name, str(self.offset))

    def __repr__(self):
        """Return "<asmlabel 'name' >" (the name part is omitted when the
        name is empty)"""
        parts = ['<asmlabel ']
        if self.name:
            parts.append(repr(self.name) + ' ')
        parts.append('>')
        return ''.join(parts)
64 
65 
class asm_raw:

    """Hold a raw payload in its `raw` attribute"""

    def __init__(self, raw=""):
        self.raw = raw

    def __str__(self):
        """Return the repr() of the raw payload"""
        return "%r" % (self.raw,)
73 
74 
76  c_to = "c_to"
77  c_next = "c_next"
78  c_bad = "c_bad"
79 
80  def __init__(self, label=None, c_t=c_to):
81  self.label = label
82  self.c_t = c_t
83 
84  def __str__(self):
85  return "%s:%s" % (str(self.c_t), str(self.label))
86 
87 
89 
    def __init__(self, label=None):
        """Initialise through the parent initialiser with the constraint type
        fixed to asm_constraint.c_next
        @label: (optional) target of the constraint
        """
        super(asm_constraint_next, self).__init__(
            label, c_t=asm_constraint.c_next)
93 
94 
96 
    def __init__(self, label=None):
        """Initialise through the parent initialiser with the constraint type
        fixed to asm_constraint.c_to
        @label: (optional) target of the constraint
        """
        super(asm_constraint_to, self).__init__(
            label, c_t=asm_constraint.c_to)
100 
101 
103 
    def __init__(self, label=None):
        """Initialise through the parent initialiser with the constraint type
        fixed to asm_constraint.c_bad
        @label: (optional) target of the constraint
        """
        super(asm_constraint_bad, self).__init__(
            label, c_t=asm_constraint.c_bad)
107 
108 
110 
111  def __init__(self, label=None, alignment=1):
112  self.bto = set()
113  self.lines = []
114  self.label = label
115  self.alignment = alignment
116 
117  def __str__(self):
118  out = []
119  out.append(str(self.label))
120  for l in self.lines:
121  out.append(str(l))
122  if self.bto:
123  lbls = ["->"]
124  for l in self.bto:
125  if l is None:
126  lbls.append("Unknown? ")
127  else:
128  lbls.append(str(l) + " ")
129  lbls = '\t'.join(lbls)
130  out.append(lbls)
131  return '\n'.join(out)
132 
133  def addline(self, l):
134  self.lines.append(l)
135 
136  def addto(self, c):
137  assert type(self.bto) is set
138  self.bto.add(c)
139 
140  def split(self, offset, l):
141  log_asmbloc.debug('split at %x', offset)
142  i = -1
143  offsets = [x.offset for x in self.lines]
144  if not l.offset in offsets:
145  log_asmbloc.warning(
146  'cannot split bloc at %X ' % offset +
147  'middle instruction? default middle')
148  offsets.sort()
149  return None
150  new_bloc = asm_bloc(l)
151  i = offsets.index(offset)
152 
153  self.lines, new_bloc.lines = self.lines[:i], self.lines[i:]
154  flow_mod_instr = self.get_flow_instr()
155  log_asmbloc.debug('flow mod %r', flow_mod_instr)
156  c = asm_constraint(l, asm_constraint.c_next)
157  # move dst if flowgraph modifier was in original bloc
158  # (usecase: split delayslot bloc)
159  if flow_mod_instr:
160  for xx in self.bto:
161  log_asmbloc.debug('lbl %s', xx)
162  c_next = set(
163  [x for x in self.bto if x.c_t == asm_constraint.c_next])
164  c_to = [x for x in self.bto if x.c_t != asm_constraint.c_next]
165  self.bto = set([c] + c_to)
166  new_bloc.bto = c_next
167  else:
168  new_bloc.bto = self.bto
169  self.bto = set([c])
170  return new_bloc
171 
172  def get_range(self):
173  if len(self.lines):
174  return self.lines[0].offset, self.lines[-1].offset
175  else:
176  return 0, 0
177 
178  def get_offsets(self):
179  return [x.offset for x in self.lines]
180 
181  def add_cst(self, offset, c_t, symbol_pool):
182  if type(offset) in [int, long]:
183  l = symbol_pool.getby_offset_create(offset)
184  elif type(offset) is str:
185  l = symbol_pool.getby_name_create(offset)
186  elif isinstance(offset, asm_label):
187  l = offset
188  else:
189  raise ValueError('unknown offset type %r' % offset)
190  c = asm_constraint(l, c_t)
191  self.bto.add(c)
192 
    def get_flow_instr(self):
        """Look for a flow-modifying line among the last lines of the block

        Return None when the block is empty.
        NOTE(review): the loop index starts at -1, which never satisfies
        `0 <= i`; hence, for any delayslot >= 0, the first iteration returns
        None and the NotImplementedError below is not reached. The intent is
        unclear -- confirm before relying on this method.
        """
        if not self.lines:
            return None
        # Indexes -1, -2, ... down to -(delayslot + 1), the delayslot value
        # being read on the first line
        for i in xrange(-1, -1 - self.lines[0].delayslot - 1, -1):
            if not 0 <= i < len(self.lines):
                return None
            l = self.lines[i]
            if l.splitflow() or l.breakflow():
                raise NotImplementedError('not fully functional')
202 
203  def get_subcall_instr(self):
204  if not self.lines:
205  return None
206  delayslot = self.lines[0].delayslot
207  end_index = len(self.lines) - 1
208  ds_max_index = max(end_index - delayslot, 0)
209  for i in xrange(end_index, ds_max_index - 1, -1):
210  l = self.lines[i]
211  if l.is_subcall():
212  return l
213  return None
214 
215  def get_next(self):
216  for x in self.bto:
217  if x.c_t == asm_constraint.c_next:
218  return x.label
219  return None
220 
221 
223 
224  def __init__(self):
225  self._labels = []
226  self._name2label = {}
227  self._offset2label = {}
228  self._label_num = 0
229 
230  def add_label(self, name, offset=None):
231  """
232  Create and add a label to the symbol_pool
233  @name: label's name
234  @offset: (optional) label's offset
235  """
236  label = asm_label(name, offset)
237 
238  # Test for collisions
239  if (label.offset in self._offset2label and
240  label != self._offset2label[label.offset]):
241  raise ValueError('symbol %s has same offset as %s' %
242  (label, self._offset2label[label.offset]))
243  if (label.name in self._name2label and
244  label != self._name2label[label.name]):
245  raise ValueError('symbol %s has same name as %s' %
246  (label, self._name2label[label.name]))
247 
248  self._labels.append(label)
249  if label.offset is not None:
250  self._offset2label[label.offset] = label
251  if label.name != "":
252  self._name2label[label.name] = label
253  return label
254 
255  def remove_label(self, label):
256  """
257  Delete a @label
258  """
259  self._name2label.pop(label.name, None)
260  self._offset2label.pop(label.offset, None)
261  if label in self._labels:
262  self._labels.remove(label)
263 
264  def del_label_offset(self, label):
265  """Unpin the @label from its offset"""
266  self._offset2label.pop(label.offset, None)
267  label.offset = None
268 
269  def getby_offset(self, offset):
270  """Retrieve label using its @offset"""
271  return self._offset2label.get(offset, None)
272 
273  def getby_name(self, name):
274  """Retrieve label using its @name"""
275  return self._name2label.get(name, None)
276 
277  def getby_name_create(self, name):
278  """Get a label from its @name, create it if it doesn't exist"""
279  label = self.getby_name(name)
280  if label is None:
281  label = self.add_label(name)
282  return label
283 
284  def getby_offset_create(self, offset):
285  """Get a label from its @offset, create it if it doesn't exist"""
286  label = self.getby_offset(offset)
287  if label is None:
288  label = self.add_label(offset, offset)
289  return label
290 
291  def rename_label(self, label, newname):
292  """Rename the @label name to @newname"""
293  if newname in self._name2label:
294  raise ValueError('Symbol already known')
295  self._name2label.pop(label.name, None)
296  label.name = newname
297  self._name2label[label.name] = label
298 
299  def set_offset(self, label, offset):
300  """Pin the @label from at @offset
301  Note that there is a special case when the offset is a list
302  it happens when offsets are recomputed in resolve_symbol*
303  """
304  if label is None:
305  raise ValueError('label should not be None')
306  if not label.name in self._name2label:
307  raise ValueError('label %s not in symbol pool' % label)
308  if offset is not None and offset in self._offset2label:
309  raise ValueError('Conflict in label %s' % label)
310  self._offset2label.pop(label.offset, None)
311  label.offset = offset
312  if is_int(label.offset):
313  self._offset2label[label.offset] = label
314 
    @property
    def items(self):
        """Return all labels (the internal list of the pool, not a copy)"""
        return self._labels
319 
320  def __str__(self):
321  return reduce(lambda x, y: x + str(y) + '\n', self._labels, "")
322 
323  def __getitem__(self, item):
324  if item in self._name2label:
325  return self._name2label[item]
326  if item in self._offset2label:
327  return self._offset2label[item]
328  raise KeyError('unknown symbol %r' % item)
329 
330  def __contains__(self, item):
331  return item in self._name2label or item in self._offset2label
332 
333  def merge(self, symbol_pool):
334  """Merge with another @symbol_pool"""
335  self._labels += symbol_pool._labels
336  self._name2label.update(symbol_pool._name2label)
337  self._offset2label.update(symbol_pool._offset2label)
338 
339  def gen_label(self):
340  """Generate a new unpinned label"""
341  label = self.add_label("lbl_gen_%.8X" % (self._label_num))
342  self._label_num += 1
343  return label
344 
345 
def dis_bloc(mnemo, pool_bin, cur_bloc, offset, job_done, symbol_pool,
             dont_dis=[], split_dis=[
             ], follow_call=False, dontdis_retcall=False, lines_wd=None,
             dis_bloc_callback=None, dont_dis_nulstart_bloc=False,
             attrib={}):
    """Disassemble one block starting at @offset and fill @cur_bloc with its
    lines and constraints

    @mnemo: object providing `delayslot` and `dis(pool_bin, attrib, offset)`
    @pool_bin: binary source handed to @mnemo.dis
    @cur_bloc: block receiving the lines and the constraints
    @offset: offset of the first instruction
    @job_done: set of the offsets already disassembled (updated)
    @symbol_pool: pool used to fetch/create the destination labels
    @dont_dis: offsets at which the disassembly stops
    @split_dis: offsets ending the block once it holds at least one line
    @follow_call: if set, the destinations of subcalls are also added as
    c_to constraints
    @dontdis_retcall: if set, the offset following a subcall is not added
    @lines_wd: (optional) maximum number of lines handled
    @dis_bloc_callback: (optional) called with (mnemo, attrib, pool_bin,
    cur_bloc, offsets_to_dis, symbol_pool) once the block is built
    @dont_dis_nulstart_bloc: if set, stop on an instruction made of NUL
    bytes only
    @attrib: handed to @mnemo.dis
    Return the set of offsets remaining to disassemble.

    NOTE(review): @dont_dis, @split_dis and @attrib have mutable default
    values; they are not modified by this function.
    """
    # pool_bin.offset = offset
    lines_cpt = 0
    in_delayslot = False
    delayslot_count = mnemo.delayslot
    offsets_to_dis = set()
    add_next_offset = False
    log_asmbloc.debug("dis at %X", int(offset))
    # Loop until a flow-breaking instruction is met, then keep going for
    # the delay slots of this instruction
    while not in_delayslot or delayslot_count > 0:
        if in_delayslot:
            delayslot_count -= 1

        # Forbidden or split offset: link to it and stop here
        if offset in dont_dis or (lines_cpt > 0 and offset in split_dis):
            cur_bloc.add_cst(offset, asm_constraint.c_next, symbol_pool)
            offsets_to_dis.add(offset)
            break

        lines_cpt += 1
        if lines_wd is not None and lines_cpt > lines_wd:
            # log_asmbloc.warning( "lines watchdog reached at %X"%int(offset))
            break

        # Offset already handled: only link to it
        if offset in job_done:
            cur_bloc.add_cst(offset, asm_constraint.c_next, symbol_pool)
            break

        off_i = offset
        try:
            # print repr(pool_bin.getbytes(offset, 4))
            instr = mnemo.dis(pool_bin, attrib, offset)
        except (Disasm_Exception, IOError), e:
            log_asmbloc.warning(e)
            instr = None

        # Disassembly failure: flag the offset with a c_bad constraint
        if instr is None:
            log_asmbloc.warning("cannot disasm at %X", int(off_i))
            cur_bloc.add_cst(off_i, asm_constraint.c_bad, symbol_pool)
            break

        # XXX TODO nul start block option
        if dont_dis_nulstart_bloc and instr.b.count('\x00') == instr.l:
            log_asmbloc.warning("reach nul instr at %X", int(off_i))
            cur_bloc.add_cst(off_i, asm_constraint.c_bad, symbol_pool)
            break

        # special case: flow graph modificator in delayslot
        if in_delayslot and instr and (instr.splitflow() or instr.breakflow()):
            add_next_offset = True
            break

        job_done.add(offset)
        log_asmbloc.debug("dis at %X", int(offset))

        offset += instr.l
        log_asmbloc.debug(instr)
        log_asmbloc.debug(instr.args)

        cur_bloc.addline(instr)
        if not instr.breakflow():
            continue
        # test split
        if instr.splitflow() and not (instr.is_subcall() and dontdis_retcall):
            add_next_offset = True
            # cur_bloc.add_cst(n, asm_constraint.c_next, symbol_pool)
            pass
        if instr.dstflow():
            instr.dstflow2label(symbol_pool)
            dst = instr.getdstflow(symbol_pool)
            # Keep only the destinations which are labels
            dstn = []
            for d in dst:
                if isinstance(d, m2_expr.ExprId) and \
                        isinstance(d.name, asm_label):
                    dstn.append(d.name)
            dst = dstn
            if (not instr.is_subcall()) or follow_call:
                cur_bloc.bto.update(
                    [asm_constraint(x, asm_constraint.c_to) for x in dst])

        # get in delayslot mode
        in_delayslot = True
        delayslot_count = instr.delayslot

    # Every non-bad constraint towards a label gives an offset to visit
    for c in cur_bloc.bto:
        if c.c_t == asm_constraint.c_bad:
            continue
        if isinstance(c.label, asm_label):
            offsets_to_dis.add(c.label.offset)

    if add_next_offset:
        cur_bloc.add_cst(offset, asm_constraint.c_next, symbol_pool)
        offsets_to_dis.add(offset)

    if dis_bloc_callback is not None:
        dis_bloc_callback(
            mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool)
    # print 'dst', [hex(x) for x in offsets_to_dis]
    return offsets_to_dis
447 
448 
def split_bloc(mnemo, attrib, pool_bin, blocs,
               symbol_pool, more_ref=None, dis_bloc_callback=None):
    """Split the blocks of @blocs at each offset targeted by a constraint
    (or listed in @more_ref) which falls after the first line of a block

    @mnemo, @attrib, @pool_bin: only forwarded to @dis_bloc_callback
    @blocs: list of blocks; the new blocks are appended to it
    @symbol_pool: pool used to fetch/create the labels of the new blocks
    @more_ref: (optional) additional offsets, which must be known by
    @symbol_pool
    @dis_bloc_callback: (optional) called on each new block
    Return @blocs
    """
    if not more_ref:
        more_ref = []

    # get all possible dst
    bloc_dst = [symbol_pool._offset2label[x] for x in more_ref]
    for b in blocs:
        for c in b.bto:
            if not isinstance(c.label, asm_label):
                continue
            if c.c_t == asm_constraint.c_bad:
                continue
            bloc_dst.append(c.label)

    # Only the pinned labels give a usable split offset
    bloc_dst = [x.offset for x in bloc_dst if x.offset is not None]

    # Index based loop: blocs grows while it is walked, so that the blocks
    # created by a split are examined as well
    j = -1
    while j < len(blocs) - 1:
        j += 1
        cb = blocs[j]
        a, b = cb.get_range()

        for off in bloc_dst:
            # Split only strictly after the first line of the block
            if not (off > a and off <= b):
                continue
            l = symbol_pool.getby_offset_create(off)
            new_b = cb.split(off, l)
            log_asmbloc.debug("split bloc %x", off)
            if new_b is None:
                log_asmbloc.error("cannot split %x!!", off)
                continue
            if dis_bloc_callback:
                offsets_to_dis = set(
                    [x.label.offset for x in new_b.bto
                     if isinstance(x.label, asm_label)])
                dis_bloc_callback(
                    mnemo, attrib, pool_bin, new_b, offsets_to_dis,
                    symbol_pool)
            blocs.append(new_b)
            # The current block was shortened: refresh its range
            a, b = cb.get_range()

    return blocs
492 
493 
def dis_bloc_all(mnemo, pool_bin, offset, job_done, symbol_pool, dont_dis=[],
                 split_dis=[], follow_call=False, dontdis_retcall=False,
                 blocs_wd=None, lines_wd=None, blocs=None,
                 dis_bloc_callback=None, dont_dis_nulstart_bloc=False,
                 attrib={}):
    """Disassemble every block reachable from @offset, then split the blocks
    on their destinations (see split_bloc)

    @mnemo, @pool_bin, @job_done, @symbol_pool, @split_dis, @follow_call,
    @dontdis_retcall, @lines_wd, @dis_bloc_callback,
    @dont_dis_nulstart_bloc, @attrib: forwarded to dis_bloc
    @offset: offset of the first block
    @dont_dis: offsets, or (start, stop) tuples standing for the range
    start <= offset < stop, which must not be disassembled
    @blocs_wd: (optional) maximum number of blocks handled
    @blocs: (optional) list receiving the blocks
    Return the list of blocks, as returned by split_bloc
    """
    log_asmbloc.info("dis bloc all")
    if blocs is None:
        blocs = []
    todo = [offset]

    bloc_cpt = 0
    while len(todo):
        bloc_cpt += 1
        if blocs_wd is not None and bloc_cpt > blocs_wd:
            log_asmbloc.debug("blocs watchdog reached at %X", int(offset))
            break

        n = todo.pop(0)
        # dis_bloc may report None (destination label without offset): the
        # test has to be done before the int conversion, which would raise
        if n is None:
            continue
        n = int(n)
        if n in job_done:
            continue

        if n in dont_dis:
            continue
        # Ranges to avoid are given as (start, stop) tuples
        dd_flag = False
        for dd in dont_dis:
            if not isinstance(dd, tuple):
                continue
            dd_a, dd_b = dd
            if dd_a <= n < dd_b:
                dd_flag = True
                break
        if dd_flag:
            continue
        l = symbol_pool.getby_offset_create(n)
        cur_bloc = asm_bloc(l)
        todo += dis_bloc(mnemo, pool_bin, cur_bloc, n, job_done, symbol_pool,
                         dont_dis, split_dis, follow_call, dontdis_retcall,
                         dis_bloc_callback=dis_bloc_callback,
                         lines_wd=lines_wd,
                         dont_dis_nulstart_bloc=dont_dis_nulstart_bloc,
                         attrib=attrib)
        blocs.append(cur_bloc)

    return split_bloc(mnemo, attrib, pool_bin, blocs,
                      symbol_pool, dis_bloc_callback=dis_bloc_callback)
541 
542 
def bloc2graph(blocks, label=False, lines=True):
    """Render dot graph of @blocks
    @label: if set, each line is prefixed by its offset
    @lines: if unset, the lines of the blocks are not rendered
    """

    brace_re = re.compile('[' + re.escape('{}') + ']')
    label_attr = 'colspan="2" align="center" bgcolor="grey"'
    edge_attr = 'label = "%s" color="%s" style="bold"'
    td_attr = 'align="left"'
    block_attr = 'shape="Mrecord" fontname="Courier New"'

    def escape_brace(match):
        # Curly braces are backslash-escaped in the rendered lines
        return '\\' + match.group()

    row_open = '<tr><td %s>' % td_attr
    row_sep = '</td></tr><tr><td %s>' % td_attr
    out = ["digraph asm_graph {"]

    # Generate basic blocks
    for block in blocks:
        rows = []
        if lines:
            for line in block.lines:
                prefix = ""
                if label:
                    prefix = "%.8X</td><td %s> " % (line.offset, td_attr)
                rows.append(prefix + brace_re.sub(escape_brace, str(line)))
        title = '<tr><td %s>%s</td></tr>' % (label_attr, block.label.name)
        out.append(''.join([
            '%s [\n' % block.label.name,
            "%s " % block_attr,
            'label =<<table border="0" cellborder="0" cellpadding="3">',
            "%s " % title,
            row_open, row_sep.join(rows), '</td></tr>',
            "</table>> ];",
        ]))

    # Generate links
    for block in blocks:
        for next_b in block.bto:
            if not isinstance(next_b.label, (m2_expr.ExprId, asm_label)):
                continue
            src, dst, cst = block.label.name, next_b.label.name, next_b.c_t
            if isinstance(src, asm_label):
                src = src.name
            if isinstance(dst, asm_label):
                dst = dst.name

            if next_b.c_t == asm_constraint.c_next:
                edge_color = "red"
            elif next_b.c_t == asm_constraint.c_to:
                edge_color = "limegreen"
            else:
                edge_color = "black"
            # special case
            if len(block.bto) == 1:
                edge_color = "blue"

            out.append('%s -> %s' % (src, dst) +
                       '[' + edge_attr % (cst, edge_color) + '];')

    out.append("}")
    return '\n'.join(out)
609 
610 
def conservative_asm(mnemo, instr, symbols, conservative):
    """
    Asm instruction;
    Try to keep original instruction bytes if it exists
    @mnemo: object providing asm(instr, symbols)
    @instr: instruction; its original bytes, if any, are read from `b`
    @symbols: handed to @mnemo.asm
    @conservative: if set, and the original bytes are not a candidate,
    prefer the first candidate having the same length as them
    Return the tuple (selected candidate, every candidate)
    Raise ValueError when there is no candidate
    """
    candidates = mnemo.asm(instr, symbols)
    if not candidates:
        raise ValueError('cannot asm:%s' % str(instr))
    if hasattr(instr, "b"):
        original = instr.b
        if original in candidates:
            return original, candidates
        if conservative:
            for candidate in candidates:
                if len(candidate) == len(instr.b):
                    return candidate, candidates
    # No original bytes, or nothing close enough: first candidate
    return candidates[0], candidates
628 
629 
def fix_expr_val(expr, symbols):
    """Resolve an expression @expr using @symbols

    Each ExprId of @expr is replaced by the offset of the label found under
    its name in @symbols, then the result is simplified.
    Raise RuntimeError if the simplified result is not an ExprInt
    """
    def resolve_id(node):
        if not isinstance(node, m2_expr.ExprId):
            return node
        label = symbols._name2label[node.name]
        return m2_expr.ExprInt_from(node, label.offset)

    resolved = expr_simp(expr.visit(resolve_id))
    if isinstance(resolved, m2_expr.ExprInt):
        return resolved
    raise RuntimeError('Cannot resolve symbol %s' % expr)
642 
643 
def guess_blocks_size(mnemo, blocks):
    """Asm and compute max block size

    Each line receives its `data` (known bytes, or None) and its length `l`;
    each block receives `size` and `max_size`, both set to the sum of the
    lengths of its lines.
    @mnemo: object providing asm(instr) and max_instruction_len
    @blocks: blocks to measure
    Raise NotImplementedError for an asm_raw which is neither a list nor a
    str
    """

    for block in blocks:
        size = 0
        for instr in block.lines:
            if isinstance(instr, asm_raw):
                # for special asm_raw, only extract len
                if isinstance(instr.raw, list):
                    data = None
                    if len(instr.raw) == 0:
                        l = 0
                    else:
                        # size is in bits; explicit integer division
                        l = instr.raw[0].size // 8 * len(instr.raw)
                elif isinstance(instr.raw, str):
                    data = instr.raw
                    l = len(data)
                else:
                    raise NotImplementedError('asm raw')
            else:
                # Assemble the instruction to retrieve its len.
                # If the instruction uses symbol it will fail
                # In this case, the max_instruction_len is used
                try:
                    candidates = mnemo.asm(instr)
                    l = len(candidates[-1])
                except Exception:
                    # KeyboardInterrupt / SystemExit are left to propagate
                    l = mnemo.max_instruction_len
                data = None
            instr.data = data
            instr.l = l
            size += l

        block.size = size
        block.max_size = size
        log_asmbloc.info("size: %d max: %d", block.size, block.max_size)
680 
681 
def fix_label_offset(symbol_pool, label, offset, modified):
    """Set the offset of @label to @offset through @symbol_pool and record
    @label in @modified; nothing is done when the offset is unchanged
    @symbol_pool: current symbol_pool
    @label: label to update
    @offset: new offset
    @modified: set receiving the updated labels
    """
    if not label.offset == offset:
        symbol_pool.set_offset(label, offset)
        modified.add(label)
691 
692 
694 
695  """Manage blocks linked with an asm_constraint_next"""
696 
697  def __init__(self, symbol_pool, blocks):
698  self.symbol_pool = symbol_pool
699  self.blocks = blocks
700  self.place()
701 
    @property
    def pinned(self):
        """Return True iff at least one block is pinned"""
        # pinned_block_idx is None as long as no pinned block is recorded
        return self.pinned_block_idx is not None
706 
708  self.pinned_block_idx = None
709  for i, block in enumerate(self.blocks):
710  if is_int(block.label.offset):
711  if self.pinned_block_idx is not None:
712  raise ValueError("Multiples pinned block detected")
713  self.pinned_block_idx = i
714 
715  def place(self):
716  """Compute BlockChain min_offset and max_offset using pinned block and
717  blocks' size
718  """
719  self._set_pinned_block_idx()
720  self.max_size = 0
721  for block in self.blocks:
722  self.max_size += block.max_size + block.alignment - 1
723 
724  # Check if chain has one block pinned
725  if not self.pinned:
726  return
727 
728  offset_base = self.blocks[self.pinned_block_idx].label.offset
729  assert(offset_base % self.blocks[self.pinned_block_idx].alignment == 0)
730 
731  self.offset_min = offset_base
732  for block in self.blocks[:self.pinned_block_idx - 1:-1]:
733  self.offset_min -= block.max_size + \
734  (block.alignment - block.max_size) % block.alignment
735 
736  self.offset_max = offset_base
737  for block in self.blocks[self.pinned_block_idx:]:
738  self.offset_max += block.max_size + \
739  (block.alignment - block.max_size) % block.alignment
740 
741  def merge(self, chain):
742  """Best effort merge two block chains
743  Return the list of resulting blockchains"""
744  self.blocks += chain.blocks
745  self.place()
746  return [self]
747 
748  def fix_blocks(self, modified_labels):
749  """Propagate a pinned to its blocks' neighbour
750  @modified_labels: store new pinned labels"""
751 
752  if not self.pinned:
753  raise ValueError('Trying to fix unpinned block')
754 
755  # Propagate offset to blocks before pinned block
756  pinned_block = self.blocks[self.pinned_block_idx]
757  offset = pinned_block.label.offset
758  if offset % pinned_block.alignment != 0:
759  raise RuntimeError('Bad alignment')
760 
761  for block in self.blocks[:self.pinned_block_idx - 1:-1]:
762  new_offset = offset - block.size
763  new_offset = new_offset - new_offset % pinned_block.alignment
765  block.label,
766  new_offset,
767  modified_labels)
768 
769  # Propagate offset to blocks after pinned block
770  offset = pinned_block.label.offset + pinned_block.size
771 
772  last_block = pinned_block
773  for block in self.blocks[self.pinned_block_idx + 1:]:
774  offset += (- offset) % last_block.alignment
776  block.label,
777  offset,
778  modified_labels)
779  offset += block.size
780  last_block = block
781  return modified_labels
782 
783 
785 
786  """Stand for wedges between blocks"""
787 
788  def __init__(self, symbol_pool, offset, size):
789  self.symbol_pool = symbol_pool
790  self.offset = offset
791  self.max_size = size
792  self.offset_min = offset
793  self.offset_max = offset + size
794 
795  def merge(self, chain):
796  """Best effort merge two block chains
797  Return the list of resulting blockchains"""
798  self.symbol_pool.set_offset(chain.blocks[0].label, self.offset_max)
799  chain.place()
800  return [self, chain]
801 
802 
def group_constrained_blocks(symbol_pool, blocks):
    """
    Return the BlockChains list built from grouped asm blocks linked by
    asm_constraint_next
    @symbol_pool: pool handed to each created BlockChain
    @blocks: a list of asm block
    """
    log_asmbloc.info('group_constrained_blocks')

    # Group adjacent blocks
    remaining_blocks = list(blocks)
    # Chains built so far, indexed by the label of their first block
    known_block_chains = {}
    lbl2block = {block.label: block for block in blocks}

    while remaining_blocks:
        # Create a new block chain
        block_list = [remaining_blocks.pop()]

        # Find sons in remainings blocks linked with a next constraint
        while True:
            # Get next block
            next_label = block_list[-1].get_next()
            if next_label is None or next_label not in lbl2block:
                break
            next_block = lbl2block[next_label]

            # Add the block at the end of the current chain
            if next_block not in remaining_blocks:
                break
            block_list.append(next_block)
            remaining_blocks.remove(next_block)

        # Check if son is in a known block group
        # (if so, that chain is appended to the current one)
        if next_label is not None and next_label in known_block_chains:
            block_list += known_block_chains[next_label]
            del known_block_chains[next_label]

        known_block_chains[block_list[0].label] = block_list

    out_block_chains = []
    for label in known_block_chains:
        chain = BlockChain(symbol_pool, known_block_chains[label])
        out_block_chains.append(chain)
    return out_block_chains
846 
847 
def get_blockchains_address_interval(blockChains, dst_interval):
    """Compute the interval used by the pinned @blockChains
    Check if the placed chains are in the @dst_interval
    Raise ValueError if a pinned chain is not inside @dst_interval
    """

    allocated_interval = interval()
    for chain in blockChains:
        if chain.pinned:
            # Bytes [offset_min, offset_max - 1] are used by the chain
            bounds = (chain.offset_min, chain.offset_max - 1)
            chain_interval = interval([bounds])
            if chain_interval not in dst_interval:
                raise ValueError('Chain placed out of destination interval')
            allocated_interval += chain_interval
    return allocated_interval
861 
862 
def resolve_symbol(blockChains, symbol_pool, dst_interval=None):
    """Place @blockChains in the @dst_interval

    @blockChains: chains to place; this list is sorted in place
    @symbol_pool: pool handed to the wedges
    @dst_interval: (optional) allowed destination interval, the whole
    [0, 0xFFFFFFFFFFFFFFFF] range by default
    Return the list of the resulting BlockChain instances (the wedges are
    filtered out)
    Raise RuntimeError if an unpinned chain cannot be placed
    """

    log_asmbloc.info('resolve_symbol')
    if dst_interval is None:
        dst_interval = interval([(0, 0xFFFFFFFFFFFFFFFF)])

    # Everything outside of dst_interval, including one address before and
    # one after the 64 bit range
    forbidden_interval = interval(
        [(-1, 0xFFFFFFFFFFFFFFFF + 1)]) - dst_interval
    allocated_interval = get_blockchains_address_interval(blockChains,
                                                          dst_interval)
    log_asmbloc.debug('allocated interval: %s', allocated_interval)

    pinned_chains = [chain for chain in blockChains if chain.pinned]

    # Add wedge in forbidden intervals
    for start, stop in forbidden_interval.intervals:
        wedge = BlockChainWedge(
            symbol_pool, offset=start, size=stop + 1 - start)
        pinned_chains.append(wedge)

    # Try to place bigger blockChains first
    pinned_chains.sort(key=lambda x: x.offset_min)
    blockChains.sort(key=lambda x: -x.max_size)

    fixed_chains = list(pinned_chains)

    log_asmbloc.debug("place chains")
    for chain in blockChains:
        if chain.pinned:
            continue
        fixed = False
        # Look for the first gap between two consecutive fixed chains which
        # is strictly larger than the chain
        for i in xrange(1, len(fixed_chains)):
            prev_chain = fixed_chains[i - 1]
            next_chain = fixed_chains[i]

            if prev_chain.offset_max + chain.max_size < next_chain.offset_min:
                # prev_chain is replaced by the result of the merge
                new_chains = prev_chain.merge(chain)
                fixed_chains[i - 1:i] = new_chains
                fixed = True
                break
        if not fixed:
            raise RuntimeError('Cannot find enough space to place blocks')

    return [chain for chain in fixed_chains if isinstance(chain, BlockChain)]
908 
909 
911  """Extract labels from list of ExprId @exprs"""
912  return set(expr.name for expr in exprs if isinstance(expr.name, asm_label))
913 
914 
def get_block_labels(block):
    """Extract labels used by @block

    The expression ids are collected from the arguments of the
    instructions and from the expressions of the list based asm_raw lines,
    then filtered with filter_exprid_label.
    """
    ids = set()
    for instr in block.lines:
        if isinstance(instr, asm_raw):
            # Raw lines which are not a list hold no expression
            exprs = instr.raw if isinstance(instr.raw, list) else ()
        else:
            exprs = instr.args
        for expr in exprs:
            ids.update(m2_expr.get_expr_ids(expr))
    return filter_exprid_label(ids)
928 
929 
def assemble_block(mnemo, block, symbol_pool, conservative=False):
    """Assemble a @block using @symbol_pool
    @mnemo: handed to conservative_asm
    @conservative: (optional) use original bytes when possible

    Each line gets its `offset` and `data` updated; instructions also get
    their length `l` updated, and @block.size is adjusted accordingly.
    """
    # Offset of the current line, relative to the start of the block
    offset_i = 0

    for instr in block.lines:
        if isinstance(instr, asm_raw):
            if isinstance(instr.raw, list):
                # Fix special asm_raw
                data = ""
                for expr in instr.raw:
                    expr_int = fix_expr_val(expr, symbol_pool)
                    data += pck[expr_int.size](expr_int.arg)
                instr.data = data

            # NOTE(review): raw lines receive a block relative offset while
            # instructions below receive block.label.offset + offset_i --
            # confirm this is intended
            instr.offset = offset_i
            offset_i += instr.l
            continue

        # Assemble an instruction
        saved_args = list(instr.args)
        instr.offset = block.label.offset + offset_i

        # Replace instruction's arguments by resolved ones
        instr.args = instr.resolve_args_with_symbols(symbol_pool)

        if instr.dstflow():
            instr.fixDstOffset()

        old_l = instr.l
        cached_candidate, candidates = conservative_asm(
            mnemo, instr, symbol_pool, conservative)

        # Restore original arguments
        instr.args = saved_args

        # We need to update the block size
        block.size = block.size - old_l + len(cached_candidate)
        instr.data = cached_candidate
        instr.l = len(cached_candidate)

        offset_i += instr.l
973 
974 
def asmbloc_final(mnemo, blocks, blockChains, symbol_pool, conservative=False):
    """Resolve and assemble @blockChains using @symbol_pool until fixed point is
    reached
    @mnemo: handed to assemble_block
    @blocks: every block to assemble
    @blockChains: chains whose pinned offsets are propagated to their blocks
    @conservative: (optional) handed to assemble_block
    """

    log_asmbloc.debug("asmbloc_final")

    # Init structures
    lbl2block = {block.label: block for block in blocks}
    # label -> blocks having a line which references this label
    blocks_using_label = {}
    for block in blocks:
        for label in get_block_labels(block):
            blocks_using_label.setdefault(label, set()).add(block)

    # Init worklist: every block is assembled at least once
    blocks_to_rework = set(blocks)

    # Fix and re-assemble blocks until fixed point is reached
    while True:

        # Propagate pinned blocks into chains
        modified_labels = set()
        for chain in blockChains:
            chain.fix_blocks(modified_labels)

        for label in modified_labels:
            # Retrieve block with modified reference
            if label in lbl2block:
                blocks_to_rework.add(lbl2block[label])

            # Enqueue blocks referencing a modified label
            blocks_to_rework.update(blocks_using_label.get(label, ()))

        # No more work
        if not blocks_to_rework:
            break

        while blocks_to_rework:
            block = blocks_to_rework.pop()
            assemble_block(mnemo, block, symbol_pool, conservative)
1023 
1024 
1026  """Do sanity checks on blocks' constraints:
1027  * no multiple next constraint to same block
1028  * no next constraint to self"""
1029 
1030  blocks_graph = basicblocs(blocks)
1031  graph = blocks_graph.g
1032  for label in graph.nodes():
1033  if blocks_graph.blocs[label].get_next() == label:
1034  raise RuntimeError('Bad constraint: self in next')
1035  pred_next = set()
1036  for pred in graph.predecessors(label):
1037  if not pred in blocks_graph.blocs:
1038  continue
1039  if blocks_graph.blocs[pred].get_next() == label:
1040  pred_next.add(pred)
1041  if len(pred_next) > 1:
1042  raise RuntimeError("Too many next constraints for bloc %r" % label)
1043 
1044 
def asm_resolve_final(mnemo, blocks, symbol_pool, dst_interval=None):
    """Resolve and assemble @blocks using @symbol_pool into interval
    @dst_interval

    Return a dictionary mapping each offset to the bytes of the line
    assembled at this offset (lines without data are skipped).
    Raise RuntimeError if the bytes of two lines overlap
    """

    sanity_check_blocks(blocks)

    guess_blocks_size(mnemo, blocks)
    blockChains = group_constrained_blocks(symbol_pool, blocks)
    resolved_blockChains = resolve_symbol(
        blockChains, symbol_pool, dst_interval)

    asmbloc_final(mnemo, blocks, resolved_blockChains, symbol_pool)
    patches = {}
    # Bytes emitted so far, used to detect overlapping lines
    output_interval = interval()

    for block in blocks:
        offset = block.label.offset
        for instr in block.lines:
            if not instr.data:
                # Empty line
                continue
            assert len(instr.data) == instr.l
            patches[offset] = instr.data
            instruction_interval = interval([(offset, offset + instr.l - 1)])
            if not (instruction_interval & output_interval).empty:
                raise RuntimeError("overlapping bytes %X" % int(offset))
            # Without this accumulation the check above could never fire
            output_interval += instruction_interval
            instr.offset = offset
            offset += instr.l
    return patches
1074 
1075 
def blist2graph(ab):
    """
    ab: list of asmbloc
    return: DiGraph whose nodes are the bloc labels, with one edge from
    each bloc label to the label of each of its destinations (bto).
    The returned graph also carries a `lbl2bloc` dictionary mapping each
    label to its bloc
    """
    graph = DiGraph()
    lbl2bloc = {}
    graph.lbl2bloc = lbl2bloc
    for bloc in ab:
        src = bloc.label
        lbl2bloc[src] = bloc
        graph.add_node(src)
        for constraint in bloc.bto:
            graph.add_edge(src, constraint.label)
    return graph
1089 
1090 
class basicblocs:

    """Set of blocs indexed by label (self.blocs) along with the directed
    graph of their destinations (self.g)"""

    def __init__(self, ab=None):
        """@ab: (optional) iterable of blocs to register"""
        self.blocs = {}
        self.g = DiGraph()
        # None stands for "no bloc"; avoids a shared mutable default
        self.add_blocs([] if ab is None else ab)

    def add(self, b):
        """Register bloc @b: index it by its label, add its label as a node
        and add an edge toward each destination whose label is an
        asm_label"""
        src = b.label
        self.blocs[src] = b
        self.g.add_node(src)
        for dst in b.bto:
            if isinstance(dst.label, asm_label):
                self.g.add_edge(src, dst.label)

    def add_blocs(self, ab):
        """Register each bloc of the iterable @ab (see add)"""
        for b in ab:
            self.add(b)

    def get_bad_dst(self):
        """Return the set of registered blocs having at least one
        constraint of type asm_constraint.c_bad"""
        return set(
            b for b in self.blocs.values()
            if any(c.c_t == asm_constraint.c_bad for c in b.bto))
1116 
1117 
def find_parents(blocs, l):
    """Return the set of labels of the blocs of @blocs having @l among
    their destination labels. Only destinations whose label is an
    asm_label are considered
    @blocs: iterable of blocs
    @l: label to look for
    """
    return set(
        bloc.label for bloc in blocs
        if l in [dst.label for dst in bloc.bto
                 if isinstance(dst.label, asm_label)])
1124 
1125 
def bloc_blink(blocs):
    """Set the `parents` attribute of each bloc of @blocs to the set of
    labels returned by find_parents for its own label"""
    for bloc in blocs:
        bloc.parents = find_parents(blocs, bloc.label)
1129 
1130 
def getbloc_around(blocs, a, level=3, done=None, blocby_label=None):
    """Return the set of blocs reachable from bloc @a in at most @level
    steps, walking both toward parents and toward destinations
    @blocs: blocs used to build the label -> bloc mapping when
    @blocby_label is not provided (or empty)
    @a: starting bloc; its `parents` attribute must hold parent labels
    @level: remaining number of steps
    @done: (optional) set of already visited blocs, updated in place
    @blocby_label: (optional) dictionary label -> bloc
    """

    if not blocby_label:
        blocby_label = dict((bloc.label, bloc) for bloc in blocs)
    if done is None:
        done = set()

    done.add(a)
    if level:
        # Parents are stored as labels
        for parent_label in a.parents:
            parent = blocby_label[parent_label]
            if parent not in done:
                done.update(getbloc_around(
                    blocs, parent, level - 1, done, blocby_label))
        # Sons are reached through the label of each constraint
        for constraint in a.bto:
            son = blocby_label[constraint.label]
            if son not in done:
                done.update(getbloc_around(
                    blocs, son, level - 1, done, blocby_label))
    return done
1154 
1155 
def getbloc_parents(blocs, a, level=3, done=None, blocby_label=None):
    """Return the set made of bloc @a and of its ancestors up to @level
    generations
    @blocs: blocs used to build the label -> bloc mapping when
    @blocby_label is not provided (or empty)
    @a: starting bloc; its `parents` attribute must hold parent labels
    @level: remaining number of generations to walk
    @done: (optional) set of already visited blocs, updated in place
    @blocby_label: (optional) dictionary label -> bloc
    """

    if not blocby_label:
        blocby_label = dict((bloc.label, bloc) for bloc in blocs)
    if done is None:
        done = set()

    done.add(a)
    if level:
        for parent_label in a.parents:
            parent = blocby_label[parent_label]
            if parent not in done:
                done.update(getbloc_parents(
                    blocs, parent, level - 1, done, blocby_label))
    return done
1174 
1175 # get ONLY level_X parents
1176 
1177 
def getbloc_parents_strict(
        blocs, a, level=3, rez=None, done=None, blocby_label=None):
    """Return the set of ancestors of bloc @a located exactly @level
    generations above it (@a itself when @level is 0)
    @blocs: blocs used to build the label -> bloc mapping when
    @blocby_label is not provided (or empty)
    @a: starting bloc; its `parents` attribute must hold parent labels
    @level: remaining number of generations to walk
    @rez: (optional) result set, updated in place
    @done: (optional) set of already visited blocs, updated in place; a
    bloc already in it is not walked again
    @blocby_label: (optional) dictionary label -> bloc
    """

    if not blocby_label:
        blocby_label = dict((bloc.label, bloc) for bloc in blocs)
    if rez is None:
        rez = set()
    if done is None:
        done = set()

    done.add(a)
    # Only blocs reached with no generation left are part of the result
    if level == 0:
        rez.add(a)
    if not level:
        return rez
    for parent_label in a.parents:
        parent = blocby_label[parent_label]
        if parent in done:
            continue
        rez.update(getbloc_parents_strict(
            blocs, parent, level - 1, rez, done, blocby_label))
    return rez
1202 
1203 
def bloc_find_path_next(blocs, blocby_label, a, b, path=None):
    """Search a path from bloc @a to bloc @b following only constraints of
    type asm_constraint.c_next
    @blocs: not used by the search itself, only forwarded to recursive
    calls
    @blocby_label: dictionary label -> bloc
    @a: current bloc
    @b: bloc to reach
    @path: (optional) list of blocs already walked
    Return a list holding the first path found (the list of blocs walked
    before reaching @b), or an empty list if there is none
    """
    if path is None:
        path = []
    if a == b:
        return [path]

    for constraint in a.bto:
        if constraint.c_t != asm_constraint.c_next:
            continue
        if constraint.label not in blocby_label:
            log_asmbloc.error('XXX unknown label')
            continue
        son = blocby_label[constraint.label]
        found = bloc_find_path_next(blocs, blocby_label, son, b, path + [a])
        # stop if at least one path found
        if found:
            return found
    return []
1223 
1224 
def bloc_merge(blocs, dont_merge=None):
    """Merge, in place, each bloc having a single parent into this parent
    when the parent has a single destination
    @blocs: list of blocs; merged blocs are removed from it, and the
    `parents` attribute of every bloc is (re)computed
    @dont_merge: (optional) labels of blocs which must not be merged into
    their parent

    A bloc is merged into its parent only if:
    * its label is not in @dont_merge
    * it is not its own parent
    * it has exactly one parent, which has exactly one destination
    * no path of "next" constraints leads from it back to its parent
    * the last line of the parent, if any, is not a subcall
    """
    if dont_merge is None:
        dont_merge = []

    blocby_label = {}
    for b in blocs:
        blocby_label[b.label] = b
        b.parents = find_parents(blocs, b.label)

    i = -1
    while i < len(blocs) - 1:
        i += 1
        b = blocs[i]
        if b.label in dont_merge:
            continue
        parents = set(b.parents)
        # if bloc dont self ref
        if b.label in parents:
            continue
        # and bloc has only one parent
        if len(parents) != 1:
            continue
        # may merge
        bp = blocby_label[parents.pop()]
        # and parent has only one son
        if len(bp.bto) != 1:
            continue
        # and will not create next loop composed of constraint_next from son
        # to parent
        if bloc_find_path_next(blocs, blocby_label, b, bp):
            continue
        if bp.lines:
            last_line = bp.lines[-1]
            # jmp opt; jcc opt
            if last_line.is_subcall():
                continue
            # The parent's last line is dropped when it breaks the flow and
            # has a destination
            if last_line.breakflow() and last_line.dstflow():
                bp.lines.pop()

        # update parents: sons of the merged bloc now have bp as parent
        for son in b.bto:
            if not isinstance(son.label, asm_label):
                continue
            if son.label.name is None:
                continue
            if son.label not in blocby_label:
                log_asmbloc.error("unknown parent XXX")
                continue
            bs = blocby_label[son.label]
            for parent_label in list(bs.parents):
                if parent_label == b.label:
                    bs.parents.discard(parent_label)
                    bs.parents.add(bp.label)

        # merge
        bp.lines += b.lines
        bp.bto = b.bto

        # The list changed: restart the scan from its beginning
        del blocs[i]
        i = -1
1286 
1287 
1289 
1290  def __init__(self, arch, attrib, bs=None, **kwargs):
1291  self.arch = arch
1292  self.attrib = attrib
1293  self.bs = bs
1295  self.dont_dis = []
1296  self.split_dis = []
1297  self.follow_call = False
1298  self.dontdis_retcall = False
1299  self.lines_wd = None
1300  self.blocs_wd = None
1301  self.dis_bloc_callback = None
1303  self.job_done = set()
1304  self.__dict__.update(kwargs)
1305 
1306  def dis_bloc(self, offset):
1307  l = self.symbol_pool.getby_offset_create(offset)
1308  current_bloc = asm_bloc(l)
1309  dis_bloc(self.arch, self.bs, current_bloc, offset, self.job_done,
1310  self.symbol_pool,
1311  dont_dis=self.dont_dis, split_dis=self.split_dis,
1312  follow_call=self.follow_call,
1313  dontdis_retcall=self.dontdis_retcall,
1314  lines_wd=self.lines_wd,
1315  dis_bloc_callback=self.dis_bloc_callback,
1316  dont_dis_nulstart_bloc=self.dont_dis_nulstart_bloc,
1317  attrib=self.attrib)
1318  return current_bloc
1319 
1320  def dis_multibloc(self, offset, blocs=None):
1321  blocs = dis_bloc_all(self.arch, self.bs, offset, self.job_done,
1322  self.symbol_pool,
1323  dont_dis=self.dont_dis, split_dis=self.split_dis,
1324  follow_call=self.follow_call,
1325  dontdis_retcall=self.dontdis_retcall,
1326  blocs_wd=self.blocs_wd,
1327  lines_wd=self.lines_wd,
1328  blocs=blocs,
1329  dis_bloc_callback=self.dis_bloc_callback,
1330  dont_dis_nulstart_bloc=self.dont_dis_nulstart_bloc,
1331  attrib=self.attrib)
1332  return blocs
def group_constrained_blocks
Definition: asmbloc.py:803
def getbloc_parents_strict
Definition: asmbloc.py:1179
def get_blockchains_address_interval
Definition: asmbloc.py:848
def expr_is_int_or_label
Definition: asmbloc.py:32