16 log_asmbloc = logging.getLogger(
"asmblock")
17 console_handler = logging.StreamHandler()
18 console_handler.setFormatter(logging.Formatter(
"%(levelname)-5s: %(message)s"))
19 log_asmbloc.addHandler(console_handler)
20 log_asmbloc.setLevel(logging.WARNING)
24 return isinstance(a, int)
or isinstance(a, long)
or \
25 isinstance(a, moduint)
or isinstance(a, modint)
29 return isinstance(e, m2_expr.ExprId)
and isinstance(e.name, asm_label)
33 return isinstance(e, m2_expr.ExprInt)
or \
34 (isinstance(e, m2_expr.ExprId)
and isinstance(e.name, asm_label))
39 "Stand for an assembly label"
44 name =
"loc_%.16X" % (int(name) & 0xFFFFFFFFFFFFFFFF)
53 if isinstance(self.
offset, (int, long)):
61 rep += repr(self.
name) +
' '
85 return "%s:%s" % (str(self.
c_t), str(self.
label))
91 super(asm_constraint_next, self).
__init__(
92 label, c_t=asm_constraint.c_next)
98 super(asm_constraint_to, self).
__init__(
99 label, c_t=asm_constraint.c_to)
105 super(asm_constraint_bad, self).
__init__(
106 label, c_t=asm_constraint.c_bad)
119 out.append(str(self.
label))
126 lbls.append(
"Unknown? ")
128 lbls.append(str(l) +
" ")
129 lbls =
'\t'.join(lbls)
131 return '\n'.join(out)
141 log_asmbloc.debug(
'split at %x', offset)
143 offsets = [x.offset
for x
in self.
lines]
144 if not l.offset
in offsets:
146 'cannot split bloc at %X ' % offset +
147 'middle instruction? default middle')
151 i = offsets.index(offset)
155 log_asmbloc.debug(
'flow mod %r', flow_mod_instr)
161 log_asmbloc.debug(
'lbl %s', xx)
163 [x
for x
in self.
bto if x.c_t == asm_constraint.c_next])
164 c_to = [x
for x
in self.
bto if x.c_t != asm_constraint.c_next]
165 self.
bto = set([c] + c_to)
166 new_bloc.bto = c_next
168 new_bloc.bto = self.
bto
174 return self.
lines[0].offset, self.
lines[-1].offset
179 return [x.offset
for x
in self.
lines]
182 if type(offset)
in [int, long]:
183 l = symbol_pool.getby_offset_create(offset)
184 elif type(offset)
is str:
185 l = symbol_pool.getby_name_create(offset)
186 elif isinstance(offset, asm_label):
189 raise ValueError(
'unknown offset type %r' % offset)
196 for i
in xrange(-1, -1 - self.
lines[0].delayslot - 1, -1):
197 if not 0 <= i < len(self.
lines):
200 if l.splitflow()
or l.breakflow():
201 raise NotImplementedError(
'not fully functional')
206 delayslot = self.
lines[0].delayslot
207 end_index = len(self.
lines) - 1
208 ds_max_index = max(end_index - delayslot, 0)
209 for i
in xrange(end_index, ds_max_index - 1, -1):
217 if x.c_t == asm_constraint.c_next:
232 Create and add a label to the symbol_pool
234 @offset: (optional) label's offset
241 raise ValueError(
'symbol %s has same offset as %s' %
245 raise ValueError(
'symbol %s has same name as %s' %
248 self._labels.append(label)
249 if label.offset
is not None:
259 self._name2label.pop(label.name,
None)
260 self._offset2label.pop(label.offset,
None)
262 self._labels.remove(label)
265 """Unpin the @label from its offset"""
266 self._offset2label.pop(label.offset,
None)
270 """Retrieve label using its @offset"""
271 return self._offset2label.get(offset,
None)
274 """Retrieve label using its @name"""
275 return self._name2label.get(name,
None)
278 """Get a label from its @name, create it if it doesn't exist"""
285 """Get a label from its @offset, create it if it doesn't exist"""
292 """Rename the @label name to @newname"""
294 raise ValueError(
'Symbol already known')
295 self._name2label.pop(label.name,
None)
300 """Pin the @label from at @offset
301 Note that there is a special case when the offset is a list
302 it happens when offsets are recomputed in resolve_symbol*
305 raise ValueError(
'label should not be None')
307 raise ValueError(
'label %s not in symbol pool' % label)
309 raise ValueError(
'Conflict in label %s' % label)
310 self._offset2label.pop(label.offset,
None)
311 label.offset = offset
317 """Return all labels"""
321 return reduce(
lambda x, y: x + str(y) +
'\n', self.
_labels,
"")
328 raise KeyError(
'unknown symbol %r' % item)
334 """Merge with another @symbol_pool"""
335 self.
_labels += symbol_pool._labels
336 self._name2label.update(symbol_pool._name2label)
337 self._offset2label.update(symbol_pool._offset2label)
340 """Generate a new unpinned label"""
346 def dis_bloc(mnemo, pool_bin, cur_bloc, offset, job_done, symbol_pool,
347 dont_dis=[], split_dis=[
348 ], follow_call=
False, dontdis_retcall=
False, lines_wd=
None,
349 dis_bloc_callback=
None, dont_dis_nulstart_bloc=
False,
354 delayslot_count = mnemo.delayslot
355 offsets_to_dis = set()
356 add_next_offset =
False
357 log_asmbloc.debug(
"dis at %X", int(offset))
358 while not in_delayslot
or delayslot_count > 0:
362 if offset
in dont_dis
or (lines_cpt > 0
and offset
in split_dis):
363 cur_bloc.add_cst(offset, asm_constraint.c_next, symbol_pool)
364 offsets_to_dis.add(offset)
368 if lines_wd
is not None and lines_cpt > lines_wd:
372 if offset
in job_done:
373 cur_bloc.add_cst(offset, asm_constraint.c_next, symbol_pool)
379 instr = mnemo.dis(pool_bin, attrib, offset)
380 except (Disasm_Exception, IOError), e:
381 log_asmbloc.warning(e)
385 log_asmbloc.warning(
"cannot disasm at %X", int(off_i))
386 cur_bloc.add_cst(off_i, asm_constraint.c_bad, symbol_pool)
390 if dont_dis_nulstart_bloc
and instr.b.count(
'\x00') == instr.l:
391 log_asmbloc.warning(
"reach nul instr at %X", int(off_i))
392 cur_bloc.add_cst(off_i, asm_constraint.c_bad, symbol_pool)
396 if in_delayslot
and instr
and (instr.splitflow()
or instr.breakflow()):
397 add_next_offset =
True
401 log_asmbloc.debug(
"dis at %X", int(offset))
404 log_asmbloc.debug(instr)
405 log_asmbloc.debug(instr.args)
407 cur_bloc.addline(instr)
408 if not instr.breakflow():
411 if instr.splitflow()
and not (instr.is_subcall()
and dontdis_retcall):
412 add_next_offset =
True
416 instr.dstflow2label(symbol_pool)
417 dst = instr.getdstflow(symbol_pool)
420 if isinstance(d, m2_expr.ExprId)
and \
421 isinstance(d.name, asm_label):
424 if (
not instr.is_subcall())
or follow_call:
430 delayslot_count = instr.delayslot
432 for c
in cur_bloc.bto:
433 if c.c_t == asm_constraint.c_bad:
435 if isinstance(c.label, asm_label):
436 offsets_to_dis.add(c.label.offset)
439 cur_bloc.add_cst(offset, asm_constraint.c_next, symbol_pool)
440 offsets_to_dis.add(offset)
442 if dis_bloc_callback
is not None:
444 mnemo, attrib, pool_bin, cur_bloc, offsets_to_dis, symbol_pool)
446 return offsets_to_dis
449 def split_bloc(mnemo, attrib, pool_bin, blocs,
450 symbol_pool, more_ref=
None, dis_bloc_callback=
None):
455 bloc_dst = [symbol_pool._offset2label[x]
for x
in more_ref]
458 if not isinstance(c.label, asm_label):
460 if c.c_t == asm_constraint.c_bad:
462 bloc_dst.append(c.label)
464 bloc_dst = [x.offset
for x
in bloc_dst
if x.offset
is not None]
467 while j < len(blocs) - 1:
470 a, b = cb.get_range()
473 if not (off > a
and off <= b):
475 l = symbol_pool.getby_offset_create(off)
476 new_b = cb.split(off, l)
477 log_asmbloc.debug(
"split bloc %x", off)
479 log_asmbloc.error(
"cannot split %x!!", off)
481 if dis_bloc_callback:
482 offsets_to_dis = set(
483 [x.label.offset
for x
in new_b.bto
484 if isinstance(x.label, asm_label)])
486 mnemo, attrib, pool_bin, new_b, offsets_to_dis,
489 a, b = cb.get_range()
494 def dis_bloc_all(mnemo, pool_bin, offset, job_done, symbol_pool, dont_dis=[],
495 split_dis=[], follow_call=
False, dontdis_retcall=
False,
496 blocs_wd=
None, lines_wd=
None, blocs=
None,
497 dis_bloc_callback=
None, dont_dis_nulstart_bloc=
False,
499 log_asmbloc.info(
"dis bloc all")
507 if blocs_wd
is not None and bloc_cpt > blocs_wd:
508 log_asmbloc.debug(
"blocs watchdog reached at %X", int(offset))
521 if not isinstance(dd, tuple):
529 l = symbol_pool.getby_offset_create(n)
531 todo +=
dis_bloc(mnemo, pool_bin, cur_bloc, n, job_done, symbol_pool,
532 dont_dis, split_dis, follow_call, dontdis_retcall,
533 dis_bloc_callback=dis_bloc_callback,
535 dont_dis_nulstart_bloc=dont_dis_nulstart_bloc,
537 blocs.append(cur_bloc)
539 return split_bloc(mnemo, attrib, pool_bin, blocs,
540 symbol_pool, dis_bloc_callback=dis_bloc_callback)
544 """Render dot graph of @blocks"""
546 escape_chars = re.compile(
'[' + re.escape(
'{}') +
']')
547 label_attr =
'colspan="2" align="center" bgcolor="grey"'
548 edge_attr =
'label = "%s" color="%s" style="bold"'
549 td_attr =
'align="left"'
550 block_attr =
'shape="Mrecord" fontname="Courier New"'
552 out = [
"digraph asm_graph {"]
553 fix_chars =
lambda x:
'\\' + x.group()
558 out_block =
'%s [\n' % block.label.name
559 out_block +=
"%s " % block_attr
560 out_block +=
'label =<<table border="0" cellborder="0" cellpadding="3">'
562 block_label =
'<tr><td %s>%s</td></tr>' % (
563 label_attr, block.label.name)
564 block_html_lines = []
566 for line
in block.lines:
568 out_render =
"%.8X</td><td %s> " % (line.offset, td_attr)
571 out_render += escape_chars.sub(fix_chars, str(line))
572 block_html_lines.append(out_render)
573 block_html_lines = (
'<tr><td %s>' % td_attr +
574 (
'</td></tr><tr><td %s>' % td_attr).join(block_html_lines) +
576 out_block +=
"%s " % block_label
577 out_block += block_html_lines +
"</table>> ];"
578 out_blocks.append(out_block)
584 for next_b
in block.bto:
585 if (isinstance(next_b.label, m2_expr.ExprId)
or
586 isinstance(next_b.label, asm_label)):
587 src, dst, cst = block.label.name, next_b.label.name, next_b.c_t
590 if isinstance(src, asm_label):
592 if isinstance(dst, asm_label):
596 if next_b.c_t == asm_constraint.c_next:
598 elif next_b.c_t == asm_constraint.c_to:
599 edge_color =
"limegreen"
601 if len(block.bto) == 1:
604 out.append(
'%s -> %s' % (src, dst) +
605 '[' + edge_attr % (cst, edge_color) +
'];')
608 return '\n'.join(out)
614 Try to keep original instruction bytes if it exists
616 candidates = mnemo.asm(instr, symbols)
618 raise ValueError(
'cannot asm:%s' % str(instr))
619 if not hasattr(instr,
"b"):
620 return candidates[0], candidates
621 if instr.b
in candidates:
622 return instr.b, candidates
625 if len(c) == len(instr.b):
627 return candidates[0], candidates
631 """Resolve an expression @expr using @symbols"""
633 if isinstance(e, m2_expr.ExprId):
634 s = symbols._name2label[e.name]
635 e = m2_expr.ExprInt_from(e, s.offset)
637 result = expr.visit(expr_calc)
639 if not isinstance(result, m2_expr.ExprInt):
640 raise RuntimeError(
'Cannot resolve symbol %s' % expr)
645 """Asm and compute max block size"""
649 for instr
in block.lines:
650 if isinstance(instr, asm_raw):
652 if isinstance(instr.raw, list):
654 if len(instr.raw) == 0:
657 l = instr.raw[0].size / 8 * len(instr.raw)
658 elif isinstance(instr.raw, str):
662 raise NotImplementedError(
'asm raw')
668 candidates = mnemo.asm(instr)
669 l = len(candidates[-1])
671 l = mnemo.max_instruction_len
678 block.max_size = size
679 log_asmbloc.info(
"size: %d max: %d", block.size, block.max_size)
683 """Fix the @label offset to @offset. If the @offset has changed, add @label
685 @symbol_pool: current symbol_pool
687 if label.offset == offset:
689 symbol_pool.set_offset(label, offset)
695 """Manage blocks linked with an asm_constraint_next"""
704 """Return True iff at least one block is pinned"""
709 for i, block
in enumerate(self.
blocks):
710 if is_int(block.label.offset):
712 raise ValueError(
"Multiples pinned block detected")
716 """Compute BlockChain min_offset and max_offset using pinned block and
722 self.
max_size += block.max_size + block.alignment - 1
734 (block.alignment - block.max_size) % block.alignment
739 (block.alignment - block.max_size) % block.alignment
742 """Best effort merge two block chains
743 Return the list of resulting blockchains"""
744 self.
blocks += chain.blocks
749 """Propagate a pinned to its blocks' neighbour
750 @modified_labels: store new pinned labels"""
753 raise ValueError(
'Trying to fix unpinned block')
757 offset = pinned_block.label.offset
758 if offset % pinned_block.alignment != 0:
759 raise RuntimeError(
'Bad alignment')
762 new_offset = offset - block.size
763 new_offset = new_offset - new_offset % pinned_block.alignment
770 offset = pinned_block.label.offset + pinned_block.size
772 last_block = pinned_block
774 offset += (- offset) % last_block.alignment
781 return modified_labels
786 """Stand for wedges between blocks"""
796 """Best effort merge two block chains
797 Return the list of resulting blockchains"""
798 self.symbol_pool.set_offset(chain.blocks[0].label, self.
offset_max)
805 Return the BlockChains list built from grouped asm blocks linked by
807 @blocks: a list of asm block
809 log_asmbloc.info(
'group_constrained_blocks')
812 remaining_blocks = list(blocks)
813 known_block_chains = {}
814 lbl2block = {block.label: block
for block
in blocks}
816 while remaining_blocks:
818 block_list = [remaining_blocks.pop()]
823 next_label = block_list[-1].get_next()
824 if next_label
is None or next_label
not in lbl2block:
826 next_block = lbl2block[next_label]
829 if next_block
not in remaining_blocks:
831 block_list.append(next_block)
832 remaining_blocks.remove(next_block)
835 if next_label
is not None and next_label
in known_block_chains:
836 block_list += known_block_chains[next_label]
837 del known_block_chains[next_label]
839 known_block_chains[block_list[0].label] = block_list
841 out_block_chains = []
842 for label
in known_block_chains:
843 chain =
BlockChain(symbol_pool, known_block_chains[label])
844 out_block_chains.append(chain)
845 return out_block_chains
849 """Compute the interval used by the pinned @blockChains
850 Check if the placed chains are in the @dst_interval"""
853 for chain
in blockChains:
856 chain_interval =
interval([(chain.offset_min, chain.offset_max - 1)])
857 if chain_interval
not in dst_interval:
858 raise ValueError(
'Chain placed out of destination interval')
859 allocated_interval += chain_interval
860 return allocated_interval
864 """Place @blockChains in the @dst_interval"""
866 log_asmbloc.info(
'resolve_symbol')
867 if dst_interval
is None:
868 dst_interval =
interval([(0, 0xFFFFFFFFFFFFFFFF)])
871 [(-1, 0xFFFFFFFFFFFFFFFF + 1)]) - dst_interval
874 log_asmbloc.debug(
'allocated interval: %s', allocated_interval)
876 pinned_chains = [chain
for chain
in blockChains
if chain.pinned]
879 for start, stop
in forbidden_interval.intervals:
881 symbol_pool, offset=start, size=stop + 1 - start)
882 pinned_chains.append(wedge)
885 pinned_chains.sort(key=
lambda x: x.offset_min)
886 blockChains.sort(key=
lambda x: -x.max_size)
888 fixed_chains = list(pinned_chains)
890 log_asmbloc.debug(
"place chains")
891 for chain
in blockChains:
895 for i
in xrange(1, len(fixed_chains)):
896 prev_chain = fixed_chains[i - 1]
897 next_chain = fixed_chains[i]
899 if prev_chain.offset_max + chain.max_size < next_chain.offset_min:
900 new_chains = prev_chain.merge(chain)
901 fixed_chains[i - 1:i] = new_chains
905 raise RuntimeError(
'Cannot find enough space to place blocks')
907 return [chain
for chain
in fixed_chains
if isinstance(chain, BlockChain)]
911 """Extract labels from list of ExprId @exprs"""
912 return set(expr.name
for expr
in exprs
if isinstance(expr.name, asm_label))
916 """Extract labels used by @block"""
918 for instr
in block.lines:
919 if isinstance(instr, asm_raw):
920 if isinstance(instr.raw, list):
921 for expr
in instr.raw:
922 symbols.update(m2_expr.get_expr_ids(expr))
924 for arg
in instr.args:
925 symbols.update(m2_expr.get_expr_ids(arg))
931 """Assemble a @block using @symbol_pool
932 @conservative: (optional) use original bytes when possible
936 for instr
in block.lines:
937 if isinstance(instr, asm_raw):
938 if isinstance(instr.raw, list):
941 for expr
in instr.raw:
943 data += pck[expr_int.size](expr_int.arg)
946 instr.offset = offset_i
951 saved_args = list(instr.args)
952 instr.offset = block.label.offset + offset_i
955 instr.args = instr.resolve_args_with_symbols(symbol_pool)
962 mnemo, instr, symbol_pool, conservative)
965 instr.args = saved_args
968 block.size = block.size - old_l + len(cached_candidate)
969 instr.data = cached_candidate
970 instr.l = len(cached_candidate)
975 def asmbloc_final(mnemo, blocks, blockChains, symbol_pool, conservative=False):
976 """Resolve and assemble @blockChains using @symbol_pool until fixed point is
979 log_asmbloc.debug(
"asmbloc_final")
982 lbl2block = {block.label: block
for block
in blocks}
983 blocks_using_label = {}
987 blocks_using_label.setdefault(label, set()).
add(block)
990 for chain
in blockChains:
991 for block
in chain.blocks:
992 block2chain[block] = chain
995 blocks_to_rework = set(blocks)
1001 modified_labels = set()
1002 for chain
in blockChains:
1003 chain.fix_blocks(modified_labels)
1005 for label
in modified_labels:
1007 if label
in lbl2block:
1008 blocks_to_rework.add(lbl2block[label])
1011 if label
not in blocks_using_label:
1013 for block
in blocks_using_label[label]:
1014 blocks_to_rework.add(block)
1017 if not blocks_to_rework:
1020 while blocks_to_rework:
1021 block = blocks_to_rework.pop()
1026 """Do sanity checks on blocks' constraints:
1027 * no multiple next constraint to same block
1028 * no next constraint to self"""
1031 graph = blocks_graph.g
1032 for label
in graph.nodes():
1033 if blocks_graph.blocs[label].get_next() == label:
1034 raise RuntimeError(
'Bad constraint: self in next')
1036 for pred
in graph.predecessors(label):
1037 if not pred
in blocks_graph.blocs:
1039 if blocks_graph.blocs[pred].get_next() == label:
1041 if len(pred_next) > 1:
1042 raise RuntimeError(
"Too many next constraints for bloc %r" % label)
1046 """Resolve and assemble @blocks using @symbol_pool into interval
1054 blockChains, symbol_pool, dst_interval)
1056 asmbloc_final(mnemo, blocks, resolved_blockChains, symbol_pool)
1060 for block
in blocks:
1061 offset = block.label.offset
1062 for instr
in block.lines:
1066 assert len(instr.data) == instr.l
1067 patches[offset] = instr.data
1068 instruction_interval =
interval([(offset, offset + instr.l - 1)])
1069 if not (instruction_interval & output_interval).empty:
1070 raise RuntimeError(
"overlapping bytes %X" % int(offset))
1071 instr.offset = offset
1079 return: graph of asmbloc
1084 g.lbl2bloc[b.label] = b
1087 g.add_edge(b.label, x.label)
1099 self.
blocs[b.label] = b
1100 self.g.add_node(b.label)
1102 if isinstance(dst.label, asm_label):
1103 self.g.add_edge(b.label, dst.label)
1111 for b
in self.blocs.values():
1113 if c.c_t == asm_constraint.c_bad:
1121 if l
in [x.label
for x
in b.bto
if isinstance(x.label, asm_label)]:
1133 if not blocby_label:
1136 blocby_label[b.label] = b
1147 done.update(
getbloc_around(blocs, b, level - 1, done, blocby_label))
1149 b = blocby_label[b.label]
1152 done.update(
getbloc_around(blocs, b, level - 1, done, blocby_label))
1158 if not blocby_label:
1161 blocby_label[b.label] = b
1179 blocs, a, level=3, rez=
None, done=
None, blocby_label=
None):
1181 if not blocby_label:
1184 blocby_label[b.label] = b
1200 blocs, b, level - 1, rez, done, blocby_label))
1212 if x.c_t != asm_constraint.c_next:
1214 if not x.label
in blocby_label:
1215 log_asmbloc.error(
'XXX unknown label')
1217 x = blocby_label[x.label]
1228 blocby_label[b.label] = b
1232 while i < len(blocs) - 1:
1235 if b.label
in dont_merge:
1247 bp = blocby_label[bpl]
1249 if len(bp.bto) != 1:
1262 if l.breakflow()
and l.dstflow():
1269 if not isinstance(s.label, asm_label):
1271 if s.label.name ==
None:
1273 if not s.label
in blocby_label:
1274 log_asmbloc.error(
"unknown parent XXX")
1276 bs = blocby_label[s.label]
1277 for p
in list(bs.parents):
1279 bs.parents.discard(p)
1280 bs.parents.add(bp.label)
1304 self.__dict__.update(kwargs)
1307 l = self.symbol_pool.getby_offset_create(offset)
def _set_pinned_block_idx
def group_constrained_blocks
def getbloc_parents_strict
def get_blockchains_address_interval