Miasm2
 All Classes Namespaces Files Functions Variables Typedefs Properties Macros
analysis.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 #-*- coding:utf-8 -*-
3 
4 import logging
5 
6 from miasm2.ir.symbexec import symbexec
7 from miasm2.core.graph import DiGraph
9  import ExprAff, ExprCond, ExprId, ExprInt, ExprMem
10 
11 log = logging.getLogger("analysis")
12 console_handler = logging.StreamHandler()
13 console_handler.setFormatter(logging.Formatter("%(levelname)-5s: %(message)s"))
14 log.addHandler(console_handler)
15 log.setLevel(logging.WARNING)
16 
17 class ira:
18 
19  def ira_regs_ids(self):
20  """Returns ids of all registers used in the IR"""
21  return self.arch.regs.all_regs_ids + [self.IRDst]
22 
23  def sort_dst(self, todo, done):
24  out = set()
25  while todo:
26  dst = todo.pop()
27  if self.ExprIsLabel(dst):
28  done.add(dst)
29  elif isinstance(dst, ExprMem) or isinstance(dst, ExprInt):
30  done.add(dst)
31  elif isinstance(dst, ExprCond):
32  todo.add(dst.src1)
33  todo.add(dst.src2)
34  elif isinstance(dst, ExprId):
35  out.add(dst)
36  else:
37  done.add(dst)
38  return out
39 
40  def dst_trackback(self, b):
41  dst = b.dst
42  todo = set([dst])
43  done = set()
44 
45  for irs in reversed(b.irs):
46  if len(todo) == 0:
47  break
48  out = self.sort_dst(todo, done)
49  found = set()
50  follow = set()
51  for i in irs:
52  if not out:
53  break
54  for o in out:
55  if i.dst == o:
56  follow.add(i.src)
57  found.add(o)
58  for o in found:
59  out.remove(o)
60 
61  for o in out:
62  if o not in found:
63  follow.add(o)
64  todo = follow
65 
66  return done
67 
68  def gen_graph(self, link_all = True):
69  """
70  Gen irbloc digraph
71  @link_all: also gen edges to non present irblocs
72  """
73  self.g = DiGraph()
74  for lbl, b in self.blocs.items():
75  # print 'add', lbl
76  self.g.add_node(lbl)
77  # dst = self.get_bloc_dst(b)
78  dst = self.dst_trackback(b)
79  # print "\tdst", dst
80  for d in dst:
81  if isinstance(d, ExprInt):
82  d = ExprId(
83  self.symbol_pool.getby_offset_create(int(d.arg)))
84  if self.ExprIsLabel(d):
85  if d.name in self.blocs or link_all is True:
86  self.g.add_edge(lbl, d.name)
87 
88  def graph(self):
89  """Output the graphviz script"""
90  out = """
91  digraph asm_graph {
92  size="80,50";
93  node [
94  fontsize = "16",
95  shape = "box"
96  ];
97  """
98  all_lbls = {}
99  for lbl in self.g.nodes():
100  if lbl not in self.blocs:
101  continue
102  irb = self.blocs[lbl]
103  ir_txt = [str(lbl)]
104  for irs in irb.irs:
105  for l in irs:
106  ir_txt.append(str(l))
107  ir_txt.append("")
108  ir_txt.append("")
109  all_lbls[hash(lbl)] = "\l\\\n".join(ir_txt)
110  for l, v in all_lbls.items():
111  # print l, v
112  out += '%s [label="%s"];\n' % (l, v)
113 
114  for a, b in self.g.edges():
115  # print 'edge', a, b, hash(a), hash(b)
116  out += '%s -> %s;\n' % (hash(a), hash(b))
117  out += '}'
118  return out
119 
120  def remove_dead_instr(self, irb, useful):
121  """Remove dead affectations using previous reaches analysis
122  @irb: irbloc instance
123  @useful: useful statements from previous reach analysis
124  Return True iff the block state has changed
125  PRE: compute_reach(self)
126  """
127  modified = False
128  for k, ir in enumerate(irb.irs):
129  j = 0
130  while j < len(ir):
131  cur_instr = ir[j]
132  if (isinstance(cur_instr.dst, ExprId)
133  and (irb.label, k, cur_instr) not in useful):
134  del ir[j]
135  modified = True
136  else:
137  j += 1
138  return modified
139 
140  def init_useful_instr(self):
141  """Computes a set of triples (block, instruction number, instruction)
142  containing initially useful instructions :
143  - Instructions affecting final value of return registers
144  - Instructions affecting IRDst register
145  - Instructions writing in memory
146  - Function call instructions
147  Return set of intial useful instructions
148  """
149 
150  useful = set()
151 
152  for node in self.g.nodes():
153  if node not in self.blocs:
154  continue
155 
156  block = self.blocs[node]
157  successors = self.g.successors(node)
158  has_son = bool(successors)
159  for p_son in successors:
160  if p_son not in self.blocs:
161  # Leaf has lost its son: don't remove anything
162  # reaching this block
163  for r in self.ira_regs_ids():
164  useful.update(block.cur_reach[-1][r].union(
165  block.defout[-1][r]))
166 
167  # Function call, memory write or IRDst affectation
168  for k, ir in enumerate(block.irs):
169  for i_cur in ir:
170  if i_cur.src.is_function_call():
171  # /!\ never remove ir calls
172  useful.add((block.label, k, i_cur))
173  if isinstance(i_cur.dst, ExprMem):
174  useful.add((block.label, k, i_cur))
175  useful.update(block.defout[k][self.IRDst])
176 
177  # Affecting return registers
178  if not has_son:
179  for r in self.get_out_regs(block):
180  useful.update(block.defout[-1][r]
181  if block.defout[-1][r] else
182  block.cur_reach[-1][r])
183 
184  return useful
185 
186  def _mark_useful_code(self):
187  """Mark useful statements using previous reach analysis
188 
189  Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
190  IBM Thomas J. Watson Research Division, Algorithm MK
191 
192  Return a set of triplets (block, instruction number, instruction) of
193  useful instructions
194  PRE: compute_reach(self)
195 
196  """
197 
198  useful = self.init_useful_instr()
199  worklist = useful.copy()
200  while worklist:
201  elem = worklist.pop()
202  useful.add(elem)
203  irb, irs_ind, ins = elem
204 
205  block = self.blocs[irb]
206  instr_defout = block.defout[irs_ind]
207  cur_kill = block.cur_kill[irs_ind]
208  cur_reach = block.cur_reach[irs_ind]
209 
210  # Handle dependencies of used variables in ins
211  for reg in ins.get_r(True).intersection(self.ira_regs_ids()):
212  worklist.update(
213  cur_reach[reg].difference(useful).difference(
214  cur_kill[reg]
215  if not instr_defout[reg] else
216  set()))
217  for _, _, i in instr_defout[reg]:
218  # Loop case (i in defout of current block)
219  if i == ins:
220  worklist.update(cur_reach[reg].difference(useful))
221  return useful
222 
223  def remove_dead_code(self):
224  """Remove dead instructions in each block of the graph using the reach
225  analysis .
226  Returns True if a block has been modified
227  PRE : compute_reach(self)
228  """
229  useful = self._mark_useful_code()
230  modified = False
231  for block in self.blocs.values():
232  modified |= self.remove_dead_instr(block, useful)
233  return modified
234 
235  def set_dead_regs(self, b):
236  pass
237 
238  def add_unused_regs(self):
239  pass
240 
241  @staticmethod
242  def print_set(v_set):
243  """Print each triplet contained in a set
244  @v_set: set containing triplets elements
245  """
246  for p in v_set:
247  print ' (%s, %s, %s)' % p
248 
249  def dump_bloc_state(self, irb):
250  print '*'*80
251  for k, irs in enumerate(irb.irs):
252  for i in xrange(len(irs)):
253  print 5*"-"
254  print 'instr', k, irs[i]
255  print 5*"-"
256  for v in self.ira_regs_ids():
257  if irb.cur_reach[k][v]:
258  print 'REACH[%d][%s]' % (k, v)
259  self.print_set(irb.cur_reach[k][v])
260  if irb.cur_kill[k][v]:
261  print 'KILL[%d][%s]' % (k, v)
262  self.print_set(irb.cur_kill[k][v])
263  if irb.defout[k][v]:
264  print 'DEFOUT[%d][%s]' % (k, v)
265  self.print_set(irb.defout[k][v])
266 
267  def compute_reach_block(self, irb):
268  """Variable influence computation for a single block
269  @irb: irbloc instance
270  PRE: init_reach()
271  """
272 
273  reach_block = {key: value.copy()
274  for key, value in irb.cur_reach[0].iteritems()}
275 
276  # Compute reach from predecessors
277  for n_pred in self.g.predecessors(irb.label):
278  p_block = self.blocs[n_pred]
279 
280  # Handle each register definition
281  for c_reg in self.ira_regs_ids():
282  # REACH(n) = U[p in pred] DEFOUT(p) U REACH(p)\KILL(p)
283  pred_through = p_block.defout[-1][c_reg].union(
284  p_block.cur_reach[-1][c_reg].difference(
285  p_block.cur_kill[-1][c_reg]))
286  reach_block[c_reg].update(pred_through)
287 
288  # If a predecessor has changed
289  if reach_block != irb.cur_reach[0]:
290  irb.cur_reach[0] = reach_block
291  for c_reg in self.ira_regs_ids():
292  if irb.defout[0][c_reg]:
293  # KILL(n) = DEFOUT(n) ? REACH(n)\DEFOUT(n) : EMPTY
294  irb.cur_kill[0][c_reg].update(
295  reach_block[c_reg].difference(irb.defout[0][c_reg]))
296 
297  # Compute reach and kill for block's instructions
298  for i in xrange(1, len(irb.irs)):
299  for c_reg in self.ira_regs_ids():
300  # REACH(n) = U[p in pred] DEFOUT(p) U REACH(p)\KILL(p)
301  pred_through = irb.defout[i - 1][c_reg].union(
302  irb.cur_reach[i - 1][c_reg].difference(
303  irb.cur_kill[i - 1][c_reg]))
304  irb.cur_reach[i][c_reg].update(pred_through)
305  if irb.defout[i][c_reg]:
306  # KILL(n) = DEFOUT(n) ? REACH(n)\DEFOUT(n) : EMPTY
307  irb.cur_kill[i][c_reg].update(
308  irb.cur_reach[i][c_reg].difference(
309  irb.defout[i][c_reg]))
310 
312  """Return True iff a fixed point has been reached during reach
313  analysis"""
314 
315  fixed = True
316  for node in self.g.nodes():
317  if node in self.blocs:
318  irb = self.blocs[node]
319  if (irb.cur_reach != irb.prev_reach or
320  irb.cur_kill != irb.prev_kill):
321  fixed = False
322  irb.prev_reach = irb.cur_reach[:]
323  irb.prev_kill = irb.cur_kill[:]
324  return fixed
325 
326  def compute_reach(self):
327  """
328  Compute reach, defout and kill sets until a fixed point is reached.
329 
330  Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
331  IBM Thomas J. Watson Research Division, page 43
332 
333  PRE: gen_graph()
334  """
335  fixed_point = False
336  log.debug('iteration...')
337  while not fixed_point:
338  for node in self.g.nodes():
339  if node in self.blocs:
340  self.compute_reach_block(self.blocs[node])
341  fixed_point = self._test_kill_reach_fix()
342 
343  def dead_simp(self):
344  """
345  This function is used to analyse relation of a * complete function *
346  This means the blocks under study represent a solid full function graph.
347 
348  Source : Kennedy, K. (1979). A survey of data flow analysis techniques.
349  IBM Thomas J. Watson Research Division, page 43
350 
351  PRE: gen_graph()
352  """
353  # Update r/w variables for all irblocs
354  self.get_rw(self.ira_regs_ids())
355  # Liveness step
356  self.compute_reach()
357  self.remove_dead_code()
358  # Simplify expressions
359  self.simplify_blocs()
360 
361  def gen_equations(self):
362  for irb in self.blocs.values():
363  symbols_init = {}
364  for r in self.arch.regs.all_regs_ids:
365  x = ExprId(r.name, r.size)
366  x.is_term = True
367  symbols_init[r] = x
368  sb = symbexec(self, dict(symbols_init))
369  sb.emulbloc(irb)
370  eqs = []
371  for n_w in sb.symbols:
372  v = sb.symbols[n_w]
373  if n_w in symbols_init and symbols_init[n_w] == v:
374  continue
375  eqs.append(ExprAff(n_w, v))
376  print '*' * 40
377  print irb
378  irb.irs = [eqs]
379  irb.lines = [None]
380 
381  def sizeof_char(self):
382  "Return the size of a char in bits"
383  raise NotImplementedError("Abstract method")
384 
385  def sizeof_short(self):
386  "Return the size of a short in bits"
387  raise NotImplementedError("Abstract method")
388 
389  def sizeof_int(self):
390  "Return the size of an int in bits"
391  raise NotImplementedError("Abstract method")
392 
393  def sizeof_long(self):
394  "Return the size of a long in bits"
395  raise NotImplementedError("Abstract method")
396 
397  def sizeof_pointer(self):
398  "Return the size of a void* in bits"
399  raise NotImplementedError("Abstract method")