Miasm2
 All Classes Namespaces Files Functions Variables Typedefs Properties Macros
ir.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 #-*- coding:utf-8 -*-
3 
4 #
5 # Copyright (C) 2013 Fabrice Desclaux
6 #
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License along
18 # with this program; if not, write to the Free Software Foundation, Inc.,
19 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 #
21 
22 
23 import miasm2.expression.expression as m2_expr
24 from miasm2.expression.expression_helper import get_missing_interval
25 from miasm2.core import asmbloc
26 from miasm2.expression.simplifications import expr_simp
27 from miasm2.core.asmbloc import asm_symbol_pool
28 
29 
class irbloc(object):
    """A labelled block of intermediate-representation (IR) assignments.

    Attributes set by the constructor:
      label  -- the asmbloc.asm_label naming this block
      irs    -- list of "IR lines"; each IR line is itself a list of
                assignment expressions (objects exposing .dst and .src)
      lines  -- list of the source instructions associated with the IR lines
      except_automod -- flag initialised to True (not used in this class)
    """

    def __init__(self, label, irs, lines=None):
        """Create a block.

        @label: an asmbloc.asm_label instance
        @irs: list of lists of assignment expressions
        @lines: optional list of instructions; a new empty list is created
                when omitted
        """
        assert(isinstance(label, asmbloc.asm_label))
        self.label = label
        self.irs = irs
        # A fresh list per instance: a shared `[]` default would be mutated
        # by every block that appends to its `lines`.
        self.lines = [] if lines is None else lines
        self.except_automod = True
        # Cache for the IRDst source and the index of its IR line.
        self._dst = None
        self._dst_linenb = None

    def _get_dst(self):
        """Find the IRDst assignment and update dst, dst_linenb accordingly.

        Returns the source expression assigned to IRDst, or None when the
        block has no such assignment.
        Raises ValueError if IRDst is assigned more than once.
        """
        if self._dst is not None:
            return self._dst

        found_dst = None
        found_linenb = None
        for linenb, assignments in enumerate(self.irs):
            for aff in assignments:
                if not (isinstance(aff.dst, m2_expr.ExprId) and
                        aff.dst.name == "IRDst"):
                    continue
                if found_dst is not None:
                    raise ValueError('Multiple destinations!')
                found_dst = aff.src
                found_linenb = linenb

        self._dst = found_dst
        # Record the line that actually holds IRDst, not the last line
        # visited by the loop; stays None when nothing was found.
        self._dst_linenb = found_linenb
        return found_dst

    def _set_dst(self, value):
        """Find and replace the IRDst assignment's source by @value.

        Raises ValueError when the block holds no IRDst assignment.
        """
        if self._dst_linenb is None:
            self._get_dst()
        if self._dst_linenb is None:
            raise ValueError('No IRDst affectation to replace')

        assignments = self.irs[self._dst_linenb]
        for index, aff in enumerate(assignments):
            if isinstance(aff.dst, m2_expr.ExprId) and aff.dst.name == "IRDst":
                assignments[index] = m2_expr.ExprAff(aff.dst, value)
        self._dst = value

    dst = property(_get_dst, _set_dst)

    @property
    def dst_linenb(self):
        """Line number of the IRDst setting statement in the current irs.

        None until the `dst` property has located an IRDst assignment.
        """
        return self._dst_linenb

    def get_rw(self, regs_ids):
        """
        Computes the variables read and written by each IR line.
        Initializes the attributes needed for in/out and reach computation.
        @regs_ids : ids of registers used in IR

        After the call, self.r[k] / self.w[k] are the sets of ExprId read /
        written by IR line k, and cur_reach, prev_reach, cur_kill, prev_kill
        and defout each hold one {reg: set()} dictionary per IR line.
        """

        def empty_state():
            # One independent {reg: set()} dictionary per IR line.
            return [{reg: set() for reg in regs_ids} for _ in self.irs]

        self.r = []
        self.w = []
        self.cur_reach = empty_state()
        self.prev_reach = empty_state()
        self.cur_kill = empty_state()
        self.prev_kill = empty_state()
        self.defout = empty_state()

        for k, assignments in enumerate(self.irs):
            read, written = set(), set()
            for aff in assignments:
                read.update(x for x in aff.get_r(True)
                            if isinstance(x, m2_expr.ExprId))
                written.update(x for x in aff.get_w()
                               if isinstance(x, m2_expr.ExprId))
                if isinstance(aff.dst, m2_expr.ExprMem):
                    # A memory write also reads the ids used by its address.
                    read.update(x for x in aff.dst.arg.get_r(True)
                                if isinstance(x, m2_expr.ExprId))
                # Each written id is defined by (label, line index, assignment).
                self.defout[k].update((x, {(self.label, k, aff)})
                                      for x in aff.get_w()
                                      if isinstance(x, m2_expr.ExprId))
            self.r.append(read)
            self.w.append(written)

    def __str__(self):
        """Return the label followed by one tab-indented line per
        assignment, with an empty line after each IR line."""
        out = ['%s' % self.label]
        for assignments in self.irs:
            out.extend('\t%s' % aff for aff in assignments)
            out.append("")

        return "\n".join(out)
120 
121 
class ir(object):
    """Base class turning assembly blocs into IR blocs.

    Generated blocs are stored in self.blocs, keyed by label.
    This class calls self.get_ir() and reads self.IRDst but defines neither;
    they have to be supplied elsewhere (presumably by architecture-specific
    subclasses -- confirm against the rest of the project).
    """

    def __init__(self, arch, attrib, symbol_pool=None):
        """
        @arch: architecture object; must provide getpc(attrib), getsp(attrib)
        @attrib: architecture attribute forwarded to getpc/getsp
        @symbol_pool: label pool; a new asm_symbol_pool is created if None
        """
        if symbol_pool is None:
            symbol_pool = asm_symbol_pool()
        self.symbol_pool = symbol_pool
        self.blocs = {}
        self.pc = arch.getpc(attrib)
        self.sp = arch.getsp(attrib)
        self.arch = arch
        self.attrib = attrib

    def instr2ir(self, l):
        """Return the (current IR line, extra IR blocs) pair produced by
        self.get_ir() for instruction @l."""
        ir_bloc_cur, ir_blocs_extra = self.get_ir(l)
        return ir_bloc_cur, ir_blocs_extra

    def get_label(self, ad):
        """Transforms an ExprId/ExprInt/label/int into a label
        @ad: an ExprId/ExprInt/label/int"""

        # Unwrap expressions down to a raw label or integer first.
        if (isinstance(ad, m2_expr.ExprId) and
                isinstance(ad.name, asmbloc.asm_label)):
            ad = ad.name
        if isinstance(ad, m2_expr.ExprInt):
            ad = int(ad.arg)

        if isinstance(ad, (int, long)):
            ad = self.symbol_pool.getby_offset_create(ad)
        elif isinstance(ad, asmbloc.asm_label):
            ad = self.symbol_pool.getby_name_create(ad.name)
        # Anything else is returned untouched.
        return ad

    def get_bloc(self, ad):
        """Returns the irbloc associated to an ExprId/ExprInt/label/int,
        or None when no bloc is registered for it
        @ad: an ExprId/ExprInt/label/int"""

        return self.blocs.get(self.get_label(ad), None)

    def add_instr(self, l, ad=0, gen_pc_updt=False):
        """Wrap the single instruction @l in an asm bloc and add it.

        @l: the instruction
        @ad: unused, kept for interface compatibility
        @gen_pc_updt: forwarded to add_bloc
        Returns the list of IR blocs produced by add_bloc.
        """
        # NOTE(review): the instruction itself is handed to asm_bloc's
        # constructor -- confirm this is the argument asm_bloc expects.
        b = asmbloc.asm_bloc(l)
        b.lines = [l]
        return self.add_bloc(b, gen_pc_updt)

    def merge_multi_affect(self, affect_list):
        """
        If multiple assignments to a same ExprId are present in @affect_list,
        merge them (in place).
        For instance, XCHG AH, AL semantic is
        [
            RAX = {RAX[0:8],0,8, RAX[0:8],8,16, RAX[16:64],16,64}
            RAX = {RAX[8:16],0,8, RAX[8:64],8,64}
        ]
        This function will update @affect_list to replace previous ExprAff by
        [
            RAX = {RAX[8:16],0,8, RAX[0:8],8,16, RAX[16:64],16,64}
        ]
        """

        # Group the assignments by destination
        effect = {}
        for expr in affect_list:
            effect.setdefault(expr.dst, []).append(expr)

        # Find candidates
        for dst, expr_list in effect.items():
            if len(expr_list) <= 1:
                continue

            # Only treat ExprCompose list
            if any(not isinstance(e.src, m2_expr.ExprCompose)
                   for e in expr_list):
                continue

            # Gather the slices modified by any of the assignments
            e_colision = set().union(*[e.get_modified_slice()
                                       for e in expr_list])
            # Sort interval collision
            known_intervals = sorted((x[1], x[2]) for x in e_colision)

            # Fill with missing data: bits nobody modifies keep dst's value
            missing_i = get_missing_interval(known_intervals, 0, dst.size)
            remaining = ((m2_expr.ExprSlice(dst, *interval),
                          interval[0],
                          interval[1])
                         for interval in missing_i)

            # Build the merging expression, slices ordered by start bit
            slices = sorted(e_colision.union(remaining), key=lambda x: x[1])
            final_dst = m2_expr.ExprCompose(slices)

            # Remove unused expression
            for expr in expr_list:
                affect_list.remove(expr)

            # Add the merged one
            affect_list.append(m2_expr.ExprAff(dst, final_dst))

    def getby_offset(self, offset):
        """Return the set of IR blocs having a line whose byte range
        [line.offset, line.offset + line.l) contains @offset."""
        out = set()
        for irb in self.blocs.values():
            for line in irb.lines:
                if line.offset <= offset < line.offset + line.l:
                    out.add(irb)
        return out

    def gen_pc_update(self, c, l):
        """Append to bloc @c an IR line setting the program counter to the
        offset of instruction @l, and record @l as that line's source."""
        pc_value = m2_expr.ExprInt_from(self.pc, l.offset)
        c.irs.append([m2_expr.ExprAff(self.pc, pc_value)])
        c.lines.append(l)

    def add_bloc(self, bloc, gen_pc_updt=False):
        """Translate every instruction of asm bloc @bloc into IR.

        @bloc: asm bloc whose `lines` are the instructions to translate
        @gen_pc_updt: unless False, a PC update line is emitted before each
                      instruction
        Returns the list of all IR blocs generated; they are also passed to
        post_add_bloc.
        """
        c = None
        ir_blocs_all = []
        for l in bloc.lines:
            if c is None:
                # Open a new IR bloc labelled after the current instruction
                label = self.get_instr_label(l)
                c = irbloc(label, [], [])
                ir_blocs_all.append(c)
            ir_bloc_cur, ir_blocs_extra = self.instr2ir(l)

            if gen_pc_updt is not False:
                self.gen_pc_update(c, l)

            c.irs.append(ir_bloc_cur)
            c.lines.append(l)

            if ir_blocs_extra:
                # Extra blocs all originate from the same instruction
                for b in ir_blocs_extra:
                    b.lines = [l] * len(b.irs)
                ir_blocs_all += ir_blocs_extra
                # The next instruction starts a new IR bloc
                c = None
        self.post_add_bloc(bloc, ir_blocs_all)
        return ir_blocs_all

    def expr_fix_regs_for_mode(self, e, *args, **kwargs):
        """Hook: return expression @e unchanged in this base class."""
        return e

    def expraff_fix_regs_for_mode(self, e, *args, **kwargs):
        """Hook: return assignment @e unchanged in this base class."""
        return e

    def irbloc_fix_regs_for_mode(self, irbloc, *args, **kwargs):
        """Hook: does nothing in this base class."""
        return

    def is_pc_written(self, b):
        """Return the first assignment of bloc @b whose destination is one
        of self.arch.pc's values, or None if there is none."""
        all_pc = self.arch.pc.values()
        for assignments in b.irs:
            for aff in assignments:
                if aff.dst in all_pc:
                    return aff
        return None

    def set_empty_dst_to_next(self, bloc, ir_blocs):
        """Give every IR bloc of @ir_blocs lacking an IRDst a destination:
        the label following the last instruction of asm bloc @bloc."""
        for b in ir_blocs:
            if b.dst is not None:
                continue
            dst = m2_expr.ExprId(self.get_next_label(bloc.lines[-1]),
                                 self.pc.size)
            b.irs.append([m2_expr.ExprAff(self.IRDst, dst)])
            # Repeat the last source line so that lines grows along with irs
            b.lines.append(b.lines[-1])

    def gen_edges(self, bloc, ir_blocs):
        """Hook: does nothing in this base class."""
        pass

    def post_add_bloc(self, bloc, ir_blocs):
        """Finalize freshly generated IR blocs and register them in
        self.blocs under their label."""
        self.set_empty_dst_to_next(bloc, ir_blocs)
        self.gen_edges(bloc, ir_blocs)

        for irb in ir_blocs:
            self.irbloc_fix_regs_for_mode(irb, self.attrib)

            # Detect multi-affectation
            for affect_list in irb.irs:
                self.merge_multi_affect(affect_list)

            self.blocs[irb.label] = irb

    def get_instr_label(self, instr):
        """Returns the label associated to an instruction
        @instr: current instruction"""

        return self.symbol_pool.getby_offset_create(instr.offset)

    def gen_label(self):
        """Return a label generated by the symbol pool."""
        # TODO: fix hardcoded offset
        return self.symbol_pool.gen_label()

    def get_next_label(self, instr):
        """Return the label located right after instruction @instr
        (offset + length)."""
        return self.symbol_pool.getby_offset_create(instr.offset + instr.l)

    def simplify_blocs(self):
        """Replace, in place, every assignment of every bloc by a new
        ExprAff whose destination and source went through expr_simp."""
        for b in self.blocs.values():
            for assignments in b.irs:
                for index, aff in enumerate(assignments):
                    assignments[index] = m2_expr.ExprAff(expr_simp(aff.dst),
                                                         expr_simp(aff.src))

    def replace_expr_in_ir(self, bloc, rep):
        """Replace, in place, every assignment of @bloc by the result of
        its replace_expr(@rep)."""
        for assignments in bloc.irs:
            for index, aff in enumerate(assignments):
                assignments[index] = aff.replace_expr(rep)

    def get_rw(self, regs_ids=None):
        """
        Calls get_rw(regs_ids) on each bloc
        @regs_ids : ids of registers used in IR (defaults to an empty list)
        """
        if regs_ids is None:
            regs_ids = []
        for b in self.blocs.values():
            b.get_rw(regs_ids)

    def ExprIsLabel(self, l):
        """Return True if @l is an ExprId whose name is an asm_label."""
        return (isinstance(l, m2_expr.ExprId) and
                isinstance(l.name, asmbloc.asm_label))
def simplify_blocs
Definition: ir.py:318
def ExprIsLabel
Definition: ir.py:337
def get_bloc
Definition: ir.py:153
def _set_dst
Definition: ir.py:58
def __init__
Definition: ir.py:124
def get_instr_label
Definition: ir.py:303
def _get_dst
Definition: ir.py:42
def expraff_fix_regs_for_mode
Definition: ir.py:263
def add_instr
Definition: ir.py:160
def set_empty_dst_to_next
Definition: ir.py:277
def getby_offset
Definition: ir.py:222
def replace_expr_in_ir
Definition: ir.py:324
def instr2ir
Definition: ir.py:134
def irbloc_fix_regs_for_mode
Definition: ir.py:266
def gen_pc_update
Definition: ir.py:230
def gen_label
Definition: ir.py:309
def dst_linenb
Definition: ir.py:72
def gen_edges
Definition: ir.py:286
def get_rw
Definition: ir.py:329
def merge_multi_affect
Definition: ir.py:165
def is_pc_written
Definition: ir.py:269
def add_bloc
Definition: ir.py:235
def get_label
Definition: ir.py:138
def __init__
Definition: ir.py:32
def expr_fix_regs_for_mode
Definition: ir.py:260
def get_next_label
Definition: ir.py:314
def post_add_bloc
Definition: ir.py:289