Miasm2
 All Classes Namespaces Files Functions Variables Typedefs Properties Macros
expression_helper.py
Go to the documentation of this file.
1 #
2 # Copyright (C) 2011 EADS France, Fabrice Desclaux <fabrice.desclaux@eads.net>
3 #
4 # This program is free software; you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation; either version 2 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public License along
15 # with this program; if not, write to the Free Software Foundation, Inc.,
16 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17 #
18 
19 # Expressions manipulation functions
20 import itertools
21 import collections
22 import random
23 import string
24 
25 import miasm2.expression.expression as m2_expr
26 
27 
28 def parity(a):
29  tmp = (a) & 0xFFL
30  cpt = 1
31  while tmp != 0:
32  cpt ^= tmp & 1
33  tmp >>= 1
34  return cpt
35 
36 
38  sources = {}
39  non_slice = {}
40  sources_int = {}
41  for a in args:
42  if isinstance(a[0], m2_expr.ExprInt):
43  # sources_int[a.start] = a
44  # copy ExprInt because we will inplace modify arg just below
45  # /!\ TODO XXX never ever modify inplace args...
46  sources_int[a[1]] = (m2_expr.ExprInt(int(a[0].arg),
47  a[2] - a[1]),
48  a[1],
49  a[2])
50  elif isinstance(a[0], m2_expr.ExprSlice):
51  if not a[0].arg in sources:
52  sources[a[0].arg] = []
53  sources[a[0].arg].append(a)
54  else:
55  non_slice[a[1]] = a
56  # find max stop to determine size
57  max_size = None
58  for a in args:
59  if max_size is None or max_size < a[2]:
60  max_size = a[2]
61 
62  # first simplify all num slices
63  final_sources = []
64  sorted_s = []
65  for x in sources_int.values():
66  x = list(x)
67  # mask int
68  v = x[0].arg & ((1 << (x[2] - x[1])) - 1)
69  x[0] = m2_expr.ExprInt_from(x[0], v)
70  x = tuple(x)
71  sorted_s.append((x[1], x))
72  sorted_s.sort()
73  while sorted_s:
74  start, v = sorted_s.pop()
75  out = [m2_expr.ExprInt(v[0].arg), v[1], v[2]]
76  size = v[2] - v[1]
77  while sorted_s:
78  if sorted_s[-1][1][2] != start:
79  break
80  s_start, s_stop = sorted_s[-1][1][1], sorted_s[-1][1][2]
81  size += s_stop - s_start
82  a = m2_expr.mod_size2uint[size](
83  (int(out[0].arg) << (out[1] - s_start)) +
84  int(sorted_s[-1][1][0].arg))
85  out[0] = m2_expr.ExprInt(a)
86  sorted_s.pop()
87  out[1] = s_start
88  out[0] = m2_expr.ExprInt(int(out[0].arg), size)
89  final_sources.append((start, out))
90 
91  final_sources_int = final_sources
92  # check if same sources have corresponding start/stop
93  # is slice AND is sliceto
94  simp_sources = []
95  for args in sources.values():
96  final_sources = []
97  sorted_s = []
98  for x in args:
99  sorted_s.append((x[1], x))
100  sorted_s.sort()
101  while sorted_s:
102  start, v = sorted_s.pop()
103  ee = v[0].arg[v[0].start:v[0].stop]
104  out = ee, v[1], v[2]
105  while sorted_s:
106  if sorted_s[-1][1][2] != start:
107  break
108  if sorted_s[-1][1][0].stop != out[0].start:
109  break
110 
111  start = sorted_s[-1][1][1]
112  # out[0].start = sorted_s[-1][1][0].start
113  o_e, _, o_stop = out
114  o1, o2 = sorted_s[-1][1][0].start, o_e.stop
115  o_e = o_e.arg[o1:o2]
116  out = o_e, start, o_stop
117  # update _size
118  # out[0]._size = out[0].stop-out[0].start
119  sorted_s.pop()
120  out = out[0], start, out[2]
121 
122  final_sources.append((start, out))
123 
124  simp_sources += final_sources
125 
126  simp_sources += final_sources_int
127 
128  for i, v in non_slice.items():
129  simp_sources.append((i, v))
130 
131  simp_sources.sort()
132  simp_sources = [x[1] for x in simp_sources]
133  return simp_sources
134 
135 
136 op_propag_cst = ['+', '*', '^', '&', '|', '>>',
137  '<<', "a>>", ">>>", "<<<",
138  "/", "%", 'idiv', 'imod', 'umod', 'udiv']
139 
140 
141 def is_pure_int(e):
142  """
143  return True if expr is only composed with integers
144  /!\ ExprCond returns True is src1 and src2 are integers
145  """
146  def modify_cond(e):
147  if isinstance(e, m2_expr.ExprCond):
148  return e.src1 | e.src2
149  return e
150 
151  def find_int(e, s):
152  if isinstance(e, m2_expr.ExprId) or isinstance(e, m2_expr.ExprMem):
153  s.add(e)
154  return e
155  s = set()
156  new_e = e.visit(modify_cond)
157  new_e.visit(lambda x: find_int(x, s))
158  if s:
159  return False
160  return True
161 
162 
164  if isinstance(e, m2_expr.ExprInt):
165  return True
166  if isinstance(e, m2_expr.ExprCond):
167  return (isinstance(e.src1, m2_expr.ExprInt) and
168  isinstance(e.src2, m2_expr.ExprInt))
169  return False
170 
171 
172 def fast_unify(seq, idfun=None):
173  # order preserving unifying list function
174  if idfun is None:
175  idfun = lambda x: x
176  seen = {}
177  result = []
178  for item in seq:
179  marker = idfun(item)
180 
181  if marker in seen:
182  continue
183  seen[marker] = 1
184  result.append(item)
185  return result
186 
187 def get_missing_interval(all_intervals, i_min=0, i_max=32):
188  """Return a list of missing interval in all_interval
189  @all_interval: list of (int, int)
190  @i_min: int, minimal missing interval bound
191  @i_max: int, maximal missing interval bound"""
192 
193  my_intervals = all_intervals[:]
194  my_intervals.sort()
195  my_intervals.append((i_max, i_max))
196 
197  missing_i = []
198  last_pos = i_min
199  for start, stop in my_intervals:
200  if last_pos != start:
201  missing_i.append((last_pos, start))
202  last_pos = stop
203  return missing_i
204 
205 
207  """Identify variables in an expression.
208  Returns:
209  - variables with their corresponding values
210  - original expression with variables translated
211  """
212 
213  # Attribute used to distinguish created variables from original ones
214  is_var_ident = "is_var_ident"
215 
216  def __init__(self, expr, var_prefix="v"):
217  """Set the expression @expr to handle and launch variable identification
218  process
219  @expr: Expr instance
220  @var_prefix: (optional) prefix of the variable name, default is 'v'"""
221 
222  # Init
223  self.var_indice = itertools.count()
224  self.var_asked = set()
225  self._vars = {} # VarID -> Expr
226  self.var_prefix = var_prefix
227 
228  # Launch recurrence
229  self.find_variables_rec(expr)
230 
231  # Compute inter-variable dependencies
232  has_change = True
233  while has_change:
234  has_change = False
235  for var_id, var_value in self._vars.iteritems():
236  cur = var_value
237 
238  # Do not replace with itself
239  to_replace = {v_val:v_id
240  for v_id, v_val in self._vars.iteritems()
241  if v_id != var_id}
242  var_value = var_value.replace_expr(to_replace)
243 
244  if cur != var_value:
245  # Force @self._vars update
246  has_change = True
247  self._vars[var_id] = var_value
248  break
249 
250  # Replace in the original equation
251  self._equation = expr.replace_expr({v_val: v_id for v_id, v_val
252  in self._vars.iteritems()})
253 
254  # Compute variables dependencies
255  self._vars_ordered = collections.OrderedDict()
256  todo = set(self._vars.iterkeys())
257  needs = {}
258 
259  ## Build initial needs
260  for var_id, var_expr in self._vars.iteritems():
261  ### Handle corner cases while using Variable Identifier on an
262  ### already computed equation
263  needs[var_id] = [var_name
264  for var_name in var_expr.get_r(mem_read=True)
265  if self.is_var_identifier(var_name) and \
266  var_name in todo and \
267  var_name != var_id]
268 
269  ## Build order list
270  while todo:
271  done = set()
272  for var_id in todo:
273  all_met = True
274  for need in needs[var_id]:
275  if need not in self._vars_ordered:
276  # A dependency is not met
277  all_met = False
278  break
279  if not all_met:
280  continue
281 
282  # All dependencies are already met, add current
283  self._vars_ordered[var_id] = self._vars[var_id]
284  done.add(var_id)
285 
286  # Update the todo list
287  for element_done in done:
288  todo.remove(element_done)
289 
290  @classmethod
291  def is_var_identifier(cls, expr):
292  "Return True iff @expr is a variable identifier"
293  if not isinstance(expr, m2_expr.ExprId):
294  return False
295 
296  return hasattr(expr, cls.is_var_ident) and \
297  getattr(expr, cls.is_var_ident) == True
298 
299  def find_variables_rec(self, expr):
300  """Recursive method called by find_variable to expand @expr.
301  Set @var_names and @var_values.
302  This implementation is faster than an expression visitor because
303  we do not rebuild each expression.
304  """
305 
306  if (expr in self.var_asked):
307  # Expr has already been asked
308 
309  if (expr not in self._vars.values()):
310  # Create var
311  identifier = m2_expr.ExprId("%s%s" % (self.var_prefix,
312  self.var_indice.next()),
313  size = expr.size)
314  setattr(identifier, self.__class__.is_var_ident, True)
315  self._vars[identifier] = expr
316 
317  # Recursion stop case
318  return
319  else:
320  # First time for @expr
321  self.var_asked.add(expr)
322 
323  if isinstance(expr, m2_expr.ExprOp):
324  for a in expr.args:
325  self.find_variables_rec(a)
326 
327  elif isinstance(expr, m2_expr.ExprInt):
328  pass
329 
330  elif isinstance(expr, m2_expr.ExprId):
331  pass
332 
333  elif isinstance(expr, m2_expr.ExprMem):
334  self.find_variables_rec(expr.arg)
335 
336  elif isinstance(expr, m2_expr.ExprCompose):
337  for a in expr.args:
338  self.find_variables_rec(list(a)[0])
339 
340  elif isinstance(expr, m2_expr.ExprSlice):
341  self.find_variables_rec(expr.arg)
342 
343  elif isinstance(expr, m2_expr.ExprCond):
344  self.find_variables_rec(expr.cond)
345  self.find_variables_rec(expr.src1)
346  self.find_variables_rec(expr.src2)
347 
348  else:
349  raise NotImplementedError("Type not handled: %s" % expr)
350 
351  @property
352  def vars(self):
353  return self._vars_ordered
354 
355  @property
356  def equation(self):
357  return self._equation
358 
359  def __str__(self):
360  "Display variables and final equation"
361  out = ""
362  for var_id, var_expr in self.vars.iteritems():
363  out += "%s = %s\n" % (var_id, var_expr)
364  out += "Final: %s" % self.equation
365  return out
366 
367 
369  """Return an expression randomly generated"""
370 
371  # Identifiers length
372  identifier_len = 5
373  # Identifiers' name charset
374  identifier_charset = string.letters
375  # Number max value
376  number_max = 0xFFFFFFFF
377  # Available operations
378  operations_by_args_number = {1: ["-"],
379  2: ["<<", "<<<", ">>", ">>>"],
380  "2+": ["+", "*", "&", "|", "^"],
381  }
382  # Maximum number of argument for operations
383  operations_max_args_number = 5
384  # If set, output expression is a perfect tree
385  perfect_tree = True
386  # Max argument size in slice, relative to slice size
387  slice_add_size = 10
388  # Maximum number of layer in compose
389  compose_max_layer = 5
390  # Maximum size of memory address in bits
391  memory_max_address_size = 32
392  # Re-use already generated elements to mimic a more realistic behavior
393  reuse_element = True
394  generated_elements = {} # (depth, size) -> [Expr]
395 
396  @classmethod
397  def identifier(cls, size=32):
398  """Return a random identifier
399  @size: (optional) identifier size
400  """
401  return m2_expr.ExprId("".join([random.choice(cls.identifier_charset)
402  for _ in xrange(cls.identifier_len)]),
403  size=size)
404 
405  @classmethod
406  def number(cls, size=32):
407  """Return a random number
408  @size: (optional) number max bits
409  """
410  num = random.randint(0, cls.number_max % (2**size))
411  return m2_expr.ExprInt(num, size)
412 
413  @classmethod
414  def atomic(cls, size=32):
415  """Return an atomic Expression
416  @size: (optional) Expr size
417  """
418  available_funcs = [cls.identifier, cls.number]
419  return random.choice(available_funcs)(size=size)
420 
421  @classmethod
422  def operation(cls, size=32, depth=1):
423  """Return an ExprOp
424  @size: (optional) Operation size
425  @depth: (optional) Expression depth
426  """
427  operand_type = random.choice(cls.operations_by_args_number.keys())
428  if isinstance(operand_type, str) and "+" in operand_type:
429  number_args = random.randint(int(operand_type[:-1]),
430  cls.operations_max_args_number)
431  else:
432  number_args = operand_type
433 
434  args = [cls._gen(size=size, depth=depth - 1)
435  for _ in xrange(number_args)]
436  operand = random.choice(cls.operations_by_args_number[operand_type])
437  return m2_expr.ExprOp(operand,
438  *args)
439 
440  @classmethod
441  def slice(cls, size=32, depth=1):
442  """Return an ExprSlice
443  @size: (optional) Operation size
444  @depth: (optional) Expression depth
445  """
446  start = random.randint(0, size)
447  stop = start + size
448  return cls._gen(size=random.randint(stop, stop + cls.slice_add_size),
449  depth=depth - 1)[start:stop]
450 
451  @classmethod
452  def compose(cls, size=32, depth=1):
453  """Return an ExprCompose
454  @size: (optional) Operation size
455  @depth: (optional) Expression depth
456  """
457  # First layer
458  upper_bound = random.randint(1, size)
459  args = [(cls._gen(size=upper_bound, depth=depth - 1), 0, upper_bound)]
460 
461  # Next layers
462  while (upper_bound < size):
463  if len(args) == (cls.compose_max_layer - 1):
464  # We reach the maximum size
465  upper_bound = size
466  else:
467  upper_bound = random.randint(args[-1][-1] + 1, size)
468 
469  args.append((cls._gen(size=upper_bound - args[-1][-1]),
470  args[-1][-1],
471  upper_bound))
472 
473  return m2_expr.ExprCompose(args)
474 
475  @classmethod
476  def memory(cls, size=32, depth=1):
477  """Return an ExprMem
478  @size: (optional) Operation size
479  @depth: (optional) Expression depth
480  """
481 
482  address_size = random.randint(1, cls.memory_max_address_size)
483  return m2_expr.ExprMem(cls._gen(size=address_size,
484  depth=depth - 1),
485  size=size)
486 
487  @classmethod
488  def _gen(cls, size=32, depth=1):
489  """Internal function for generating sub-expression according to options
490  @size: (optional) Operation size
491  @depth: (optional) Expression depth
492  /!\ @generated_elements is left modified
493  """
494  # Perfect tree handling
495  if not cls.perfect_tree:
496  depth = random.randint(max(0, depth - 2), depth)
497 
498  # Element re-use
499  if cls.reuse_element and random.choice([True, False]) and \
500  (depth, size) in cls.generated_elements:
501  return random.choice(cls.generated_elements[(depth, size)])
502 
503  # Recursion stop
504  if depth == 0:
505  return cls.atomic(size=size)
506 
507  # Build a more complex expression
508  available_funcs = [cls.operation, cls.slice, cls.compose, cls.memory]
509  gen = random.choice(available_funcs)(size=size, depth=depth)
510 
511  # Save it
512  new_value = cls.generated_elements.get((depth, size), []) + [gen]
513  cls.generated_elements[(depth, size)] = new_value
514  return gen
515 
516  @classmethod
517  def get(cls, size=32, depth=1, clean=True):
518  """Return a randomly generated expression
519  @size: (optional) Operation size
520  @depth: (optional) Expression depth
521  @clean: (optional) Clean expression cache between two calls
522  """
523  # Init state
524  if clean:
525  cls.generated_elements = {}
526 
527  # Get an element
528  got = cls._gen(size=size, depth=depth)
529 
530  # Clear state
531  if clean:
532  cls.generated_elements = {}
533 
534  return got
535 
536 def _expr_cmp_gen(arg1, arg2):
537  return (arg2 - arg1) ^ ((arg2 ^ arg1) & ((arg2 - arg1) ^ arg2))
538 
539 def expr_cmpu(arg1, arg2):
540  """
541  Returns a one bit long Expression:
542  * 1 if @arg1 is strictly greater than @arg2 (unsigned)
543  * 0 otherwise.
544  """
545  return (_expr_cmp_gen(arg1, arg2) ^ arg2 ^ arg1).msb()
546 
547 def expr_cmps(arg1, arg2):
548  """
549  Returns a one bit long Expression:
550  * 1 if @arg1 is strictly greater than @arg2 (signed)
551  * 0 otherwise.
552  """
553  return _expr_cmp_gen(arg1, arg2).msb()