# This file is part of Metasm, the Ruby assembly manipulation suite
# Copyright (C) 2006-2009 Yoann GUILLOT
#
# Licence is LGPL, see LICENCE in the top-level directory

require 'metasm/main'
require 'metasm/render'

module Metasm
# symbolic pointer dereference
# API similar to Expression
class Indirection < ExpressionType
  # Expression (the pointer)
  attr_accessor :target
  alias pointer target
  alias pointer= target=
  # length in bytes of data referenced
  attr_accessor :len
  # address of the instruction who generated the indirection
  attr_accessor :origin

  def initialize(target, len, origin)
    @target, @len, @origin = target, len, origin
  end

  # reduces the pointer expression
  # returns Expression::Unknown if the pointer reduces to it, else a new Indirection on the reduced pointer
  def reduce_rec
    ptr = Expression[@target.reduce]
    (ptr == Expression::Unknown) ? ptr : Indirection.new(ptr, @len, @origin)
  end

  # returns h[self] if the binding has an entry for this indirection,
  # else a new Indirection whose pointer is bound through h
  def bind(h)
    h[self] || Indirection.new(@target.bind(h), @len, @origin)
  end

  # hash/equality consider only the pointer and the length, not the origin
  def hash ; @target.hash^@len.to_i end
  def eql?(o) o.class == self.class and [o.target, o.len] == [@target, @len] end
  alias == eql?

  include Renderable
  # returns the render array: an optional size qualifier ("dword ptr ") followed by the bracketed pointer
  def render
    ret = []
    # no qualifier at all when len is nil ; unusual lengths are shown as a bit count
    qual = {1 => 'byte', 2 => 'word', 4 => 'dword', 8 => 'qword'}[len] || "_#{len*8}bits" if len
    ret << "#{qual} ptr " if qual
    ret << '[' << @target << ']'
  end

  # returns the complexity of the expression (number of externals +1 per indirection)
  def complexity
    1+@target.complexity
  end

  # shortcut constructor: the pointer is built through Expression[*t], the origin is optional
  def self.[](t, l, o=nil)
    new(Expression[*t], l, o)
  end

  def inspect
    "Indirection[#{@target.inspect.sub(/^Expression/, '')}, #{@len.inspect}#{', '+@origin.inspect if @origin}]"
  end

  # the externals of an indirection are those of its pointer
  def externals
    @target.externals
  end

  # matches self against pattern (an Indirection), using vars as the set of pattern variables
  # a key of vars mapped to nil/false is a free variable and gets bound by the match,
  # a key already mapped to a value must be equal to the corresponding part of self
  # returns vars on success, false on mismatch
  def match_rec(pattern, vars)
    return false if not pattern.kind_of? Indirection

    pt = pattern.target
    if vars[pt]
      return false if @target != vars[pt]
    elsif vars.has_key? pt
      vars[pt] = @target
    elsif pt.kind_of? ExpressionType
      return false if not @target.match_rec(pt, vars)
    else
      return false if pt != @target
    end

    pl = pattern.len
    if vars[pl]
      return false if @len != vars[pl]
    elsif vars.has_key? pl
      vars[pl] = @len
    else
      return false if pl != @len
    end

    vars
  end
end

class Expression
  # returns the complexity of the expression (number of externals +1 per indirection)
  def complexity
    case @lexpr
    when ExpressionType; @lexpr.complexity
    when nil, ::Numeric; 0
    else 1
    end +
    case @rexpr
    when ExpressionType; @rexpr.complexity
    when nil, ::Numeric; 0
    else 1
    end
  end

  # returns the array of Indirections found in the left, then the right member of the expression
  # an Indirection member is listed as is, any other ExpressionType member is asked for its own list
  def expr_indirections
    ret = case @lexpr
          when Indirection; [@lexpr]
          when ExpressionType; @lexpr.expr_indirections
          else []
          end
    case @rexpr
    when Indirection; ret << @rexpr
    when ExpressionType; ret.concat @rexpr.expr_indirections
    else ret
    end
  end
end

class EncodedData
  # returns an ::Integer from self.ptr, advances ptr
  # bytes from rawsize to virtsize = 0
  # ignores self.relocations
  # returns nil once ptr has moved past virtsize
  def get_byte
    @ptr += 1
    if @ptr <= @data.length
      b = @data[ptr-1]
      b = b.unpack('C').first if b.kind_of? ::String	# 1.9
      b
    elsif @ptr <= @virtsize
      0
    end
  end

  # reads len bytes from self.data, advances ptr
  # bytes from rawsize to virtsize are returned as zeroes
  # ignores self.relocations
  # the returned string is truncated at virtsize, but ptr is always advanced by len
  def read(len=@virtsize-@ptr)
    vlen = len
    vlen = @virtsize-@ptr if len > @virtsize-@ptr
    str = (@ptr < @data.length) ? @data[@ptr, vlen] : ''
    str = str.to_str.ljust(vlen, "\0") if str.length < vlen
    @ptr += len
    str
  end

  # decodes an immediate value from self.ptr, advances ptr
  # returns an Expression on relocation, or an ::Integer
  # if ptr has a relocation but the type/endianness does not match, the reloc is ignored and a warning is issued
  # raises if type is not a key of Expression::INT_SIZE
  # TODO arg type => sign+len
  def decode_imm(type, endianness)
    raise "invalid imm type #{type.inspect}" if not isz = Expression::INT_SIZE[type]
    if rel = @reloc[@ptr]
      if Expression::INT_SIZE[rel.type] == isz and rel.endianness == endianness
        @ptr += rel.length
        return rel.target
      end
      puts "W: Immediate type/endianness mismatch, ignoring relocation #{rel.target.inspect} (wanted #{type.inspect})" if $DEBUG
    end
    Expression.decode_imm(read(isz/8), type, endianness)
  end
  alias decode_immediate decode_imm
end

class Expression
  # decodes an immediate from a raw binary string
  # type may be a length in bytes, interpreted as unsigned, or an expression type (eg :u32)
  # endianness is either an endianness or an object than responds to endianness
  # off is the offset in str of the first byte to decode
  # raises if the type (or the byte length) has no entry in INT_SIZE
  def self.decode_imm(str, type, endianness, off=0)
    wanted = type
    type = INT_SIZE.keys.find { |k| k.to_s[0] == ?a and INT_SIZE[k] == 8*type } if type.kind_of? ::Integer
    endianness = endianness.endianness if not endianness.kind_of? ::Symbol
    # same diagnostic as EncodedData#decode_imm, instead of a NoMethodError on nil below
    raise "invalid imm type #{wanted.inspect}" if not isz = INT_SIZE[type]

    str = str[off, isz/8].to_s
    # the accumulation below is most-significant byte first
    str = str.reverse if endianness == :little
    val = str.unpack('C*').inject(0) { |val_, b| (val_ << 8) | b }
    val = make_signed(val, isz) if type.to_s[0] == ?i
    val
  end
  class << self
    alias decode_immediate decode_imm
  end
end

class CPU
  # decodes the instruction at edata.ptr, mapped at virtual address addr
  # returns a DecodedInstruction or nil
  def decode_instruction(edata, addr)
    @bin_lookaside ||= build_bin_lookaside
    di = decode_findopcode edata if edata.ptr <= edata.length
    di.address = addr if di
    di = decode_instr_op(edata, di) if di
    decode_instr_interpret(di, addr) if di
  end

  # matches the binary opcode at edata.ptr
  # returns di or nil
  def decode_findopcode(edata)
    DecodedInstruction.new self
  end

  # decodes di.instruction
  # returns di or nil
  def decode_instr_op(edata, di)
  end

  # may modify di.instruction.args for eg jump offset => absolute address
  # returns di or nil
  def decode_instr_interpret(di, addr)
    di
  end

  # number of instructions following a jump that are still executed
  def delay_slot(di=nil)
    0
  end

  def disassembler_default_func
    DecodedFunction.new
  end

  # return something like backtrace_binding in the forward direction
  # set pc_reg to some reg name (eg :pc) to include effects on the instruction pointer
  # when the instruction sets the instruction pointer and its target is not unique,
  # the binding is flagged with :incomplete_binding instead
  def get_fwdemu_binding(di, pc_reg=nil)
    fdi = di.backtrace_binding ||= get_backtrace_binding(di)
    fdi = fix_fwdemu_binding(di, fdi)
    if pc_reg
      # fix_fwdemu_binding may hand back the cached backtrace binding itself:
      # work on a copy so that the pc entries do not leak into di.backtrace_binding
      fdi = fdi.dup if fdi.equal?(di.backtrace_binding)
      if di.opcode.props[:setip]
        xr = get_xrefs_x(nil, di)
        if xr and xr.length == 1
          fdi[pc_reg] = xr[0]
        else
          fdi[:incomplete_binding] = Expression[1]
        end
      else
        fdi[pc_reg] = Expression[pc_reg, :+, di.bin_length]
      end
    end
    fdi
  end

  # patch a forward binding from the backtrace binding
  # useful only on specific instructions that update a register *and* dereference that register (eg push)
  def fix_fwdemu_binding(di, fbd)
    fbd
  end
end
end