# This file is part of Metasm, the Ruby assembly manipulation suite # Copyright (C) 2006-2009 Yoann GUILLOT # # Licence is LGPL, see LICENCE in the top-level directory require 'metasm/cpu/ia32/opcodes' require 'metasm/decode' module Metasm class Ia32 class ModRM def self.decode(edata, byte, endianness, adsz, opsz, seg=nil, regclass=Reg, h = {}) m = (byte >> 6) & 3 rm = byte & 7 if m == 3 return regclass.new(rm, opsz) end sum = Sum[adsz][m][rm] s, i, b, imm = nil sum.each { |a| case a when Integer if not b b = Reg.new(a, adsz) else s = 1 if h[:mrmvex] i = SimdReg.new(a, h[:mrmvex]) else i = Reg.new(a, adsz) end end when :sib sib = edata.get_byte.to_i ii = ((sib >> 3) & 7) if ii != 4 s = 1 << ((sib >> 6) & 3) if h[:mrmvex] i = SimdReg.new(ii, h[:mrmvex]) else i = Reg.new(ii, adsz) end end bb = sib & 7 if bb == 5 and m == 0 imm = Expression[edata.decode_imm("i#{adsz}".to_sym, endianness)] else b = Reg.new(bb, adsz) end when :i8, :i16, :i32 imm = Expression[edata.decode_imm(a, endianness)] end } if imm and ir = imm.reduce and ir.kind_of?(Integer) and ir < 0 and (ir < -0x10_0000 or (!b and !i)) # probably a base address -> unsigned imm = Expression[imm.reduce & ((1 << (adsz || 32)) - 1)] end opsz = h[:argsz] if h[:argsz] new adsz, opsz, s, i, b, imm, seg end end class Farptr def self.decode(edata, endianness, adsz) addr = Expression[edata.decode_imm("u#{adsz}".to_sym, endianness)] seg = Expression[edata.decode_imm(:u16, endianness)] new seg, addr end end def build_opcode_bin_mask(op) # bit = 0 if can be mutated by an field value, 1 if fixed by opcode op.bin_mask = Array.new(op.bin.length, 0) op.fields.each { |f, (oct, off)| op.bin_mask[oct] |= (@fields_mask[f] << off) } op.bin_mask.map! { |v| 255 ^ v } end def build_bin_lookaside # sets up a hash byte value => list of opcodes that may match # opcode.bin_mask is built here lookaside = Array.new(256) { [] } opcode_list.each { |op| build_opcode_bin_mask op b = op.bin[0] msk = op.bin_mask[0] for i in b..(b | (255^msk)) lookaside[i] << op if i & msk == b & msk end } lookaside end def decode_prefix(instr, byte) instr.prefix ||= {} (instr.prefix[:list] ||= []) << byte # XXX actual limit = 15-instr.length return false if instr.prefix[:list].length >= 15 case byte when 0x66; instr.prefix[:opsz] = true when 0x67; instr.prefix[:adsz] = true when 0xF0; instr.prefix[:lock] = true when 0xF2; instr.prefix[:rep] = :nz when 0xF3; instr.prefix[:rep] = :z # postprocessed by decode_instr when 0x26, 0x2E, 0x36, 0x3E, 0x64, 0x65 if byte & 0x40 == 0 v = (byte >> 3) & 3 else v = byte & 7 end instr.prefix[:seg] = SegReg.new(v) else return false end true end # tries to find the opcode encoded at edata.ptr # if no match, tries to match a prefix (update di.instruction.prefix) # on match, edata.ptr points to the first byte of the opcode (after prefixes) def decode_findopcode(edata) di = DecodedInstruction.new self while edata.ptr < edata.data.length pfx = di.instruction.prefix || {} byte = edata.data[edata.ptr] byte = byte.unpack('C').first if byte.kind_of?(::String) return di if di.opcode = @bin_lookaside[byte].find { |op| # fetch the relevant bytes from edata bseq = edata.data[edata.ptr, op.bin.length].unpack('C*') # check against full opcode mask op.bin.zip(bseq, op.bin_mask).all? { |b1, b2, m| b2 and ((b1 & m) == (b2 & m)) } and # check special cases !( # fail if any of those is true (fld = op.fields[:seg2A] and (bseq[fld[0]] >> fld[1]) & @fields_mask[:seg2A] == 1) or (fld = op.fields[:seg3A] and (bseq[fld[0]] >> fld[1]) & @fields_mask[:seg3A] < 4) or (fld = op.fields[:seg3A] || op.fields[:seg3] and (bseq[fld[0]] >> fld[1]) & @fields_mask[:seg3] > 5) or (op.props[:modrmA] and fld = op.fields[:modrm] and (bseq[fld[0]] >> fld[1]) & 0xC0 == 0xC0) or (op.props[:modrmR] and fld = op.fields[:modrm] and (bseq[fld[0]] >> fld[1]) & 0xC0 != 0xC0) or (fld = op.fields[:vex_vvvv] and @size != 64 and (bseq[fld[0]] >> fld[1]) & @fields_mask[:vex_vvvv] < 8) or (sz = op.props[:opsz] and opsz(di, op) != sz) or (sz = op.props[:adsz] and adsz(di, op) != sz) or (ndpfx = op.props[:needpfx] and not pfx[:list].to_a.include? ndpfx) or (pfx[:adsz] and op.props[:adsz] and op.props[:adsz] == @size) or # return non-ambiguous opcode (eg push.i16 in 32bit mode) / sync with addop_post in opcode.rb (pfx[:opsz] and not op.props[:opsz] and (op.args == [:i] or op.args == [:farptr] or op.name == 'ret')) or (pfx[:adsz] and not op.props[:adsz] and (op.props[:strop] or op.props[:stropz] or op.args.include?(:mrm_imm) or op.args.include?(:modrm) or op.name =~ /loop|xlat/)) or (op.name == 'nop' and op.bin[0] == 0x90 and di.instruction.prefix and di.instruction.prefix[:rex_b]) ) } break if not decode_prefix(di.instruction, edata.get_byte) di.bin_length += 1 end end def decode_instr_op(edata, di) before_ptr = edata.ptr op = di.opcode di.instruction.opname = op.name bseq = edata.read(op.bin.length).unpack('C*') # decode_findopcode ensures that data >= op.length pfx = di.instruction.prefix || {} case op.props[:needpfx] when 0x66; pfx.delete :opsz when 0x67; pfx.delete :adsz when 0xF2, 0xF3; pfx.delete :rep end if op.props[:setip] and not op.props[:stopexec] and pfx[:seg] case pfx.delete(:seg).val when 1; pfx[:jmphint] = 'hintnojmp' when 3; pfx[:jmphint] = 'hintjmp' end end field_val = lambda { |f| if fld = op.fields[f] (bseq[fld[0]] >> fld[1]) & @fields_mask[f] end } opsz = op.props[:argsz] || opsz(di) adsz = (pfx[:adsz] ? 48 - @size : @size) mmxsz = ((op.props[:xmmx] && pfx[:opsz]) ? 128 : 64) op.args.each { |a| di.instruction.args << case a when :reg; Reg.new field_val[a], opsz when :eeec; CtrlReg.new field_val[a] when :eeed; DbgReg.new field_val[a] when :eeet; TstReg.new field_val[a] when :seg2, :seg2A, :seg3, :seg3A; SegReg.new field_val[a] when :regfp; FpReg.new field_val[a] when :regmmx; SimdReg.new field_val[a], mmxsz when :regxmm; SimdReg.new field_val[a], 128 when :regymm; SimdReg.new field_val[a], 256 when :farptr; Farptr.decode edata, @endianness, opsz when :i8, :u8, :u16; Expression[edata.decode_imm(a, @endianness)] when :i; Expression[edata.decode_imm("#{op.props[:unsigned_imm] ? 'a' : 'i'}#{opsz}".to_sym, @endianness)] when :mrm_imm; ModRM.decode edata, (adsz == 16 ? 6 : 5), @endianness, adsz, opsz, pfx.delete(:seg) when :modrm; ModRM.decode edata, field_val[:modrm], @endianness, adsz, opsz, pfx.delete(:seg) when :modrmmmx; ModRM.decode edata, field_val[:modrm], @endianness, adsz, mmxsz, pfx.delete(:seg), SimdReg, :argsz => op.props[:argsz] when :modrmxmm; ModRM.decode edata, field_val[:modrm], @endianness, adsz, 128, pfx.delete(:seg), SimdReg, :argsz => op.props[:argsz], :mrmvex => op.props[:mrmvex] when :modrmymm; ModRM.decode edata, field_val[:modrm], @endianness, adsz, 256, pfx.delete(:seg), SimdReg, :argsz => op.props[:argsz], :mrmvex => op.props[:mrmvex] when :vexvreg; Reg.new((field_val[:vex_vvvv] ^ 0xf), opsz) when :vexvxmm; SimdReg.new((field_val[:vex_vvvv] ^ 0xf), 128) when :vexvymm; SimdReg.new((field_val[:vex_vvvv] ^ 0xf), 256) when :i4xmm; SimdReg.new((edata.decode_imm(:u8, @endianness) >> 4) & 7, 128) when :i4ymm; SimdReg.new((edata.decode_imm(:u8, @endianness) >> 4) & 7, 256) when :imm_val1; Expression[1] when :imm_val3; Expression[3] when :reg_cl; Reg.new 1, 8 when :reg_eax; Reg.new 0, opsz when :reg_dx; Reg.new 2, 16 when :regfp0; FpReg.new nil else raise SyntaxError, "Internal error: invalid argument #{a} in #{op.name}" end } di.bin_length += edata.ptr - before_ptr return false if edata.ptr > edata.length if op.name == 'movsx' or op.name == 'movzx' if di.opcode.props[:argsz] == 8 di.instruction.args[1].sz = 8 else di.instruction.args[1].sz = 16 end if pfx[:opsz] di.instruction.args[0].sz = 48-@size else di.instruction.args[0].sz = @size end elsif op.name == 'crc32' di.instruction.args[0].sz = 32 end case pfx.delete(:rep) when :nz if di.opcode.props[:strop] pfx[:rep] = 'rep' elsif di.opcode.props[:stropz] pfx[:rep] = 'repnz' end when :z if di.opcode.props[:strop] pfx[:rep] = 'rep' elsif di.opcode.props[:stropz] pfx[:rep] = 'repz' end end di end # converts relative jump/call offsets to absolute addresses # adds the eip delta to the offset +off+ of the instruction (may be an Expression) + its bin_length # do not call twice on the same di ! def decode_instr_interpret(di, addr) if di.opcode.props[:setip] and di.instruction.args.last.kind_of? Expression and di.instruction.opname !~ /^i?ret/ delta = di.instruction.args.last.reduce arg = Expression[[addr, :+, di.bin_length], :+, delta].reduce di.instruction.args[-1] = Expression[arg] end di end # return the list of registers as symbols in the order used by pushad # for use in backtrace and stuff, for compatibility with x64 # esp is [4] REG_SYMS = [:eax, :ecx, :edx, :ebx, :esp, :ebp, :esi, :edi] def register_symbols REG_SYMS end # interprets a condition code (in an opcode name) as an expression involving backtracked eflags # eflag_p is never computed, and this returns Expression::Unknown for this flag # ex: 'z' => Expression[:eflag_z] def decode_cc_to_expr(cc) case cc when 'o'; Expression[:eflag_o] when 'no'; Expression[:'!', :eflag_o] when 'b', 'nae', 'c'; Expression[:eflag_c] when 'nb', 'ae', 'nc'; Expression[:'!', :eflag_c] when 'z', 'e'; Expression[:eflag_z] when 'nz', 'ne'; Expression[:'!', :eflag_z] when 'be', 'na'; Expression[:eflag_c, :|, :eflag_z] when 'nbe', 'a'; Expression[:'!', [:eflag_c, :|, :eflag_z]] when 's'; Expression[:eflag_s] when 'ns'; Expression[:'!', :eflag_s] when 'p', 'pe'; Expression::Unknown when 'np', 'po'; Expression::Unknown when 'l', 'nge'; Expression[:eflag_s, :'!=', :eflag_o] when 'nl', 'ge'; Expression[:eflag_s, :==, :eflag_o] when 'le', 'ng'; Expression[[:eflag_s, :'!=', :eflag_o], :|, :eflag_z] when 'nle', 'g'; Expression[[:eflag_s, :==, :eflag_o], :&, :eflag_z] when 'ecxz'; Expression[:'!', register_symbols[1]] when 'cxz'; Expression[:'!', [register_symbols[1], :&, 0xffff]] end end def opsz(di, op=nil) if di and di.instruction.prefix and di.instruction.prefix[:opsz] and (op || di.opcode).props[:needpfx] != 0x66; 48-@size else @size end end def adsz(di, op=nil) if di and di.instruction.prefix and di.instruction.prefix[:adsz] and (op || di.opcode).props[:needpfx] != 0x67; 48-@size else @size end end # populate the @backtrace_binding hash with default values def init_backtrace_binding @backtrace_binding ||= {} eax, ecx, edx, ebx, esp, ebp, esi, edi = register_symbols ebx = ebx mask = lambda { |di| (1 << opsz(di))-1 } # 32bits => 0xffff_ffff sign = lambda { |v, di| Expression[[[v, :&, mask[di]], :>>, opsz(di)-1], :'!=', 0] } opcode_list.map { |ol| ol.basename }.uniq.sort.each { |op| binding = case op when 'mov', 'movzx', 'movd', 'movq'; lambda { |di, a0, a1| { a0 => Expression[a1] } } when 'movsx', 'movsxd' lambda { |di, a0, a1| sz1 = di.instruction.args[1].sz sign1 = Expression[[a1, :>>, sz1-1], :&, 1] { a0 => Expression[[a1, :|, [sign1, :*, (-1 << sz1)]], :&, mask[di]] } } when 'lea'; lambda { |di, a0, a1| { a0 => a1.target } } when 'xchg'; lambda { |di, a0, a1| # specialcase xchg al, ah (conflict on eax not handled in get_backtrace_binding) if a0.kind_of?(Expression) and a1.kind_of?(Expression) and a0.op == :& and a0.rexpr == 255 and a1.op == :& and a1.rexpr == 255 and ((a0.lexpr.kind_of?(Expression) and a0.lexpr.lexpr == a1.lexpr and a0.lexpr.op == :>> and a0.lexpr.rexpr == 8) or (a1.lexpr.kind_of?(Expression) and a1.lexpr.lexpr == a0.lexpr and a1.lexpr.op == :>> and a1.lexpr.rexpr == 8)) tgreg = a0.lexpr.kind_of?(Expression) ? a1.lexpr : a0.lexpr invmask = (@size == 64 ? 0xffff_ffff_ffff_0000 : 0xffff_0000) { tgreg => Expression[[tgreg, :&, invmask], :|, [[[tgreg, :>>, 8], :&, 0x00ff], :|, [[tgreg, :<<, 8], :&, 0xff00]]] } else { a0 => Expression[a1], a1 => Expression[a0] } end } when 'add', 'sub', 'or', 'xor', 'and', 'pxor', 'adc', 'sbb' lambda { |di, a0, a1| e_op = { 'add' => :+, 'sub' => :-, 'or' => :|, 'and' => :&, 'xor' => :^, 'pxor' => :^, 'adc' => :+, 'sbb' => :- }[op] ret = Expression[a0, e_op, a1] ret = Expression[ret, e_op, :eflag_c] if op == 'adc' or op == 'sbb' # optimises eax ^ eax => 0 # avoid hiding memory accesses (to not hide possible fault) ret = Expression[ret.reduce] if not a0.kind_of? Indirection { a0 => ret } } when 'xadd'; lambda { |di, a0, a1| { a0 => Expression[a0, :+, a1], a1 => Expression[a0] } } when 'inc'; lambda { |di, a0| { a0 => Expression[a0, :+, 1] } } when 'dec'; lambda { |di, a0| { a0 => Expression[a0, :-, 1] } } when 'not'; lambda { |di, a0| { a0 => Expression[a0, :^, mask[di]] } } when 'neg'; lambda { |di, a0| { a0 => Expression[:-, a0] } } when 'rol', 'ror' lambda { |di, a0, a1| e_op = (op[2] == ?r ? :>> : :<<) inv_op = {:<< => :>>, :>> => :<< }[e_op] operandsize = di.instruction.args[0].sz operandmask = (1 << operandsize) - 1 sz = [a1, :%, operandsize] isz = [[operandsize, :-, a1], :%, operandsize] # ror a, b => (a >> b) | (a << (32-b)) { a0 => Expression[[[[a0, :&, operandmask], e_op, sz], :|, [[a0, :&, operandmask], inv_op, isz]], :&, operandmask] } } when 'shl', 'sal'; lambda { |di, a0, a1| { a0 => Expression[a0, (op[-1] == ?r ? :>> : :<<), [a1, :%, [opsz(di), 32].max]] } } when 'sar'; lambda { |di, a0, a1| sz = [opsz(di), 32].max a1 = [a1, :%, sz] { a0 => Expression[[a0, :>>, a1], :|, [[[mask[di], :*, sign[a0, di]], :<<, [sz, :-, a1]], :&, mask[di]]] } } when 'shr'; lambda { |di, a0, a1| { a0 => Expression[[a0, :&, mask[di]], :>>, [a1, :%, opsz(di)]] } } when 'shrd' lambda { |di, a0, a1, a2| { a0 => Expression[[a0, :>>, [a2, :%, opsz(di)]], :|, [a1, :<<, [[opsz(di), :-, a2], :%, opsz(di)]]] } } when 'shld' lambda { |di, a0, a1, a2| { a0 => Expression[[a0, :<<, [a2, :%, opsz(di)]], :|, [a1, :>>, [[opsz(di), :-, a2], :%, opsz(di)]]] } } when 'cwd', 'cdq', 'cqo'; lambda { |di| { Expression[edx, :&, mask[di]] => Expression[mask[di], :*, sign[eax, di]] } } when 'cbw', 'cwde', 'cdqe'; lambda { |di| o2 = opsz(di)/2 ; m2 = (1 << o2) - 1 { Expression[eax, :&, mask[di]] => Expression[[eax, :&, m2], :|, [m2 << o2, :*, [[eax, :>>, o2-1], :&, 1]]] } } when 'push' lambda { |di, a0| { esp => Expression[esp, :-, opsz(di)/8], Indirection[esp, opsz(di)/8, di.address] => Expression[a0] } } when 'pop' lambda { |di, a0| { esp => Expression[esp, :+, opsz(di)/8], a0 => Indirection[esp, opsz(di)/8, di.address] } } when 'pushfd', 'pushf', 'pushfq' # TODO Unknown per bit lambda { |di| efl = Expression[0x202] bts = lambda { |pos, v| efl = Expression[efl, :|, [[v, :&, 1], :<<, pos]] } bts[0, :eflag_c] bts[6, :eflag_z] bts[7, :eflag_s] bts[11, :eflag_o] { esp => Expression[esp, :-, opsz(di)/8], Indirection[esp, opsz(di)/8, di.address] => efl } } when 'popfd', 'popf', 'popfq' lambda { |di| bt = lambda { |pos| Expression[[Indirection[esp, opsz(di)/8, di.address], :>>, pos], :&, 1] } { esp => Expression[esp, :+, opsz(di)/8], :eflag_c => bt[0], :eflag_z => bt[6], :eflag_s => bt[7], :eflag_o => bt[11] } } when 'sahf' lambda { |di| bt = lambda { |pos| Expression[[eax, :>>, 8+pos], :&, 1] } { :eflag_c => bt[0], :eflag_z => bt[6], :eflag_s => bt[7] } } when 'lahf' lambda { |di| efl = Expression[2] bts = lambda { |pos, v| efl = Expression[efl, :|, [[v, :&, 1], :<<, pos]] } bts[0, :eflag_c] #bts[2, :eflag_p] #bts[4, :eflag_a] bts[6, :eflag_z] bts[7, :eflag_s] { Expression[[eax, :>>, 8], :&, 0xff] => efl } } when 'pushad' lambda { |di| ret = {} st_off = 0 register_symbols.reverse_each { |r| ret[Indirection[Expression[esp, :+, st_off].reduce, opsz(di)/8, di.address]] = Expression[r] st_off += opsz(di)/8 } ret[esp] = Expression[esp, :-, st_off] ret } when 'popad' lambda { |di| ret = {} st_off = 0 register_symbols.reverse_each { |r| ret[r] = Indirection[Expression[esp, :+, st_off].reduce, opsz(di)/8, di.address] st_off += opsz(di)/8 } ret[esp] = Expression[esp, :+, st_off] # esp is not popped ret } when 'call' lambda { |di, a0| sz = opsz(di)/8 if a0.kind_of? Farptr { esp => Expression[esp, :-, 2*sz], Indirection[esp, sz, di.address] => Expression[di.next_addr], Indirection[[esp, :+, sz], sz, di.address] => Expression::Unknown } else { esp => Expression[esp, :-, sz], Indirection[esp, sz, di.address] => Expression[di.next_addr] } end } when 'callf' lambda { |di, a0| sz = opsz(di)/8 { esp => Expression[esp, :-, 2*sz], Indirection[esp, sz, di.address] => Expression[di.next_addr], Indirection[[esp, :+, sz], sz, di.address] => Expression::Unknown } } when 'ret'; lambda { |di, *a| { esp => Expression[esp, :+, [opsz(di)/8, :+, a[0] || 0]] } } when 'retf';lambda { |di, *a| { esp => Expression[esp, :+, [opsz(di)/4, :+, a[0] || 0]] } } when 'loop', 'loopz', 'loopnz'; lambda { |di, a0| { ecx => Expression[ecx, :-, 1] } } when 'enter' lambda { |di, a0, a1| sz = opsz(di)/8 depth = a1.reduce % 32 b = { Indirection[ebp, sz, di.address] => Expression[ebp], Indirection[[esp, :+, a0.reduce+sz*depth], sz, di.address] => Expression[ebp], ebp => Expression[esp, :-, sz], esp => Expression[esp, :-, a0.reduce+sz*depth+sz] } (1..depth).each { |i| b[Indirection[[esp, :+, a0.reduce+i*sz], sz, di.address]] = b[Indirection[[ebp, :-, i*sz], sz, di.address]] = Expression::Unknown # TODO Indirection[[ebp, :-, i*sz], sz, di.address] } b } when 'leave'; lambda { |di| { ebp => Indirection[[ebp], opsz(di)/8, di.address], esp => Expression[ebp, :+, opsz(di)/8] } } when 'aaa'; lambda { |di| { eax => Expression::Unknown, :incomplete_binding => Expression[1] } } when 'imul' lambda { |di, *a| if not a[1] # 1 operand from: store result in edx:eax bd = {} m = mask[di] s = opsz(di) e = Expression[Expression.make_signed(Expression[a[0], :&, m], s), :*, Expression.make_signed(Expression[eax, :&, m], s)] if s == 8 bd[Expression[eax, :&, 0xffff]] = e else bd[Expression[eax, :&, m]] = Expression[e, :&, m] bd[Expression[edx, :&, m]] = Expression[[e, :>>, opsz(di)], :&, m] end # XXX eflags? next bd end if a[2]; e = Expression[a[1], :*, a[2]] else e = Expression[[a[0], :*, a[1]], :&, (1 << (di.instruction.args.first.sz || opsz(di))) - 1] end { a[0] => e } } when 'mul' lambda { |di, *a| m = mask[di] e = Expression[a, :*, [eax, :&, m]] if opsz(di) == 8 { Expression[eax, :&, 0xffff] => e } else { Expression[eax, :&, m] => Expression[e, :&, m], Expression[edx, :&, m] => Expression[[e, :>>, opsz(di)], :&, m] } end } when 'div', 'idiv'; lambda { |di, *a| { eax => Expression::Unknown, edx => Expression::Unknown, :incomplete_binding => Expression[1] } } when 'rdtsc'; lambda { |di| { eax => Expression::Unknown, edx => Expression::Unknown, :incomplete_binding => Expression[1] } } when /^(stos|movs|lods|scas|cmps)[bwdq]$/ lambda { |di, *a| next {:incomplete_binding => 1} if di.opcode.args.include?(:regxmm) # XXX movsd xmm0, xmm1... op =~ /^(stos|movs|lods|scas|cmps)([bwdq])$/ e_op = $1 sz = { 'b' => 1, 'w' => 2, 'd' => 4, 'q' => 8 }[$2] eax_ = self.class::Reg.new(0, 8*sz).symbolic dir = :+ if di.block and (di.block.list.find { |ddi| ddi.opcode.name == 'std' } rescue nil) dir = :- end pesi = Indirection[esi, sz, di.address] pedi = Indirection[edi, sz, di.address] pfx = di.instruction.prefix || {} bd = case e_op when 'movs' case pfx[:rep] when nil; { pedi => pesi, esi => Expression[esi, dir, sz], edi => Expression[edi, dir, sz] } else { pedi => pesi, esi => Expression[esi, dir, [sz ,:*, ecx]], edi => Expression[edi, dir, [sz, :*, ecx]], ecx => 0 } end when 'stos' case pfx[:rep] when nil; { pedi => Expression[eax_], edi => Expression[edi, dir, sz] } else { pedi => Expression[eax_], edi => Expression[edi, dir, [sz, :*, ecx]], ecx => 0 } end when 'lods' case pfx[:rep] when nil; { eax_ => pesi, esi => Expression[esi, dir, sz] } else { eax_ => Indirection[[esi, dir, [sz, :*, [ecx, :-, 1]]], sz, di.address], esi => Expression[esi, dir, [sz, :*, ecx]], ecx => 0 } end when 'scas' case pfx[:rep] when nil; { edi => Expression[edi, dir, sz], :eflag_z => Expression[pedi, :==, Expression[eax, :&, (1 << (sz*8))-1]] } else { edi => Expression::Unknown, ecx => Expression::Unknown } end when 'cmps' case pfx[:rep] when nil; { edi => Expression[edi, dir, sz], esi => Expression[esi, dir, sz] } else { edi => Expression::Unknown, esi => Expression::Unknown, ecx => Expression::Unknown } end end bd[:incomplete_binding] = Expression[1] if pfx[:rep] bd } when 'clc'; lambda { |di| { :eflag_c => Expression[0] } } when 'stc'; lambda { |di| { :eflag_c => Expression[1] } } when 'cmc'; lambda { |di| { :eflag_c => Expression[:'!', :eflag_c] } } when 'cld'; lambda { |di| { :eflag_d => Expression[0] } } when 'std'; lambda { |di| { :eflag_d => Expression[1] } } when 'setalc'; lambda { |di| { Reg.new(0, 8).symbolic => Expression[:eflag_c, :*, 0xff] } } when /^set/; lambda { |di, *a| { a[0] => Expression[decode_cc_to_expr(op[/^set(.*)/, 1])] } } when /^cmov/; lambda { |di, *a| fl = decode_cc_to_expr(op[/^cmov(.*)/, 1]) ; { a[0] => Expression[[fl, :*, a[1]], :|, [[1, :-, fl], :*, a[0]]] } } when /^j/ lambda { |di, a0| ret = { 'dummy_metasm_0' => Expression[a0] } # mark modr/m as read if fl = decode_cc_to_expr(op[/^j(.*)/, 1]) and fl != Expression::Unknown ret['dummy_metasm_1'] = fl # mark eflags as read end ret } when 'fstenv', 'fnstenv' lambda { |di, a0| # stores the address of the last non-control fpu instr run lastfpuinstr = di.block.list[0...di.block.list.index(di)].reverse.find { |pdi| case pdi.opcode.name when /fn?init|fn?clex|fldcw|fn?st[cs]w|fn?stenv|fldenv|fn?save|frstor|f?wait/ when /^f/; true end } if di.block lastfpuinstr = lastfpuinstr.address if lastfpuinstr ret = {} save_at = lambda { |off, val| ret[Indirection[a0.target + off, 4, di.address]] = val } save_at[0, Expression::Unknown] save_at[4, Expression::Unknown] save_at[8, Expression::Unknown] save_at[12, lastfpuinstr || Expression::Unknown] save_at[16, Expression::Unknown] save_at[20, Expression::Unknown] save_at[24, Expression::Unknown] ret } when 'bt'; lambda { |di, a0, a1| { :eflag_c => Expression[[a0, :>>, [a1, :%, opsz(di)]], :&, 1] } } when 'bts'; lambda { |di, a0, a1| { :eflag_c => Expression[[a0, :>>, [a1, :%, opsz(di)]], :&, 1], a0 => Expression[a0, :|, [1, :<<, [a1, :%, opsz(di)]]] } } when 'btr'; lambda { |di, a0, a1| { :eflag_c => Expression[[a0, :>>, [a1, :%, opsz(di)]], :&, 1], a0 => Expression[a0, :&, [[1, :<<, [a1, :%, opsz(di)]], :^, mask[di]]] } } when 'btc'; lambda { |di, a0, a1| { :eflag_c => Expression[[a0, :>>, [a1, :%, opsz(di)]], :&, 1], a0 => Expression[a0, :^, [1, :<<, [a1, :%, opsz(di)]]] } } when 'bswap' lambda { |di, a0| case opsz(di) when 64 { a0 => Expression[ [[[[a0, :&, 0xff000000_00000000], :>>, 56], :|, [[a0, :&, 0x00ff0000_00000000], :>>, 40]], :|, [[[a0, :&, 0x0000ff00_00000000], :>>, 24], :|, [[a0, :&, 0x000000ff_00000000], :>>, 8]]], :|, [[[[a0, :&, 0x00000000_ff000000], :<<, 8], :|, [[a0, :&, 0x00000000_00ff0000], :<<, 24]], :|, [[[a0, :&, 0x00000000_0000ff00], :<<, 40], :|, [[a0, :&, 0x00000000_000000ff], :<<, 56]]]] } when 32 { a0 => Expression[ [[[a0, :&, 0xff000000], :>>, 24], :|, [[a0, :&, 0x00ff0000], :>>, 8]], :|, [[[a0, :&, 0x0000ff00], :<<, 8], :|, [[a0, :&, 0x000000ff], :<<, 24]]] } when 16 # bswap ax => mov ax, 0 { a0 => 0 } end } when 'movdqa', 'movdqu', 'movaps', 'movups'; lambda { |di, a0, a1| { a0 => Expression[a1] } } when 'cmpxchg'; lambda { |di, a0, a1| # eax == a0 ? a0 <= a1, zf <= 1 : eax <= a0, zf <= 0 eax_ = self.class::Reg.new(0, opsz(di)).symbolic cmp = Expression[eax_, :==, a0] { :eflag_z => cmp, eax_ => Expression[[cmp, :*, eax_], :|, [[1, :-, cmp], :*, a0]], a0 => Expression[[cmp, :*, a1], :|, [[1, :-, cmp], :*, a0]] } } when 'cmpxchg8b', 'cmpxchg16b'; lambda { |di, a0| # edx:eax == mem ? mem <= ecx:ebx, zf <= 1 : edx:eax <= mem, zf <= 0 sz = (di.opcode.name =~ /8b/ ? 32 : 64) eax_ = self.class::Reg.new(0, sz).symbolic ecx_ = self.class::Reg.new(1, sz).symbolic edx_ = self.class::Reg.new(2, sz).symbolic ebx_ = self.class::Reg.new(3, sz).symbolic cmp = Expression[[[edx_, :<<, sz], :|, eax_], :==, a0] { :eflag_z => cmp, eax_ => Expression[[cmp, :*, eax_], :|, [[1, :-, cmp], :*, [a0, :&, (1 << sz) - 1]]], edx_ => Expression[[cmp, :*, edx_], :|, [[1, :-, cmp], :*, [a0, :>>, sz]]], a0 => Expression[[cmp, :*, [[ecx_, :<<, sz], :|, ebx_]], :|, [[1, :-, cmp], :*, a0]] } } when 'nop', 'pause', 'wait', 'cmp', 'test'; lambda { |di, *a| {} } end # add eflags side-effects full_binding = case op when 'adc', 'add', 'and', 'cmp', 'or', 'sbb', 'sub', 'xor', 'test', 'xadd' lambda { |di, a0, a1| e_op = { 'adc' => :+, 'add' => :+, 'xadd' => :+, 'and' => :&, 'cmp' => :-, 'or' => :|, 'sbb' => :-, 'sub' => :-, 'xor' => :^, 'test' => :& }[op] res = Expression[[a0, :&, mask[di]], e_op, [a1, :&, mask[di]]] res = Expression[res, e_op, :eflag_c] if op == 'adc' or op == 'sbb' ret = (binding ? binding[di, a0, a1] : {}) ret[:eflag_z] = Expression[[res, :&, mask[di]], :==, 0] ret[:eflag_s] = sign[res, di] ret[:eflag_c] = case e_op when :+; Expression[res, :>, mask[di]] when :-; Expression[[a0, :&, mask[di]], :<, [a1, :&, mask[di]]] else Expression[0] end ret[:eflag_o] = case e_op when :+; Expression[[sign[a0, di], :==, sign[a1, di]], :'&&', [sign[a0, di], :'!=', sign[res, di]]] when :-; Expression[[sign[a0, di], :==, [:'!', sign[a1, di]]], :'&&', [sign[a0, di], :'!=', sign[res, di]]] else Expression[0] end ret } when 'inc', 'dec', 'neg', 'shl', 'shr', 'sal', 'sar', 'ror', 'rol', 'rcr', 'rcl', 'shld', 'shrd' lambda { |di, a0, *a| ret = (binding ? binding[di, a0, *a] : {}) res = ret[a0] || Expression::Unknown ret[:eflag_z] = Expression[[res, :&, mask[di]], :==, 0] ret[:eflag_s] = sign[res, di] case op when 'neg'; ret[:eflag_c] = Expression[[res, :&, mask[di]], :'!=', 0] when 'inc', 'dec' # don't touch carry flag when 'shr', 'sar', 'shrd'; ret[:eflag_c] = Expression[[a0, :>>, [a[0], :-, 1]], :&, 1] # XXX shr 0 => no touch flag when 'shl', 'sal', 'shld'; ret[:eflag_c] = Expression[[a0, :>>, [di.instruction.args[0].sz, :-, a[0]]], :&, 1] else ret[:eflag_c] = Expression::Unknown # :incomplete_binding ? end ret[:eflag_o] = case op when 'inc'; Expression[[a0, :&, mask[di]], :==, mask[di] >> 1] when 'dec'; Expression[[res , :&, mask[di]], :==, mask[di] >> 1] when 'neg'; Expression[[a0, :&, mask[di]], :==, (mask[di]+1) >> 1] else Expression::Unknown end ret } when 'imul', 'mul', 'idiv', 'div', /^(scas|cmps)[bwdq]$/ lambda { |di, *a| ret = (binding ? binding[di, *a] : {}) ret[:eflag_z] ||= Expression::Unknown ret[:eflag_s] ||= Expression::Unknown ret[:eflag_c] ||= Expression::Unknown ret[:eflag_o] ||= Expression::Unknown # :incomplete_binding ? ret } end @backtrace_binding[op] ||= full_binding || binding if full_binding || binding } @backtrace_binding end # returns the condition (bool Expression) under which a conditional jump is taken # returns nil if not a conditional jump # backtrace for the condition must include the jump itself (eg loop -> ecx--) def get_jump_condition(di) ecx = register_symbols[1] case di.opcode.name when /^j(.*)/ decode_cc_to_expr($1) when /^loop(.+)?/ e = Expression[ecx, :'!=', 0] e = Expression[e, :'||', decode_cc_to_expr($1)] if $1 e end end def get_backtrace_binding(di) a = di.instruction.args.map { |arg| symbolic(arg, di) } if binding = backtrace_binding[di.opcode.basename] bd = binding[di, *a] # handle modifications to al/ah etc bd.keys.grep(Expression).each { |e| # must be in the form (x & mask), with x either :reg or (:reg >> shift) eg ah == ((eax >> 8) & 0xff) if e.op == :& and mask = e.rexpr and mask.kind_of? Integer reg = e.lexpr reg = reg.lexpr if reg.kind_of? Expression and reg.op == :>> and shift = reg.rexpr and shift.kind_of? Integer next if not reg.kind_of? Symbol if bd.has_key? reg # xchg ah, al ; pop sp.. puts "backtrace: conflict for #{di}: #{e} vs #{reg}" if $VERBOSE bd[reg] = Expression::Unknown next end val = bd.delete e mask <<= shift if shift invmask = mask ^ (@size == 64 ? 0xffff_ffff_ffff_ffff : 0xffff_ffff) if invmask == 0xffff_ffff_0000_0000 and not di.opcode.props[:op32no64] bd[reg] = Expression[val, :&, 0xffff_ffff] elsif invmask == 0 bd[reg] = val else val = Expression[val, :<<, shift] if shift bd[reg] = Expression[[reg, :&, invmask], :|, [val, :&, mask]] end end } bd else puts "unhandled instruction to backtrace: #{di}" if $VERBOSE # assume nothing except the 1st arg is modified case a[0] when Indirection, Symbol; { a[0] => Expression::Unknown } when Expression; (x = a[0].externals.first) ? { x => Expression::Unknown } : {} else {} end.update(:incomplete_binding => Expression[1]) end end # patch a forward binding from the backtrace binding # fixes fwdemu for push/pop/call/ret def fix_fwdemu_binding(di, fbd) if di.instruction.args.grep(ModRM).find { |m| m.seg and m.symbolic(di).target.lexpr =~ /^segment_base_/ } fbd = fbd.dup fbd[:incomplete_binding] = Expression[1] end case di.opcode.name when /^push/, 'call' ori = fbd fbd = {} sz = opsz(di)/8 esp = register_symbols[4] if ori[esp] and ori[Indirection[esp, sz]] ori.each { |k, v| if k.kind_of?(Indirection) fbd[k.bind(esp => ori[esp]).reduce_rec] = v else fbd[k] = v end } else fbd = ori.dup fbd[:incomplete_binding] = Expression[1] # TODO end when /^pop/, 'ret' # nothing to do when /^(push|pop|call|ret|enter|leave|stos|movs|lods|scas|cmps)/ fbd = fbd.dup fbd[:incomplete_binding] = Expression[1] # TODO end fbd end def get_xrefs_x(dasm, di) return [] if not di.opcode.props[:setip] sz = opsz(di) case di.opcode.basename when 'ret'; return [Indirection[register_symbols[4], sz/8, di.address]] when 'jmp', 'call' if dasm and not di.instruction.args.first.kind_of?(Expression) and switch_table = get_xrefs_x_jmptable(dasm, di) return switch_table end end case tg = di.instruction.args.first when ModRM tg.sz ||= sz if tg.kind_of? ModRM [Expression[tg.symbolic(di)]] when Reg; [Expression[tg.symbolic(di)]] when Expression, ::Integer; [Expression[tg]] when Farptr; tg.seg.reduce < 0x30 ? [tg.addr] : [Expression[[tg.seg, :*, 0x10], :+, tg.addr]] else puts "unhandled setip at #{Expression[di.address]} #{di.instruction}" if $DEBUG [] end end # indirect call, try to match a switch table pattern (eg jmp [base+4*idx]) # return a list of target addresses if found, nil otherwise def get_xrefs_x_jmptable(dasm, di) puts "search jmptable for #{Expression[di.address]} #{di.instruction}" if $DEBUG arg0 = di.instruction.args.first.symbolic(di) bt_log = [] dasm.backtrace(arg0, di.address, :maxdepth => 3, :log => bt_log) expr = nil index = nil index_max = nil bt_log.each { |btl| next if btl[0] != :up last = dasm.di_at(btl[4]) break if not last or last.block.to_normal.length > 2 next if last.block.to_normal.length != 2 # search cmp eax, 42 ; ja too_big ; jmp [base+4*eax] # XXX 256 cases switch => no cmp... prelast = last.block.list.reverse.find { |pl| pl.opcode.name == 'cmp' } break unless prelast and cmp_value = prelast.instruction.args.last and cmp_value.kind_of?(Expression) and cmp_value.reduce.kind_of?(::Integer) cmp_value = cmp_value.reduce % (1 << prelast.instruction.args.first.sz) # cmp al, -12h ; jnbe => -12h is unsigned 0eeh index = prelast.instruction.args.first.symbolic(prelast) index = index.externals.first if index.kind_of?(Expression) # cmp bl, 13 => ebx expr = Expression[btl[1], :&, ((1 << @size) - 1)] # XXX without the mask, additions may overflow (this breaks elsewhere too, need Expr32) (expr.externals.grep(Symbol) - [index]).uniq.each { |r| rv = dasm.backtrace(r, prelast.address, :maxdepth => 3) expr = expr.bind(r => rv[0]) if rv.length == 1 } cmp_value = prelast.instruction.args.last.reduce % (1 << prelast.instruction.args.first.sz) case last.opcode.name when 'jae', 'jb', 'jnae', 'jnb'; index_max = cmp_value-1 when 'ja', 'jbe', 'jna', 'jnbe'; index_max = cmp_value else; expr = nil end break } if expr and expr.externals.grep(Symbol).uniq == [index] # yay ! # include the symbolic dest for backtrace stuff puts "found jmptable for #{Expression[di.address]} #{di.instruction} (#{index_max+1} entries)" if $VERBOSE # TODO add labels / tables / xrefs etc [Expression[arg0]] + (0..index_max).map { |i| expr.bind(index => i) } end end # checks if expr is a valid return expression matching the :saveip instruction def backtrace_is_function_return(expr, di=nil) expr = Expression[expr].reduce_rec expr.kind_of? Indirection and expr.len == @size/8 and expr.target == Expression[register_symbols[4]] end # updates the function backtrace_binding # if the function is big and no specific register is given, do nothing (the binding will be lazily updated later, on demand) # XXX assume retaddrlist is either a list of addr of ret or a list with a single entry which is an external function name (thunk) def backtrace_update_function_binding(dasm, faddr, f, retaddrlist, *wantregs) b = f.backtrace_binding esp, ebp = register_symbols[4, 2] # XXX handle retaddrlist for multiple/mixed thunks if retaddrlist and not dasm.decoded[retaddrlist.first] and di = dasm.decoded[faddr] # no return instruction, must be a thunk : find the last instruction (to backtrace from it) done = [] while ndi = dasm.decoded[di.block.to_subfuncret.to_a.first] || dasm.decoded[di.block.to_normal.to_a.first] and ndi.kind_of? DecodedInstruction and not done.include? ndi.address done << ndi.address di = ndi end if not di.block.to_subfuncret.to_a.first and di.block.to_normal and di.block.to_normal.length > 1 thunklast = di.block.list.last.address end end bt_val = lambda { |r| next if not retaddrlist b[r] = Expression::Unknown # TODO :pending or something ? (for recursive lazy functions) bt = [] retaddrlist.each { |retaddr| bt |= dasm.backtrace(Expression[r], (thunklast ? thunklast : retaddr), :include_start => true, :snapshot_addr => faddr, :origin => retaddr, :from_subfuncret => thunklast) } if bt.length != 1 b[r] = Expression::Unknown else b[r] = bt.first end } if not wantregs.empty? wantregs.each(&bt_val) else if dasm.function_blocks(faddr, true).length < 20 register_symbols.each(&bt_val) else [ebp, esp].each(&bt_val) end end backtrace_update_function_binding_check(dasm, faddr, f, b, &bt_val) b end def backtrace_update_function_binding_check(dasm, faddr, f, b) sz = @size/8 if b[:ebp] and b[:ebp] != Expression[:ebp] # may be a custom 'enter' function (eg recent Visual Studio) # TODO put all memory writes in the binding ? [[:ebp], [:esp, :+, 1*sz], [:esp, :+, 2*sz], [:esp, :+, 3*sz]].each { |ptr| ind = Indirection[ptr, sz, faddr] yield(ind) b.delete(ind) if b[ind] and not [:ebx, :edx, :esi, :edi, :ebp].include? b[ind].reduce_rec } end if dasm.funcs_stdabi if b[:esp] and b[:esp] == Expression::Unknown and not f.btbind_callback puts "update_func_bind: #{Expression[faddr]} has esp -> unknown, use dynamic callback" if $DEBUG f.btbind_callback = disassembler_default_btbind_callback end [:ebp, :ebx, :esi, :edi].each { |reg| if b[reg] and b[reg] == Expression::Unknown puts "update_func_bind: #{Expression[faddr]} has #{reg} -> unknown, presume it is preserved" if $DEBUG b[reg] = Expression[reg] end } else if b[:esp] and not Expression[b[:esp], :-, :esp].reduce.kind_of?(::Integer) puts "update_func_bind: #{Expression[faddr]} has esp -> #{b[:esp]}" if $DEBUG end end # rename some functions # TODO database and real signatures rename = if b[:eax] and Expression[b[:eax], :-, faddr].reduce == 0 'geteip' # metasm pic linker elsif b[:eax] and b[:ebx] and Expression[b[:eax], :-, :eax].reduce == 0 and Expression[b[:ebx], :-, Indirection[:esp, sz, nil]].reduce == 0 'get_pc_thunk_ebx' # elf pic convention elsif b[:esp] and Expression[b[:esp], :-, [:esp, :-, Indirection[[:esp, :+, 2*sz], sz]]].reduce.kind_of? ::Integer and dasm.decoded[faddr].block.list.find { |di| di.backtrace_binding[Indirection['segment_base_fs', sz]] } '__SEH_prolog' elsif b[:esp] == Expression[:ebp, :+, sz] and dasm.decoded[faddr].block.list.find { |di| di.backtrace_binding[Indirection['segment_base_fs', sz]] } '__SEH_epilog' end dasm.auto_label_at(faddr, rename, 'loc', 'sub') if rename end # returns true if the expression is an address on the stack def backtrace_is_stack_address(expr) Expression[expr].expr_externals.include? register_symbols[4] end # updates an instruction's argument replacing an expression with another (eg label renamed) def replace_instr_arg_immediate(i, old, new) i.args.map! { |a| case a when Expression; a == old ? new : Expression[a.bind(old => new).reduce] when ModRM a.imm = (a.imm == old ? new : Expression[a.imm.bind(old => new).reduce]) if a.imm a else a end } end # returns a DecodedFunction from a parsed C function prototype # TODO rebacktrace already decoded functions (load a header file after dasm finished) # TODO walk structs args def decode_c_function_prototype(cp, sym, orig=nil) sym = cp.toplevel.symbol[sym] if sym.kind_of?(::String) df = DecodedFunction.new orig ||= Expression[sym.name] new_bt = lambda { |expr, rlen| df.backtracked_for << BacktraceTrace.new(expr, orig, expr, rlen ? :r : :x, rlen) } # return instr emulation if sym.has_attribute 'noreturn' or sym.has_attribute '__noreturn__' df.noreturn = true else new_bt[Indirection[:esp, @size/8, orig], nil] end # register dirty (XXX assume standard ABI) [:eax, :ecx, :edx].each { |r| df.backtrace_binding.update r => Expression::Unknown } # emulate ret al = cp.typesize[:ptr] stackoff = al if sym.has_attribute 'fastcall' stackoff = sym.type.args.to_a[2..-1].to_a.inject(al) { |sum, a| sum += (cp.sizeof(a) + al - 1) / al * al } elsif sym.has_attribute 'stdcall' stackoff = sym.type.args.to_a.inject(al) { |sum, a| sum += (cp.sizeof(a) + al - 1) / al * al } end df.backtrace_binding[:esp] = Expression[:esp, :+, stackoff] # scan args for function pointers # TODO walk structs/unions.. stackoff = al sym.type.args.to_a.each { |a| p = Indirection[[:esp, :+, stackoff], al, orig] stackoff += (cp.sizeof(a) + al - 1) / al * al if a.type.untypedef.kind_of? C::Pointer pt = a.type.untypedef.type.untypedef if pt.kind_of? C::Function new_bt[p, nil] df.backtracked_for.last.detached = true elsif pt.kind_of? C::Struct new_bt[p, al] else new_bt[p, cp.sizeof(nil, pt)] end end } df end # the lambda for the :default backtrace_binding callback of the disassembler # tries to determine the stack offset of unprototyped functions # working: # checks that origin is a ret, that expr is an indirection from esp and that expr.origin is the ret # bt_walk from calladdr until we finds a call into us, and assumes it is the current function start # TODO handle foo: call bar ; bar: pop eax ; call ; ret -> bar is not the function start (foo is) # then backtrace expr from calladdr to funcstart (snapshot), using esp -> esp+ # from the result, compute stackoffvariable (only if trivial) # will not work if the current function calls any other unknown function (unless all are __cdecl) # will not work if the current function is framed (ebp leave ret): in this case the function will return, but its esp will be unknown # if the stack offset is found and funcaddr is a string, fixup the static binding and remove the dynamic binding # TODO dynamise thunks bt_for & bt_cb def disassembler_default_btbind_callback esp = register_symbols[4] lambda { |dasm, bind, funcaddr, calladdr, expr, origin, maxdepth| @dasm_func_default_off ||= {} if off = @dasm_func_default_off[[dasm, calladdr]] bind = bind.merge(esp => Expression[esp, :+, off]) break bind end break bind if not odi = dasm.decoded[origin] or odi.opcode.basename != 'ret' expr = expr.reduce_rec if expr.kind_of? Expression break bind unless expr.kind_of? Indirection and expr.origin == origin break bind unless expr.externals.reject { |e| e =~ /^autostackoffset_/ } == [esp] curfunc = dasm.function[funcaddr] if curfunc.backtrace_binding and tk = curfunc.backtrace_binding[:thunk] and dasm.function[tk] curfunc = dasm.function[tk] end # scan from calladdr for the probable parent function start func_start = nil dasm.backtrace_walk(true, calladdr, false, false, nil, maxdepth) { |ev, foo, h| if ev == :up and h[:sfret] != :subfuncret and di = dasm.decoded[h[:to]] and di.opcode.basename == 'call' func_start = h[:from] break elsif ev == :end # entrypoints are functions too func_start = h[:addr] break end } break bind if not func_start puts "automagic #{Expression[funcaddr]}: found func start for #{dasm.decoded[origin]} at #{Expression[func_start]}" if dasm.debug_backtrace s_off = "autostackoffset_#{Expression[funcaddr]}_#{Expression[calladdr]}" list = dasm.backtrace(expr.bind(esp => Expression[esp, :+, s_off]), calladdr, :include_start => true, :snapshot_addr => func_start, :maxdepth => maxdepth, :origin => origin) # check if this backtrace made us find our binding if off = @dasm_func_default_off[[dasm, calladdr]] bind = bind.merge(esp => Expression[esp, :+, off]) break bind elsif not curfunc.btbind_callback break curfunc.backtrace_binding end e_expr = list.find { |e_expr_| # TODO cleanup this e_expr_ = Expression[e_expr_].reduce_rec next if not e_expr_.kind_of? Indirection off = Expression[[esp, :+, s_off], :-, e_expr_.target].reduce off.kind_of? Integer and off >= @size/8 and off < 10*@size/8 and (off % (@size/8)) == 0 } || list.first e_expr = e_expr.rexpr if e_expr.kind_of? Expression and e_expr.op == :+ and not e_expr.lexpr break bind unless e_expr.kind_of? Indirection off = Expression[[esp, :+, s_off], :-, e_expr.target].reduce if off.kind_of? Expression bd = off.externals.grep(/^autostackoffset_/).inject({}) { |bd_, xt| bd_.update xt => @size/8 } bd.delete s_off if off.bind(bd).reduce == @size/8 # all __cdecl off = @size/8 else # check if all calls are to the same extern func bd.delete_if { |k, v| k !~ /^autostackoffset_#{Expression[funcaddr]}_/ } bd.each_key { |k| bd[k] = 0 } if off.bind(bd).reduce.kind_of? Integer off = off.bind(bd).reduce / (bd.length + 1) end end end if off.kind_of? Integer if off < @size/8 or off > 20*@size/8 or (off % (@size/8)) != 0 puts "autostackoffset: ignoring off #{off} for #{Expression[funcaddr]} from #{dasm.decoded[calladdr]}" if $VERBOSE off = :unknown end end bind = bind.merge esp => Expression[esp, :+, off] if off != :unknown if funcaddr != :default if not off.kind_of? ::Integer #XXX we allow the current function to return, so we should handle the func backtracking its esp #(and other register that are saved and restored in epilog) puts "stackoff #{dasm.decoded[calladdr]} | #{Expression[func_start]} | #{expr} | #{e_expr} | #{off}" if dasm.debug_backtrace else puts "autostackoffset: found #{off} for #{Expression[funcaddr]} from #{dasm.decoded[calladdr]}" if $VERBOSE curfunc.btbind_callback = nil curfunc.backtrace_binding = bind # rebacktrace the return address, so that other unknown funcs that depend on us are solved dasm.backtrace(Indirection[esp, @size/8, origin], origin, :origin => origin) end else if off.kind_of? ::Integer and dasm.decoded[calladdr] puts "autostackoffset: found #{off-@size/8} for #{dasm.decoded[calladdr]}" if $VERBOSE di = dasm.decoded[calladdr] di.comment.delete_if { |c| c =~ /^stackoff=/ } if di.comment di.add_comment "stackoff=#{off-@size/8}" @dasm_func_default_off[[dasm, calladdr]] = off dasm.backtrace(Indirection[esp, @size/8, origin], origin, :origin => origin) elsif cachedoff = @dasm_func_default_off[[dasm, calladdr]] bind[esp] = Expression[esp, :+, cachedoff] elsif off.kind_of? ::Integer dasm.decoded[calladdr].add_comment "stackoff=#{off-@size/8}" end puts "stackoff #{dasm.decoded[calladdr]} | #{Expression[func_start]} | #{expr} | #{e_expr} | #{off}" if dasm.debug_backtrace end bind } end # the :default backtracked_for callback # returns empty unless funcaddr is not default or calladdr is a call or a jmp def disassembler_default_btfor_callback lambda { |dasm, btfor, funcaddr, calladdr| if funcaddr != :default; btfor elsif di = dasm.decoded[calladdr] and (di.opcode.name == 'call' or di.opcode.name == 'jmp'); btfor else [] end } end # returns a DecodedFunction suitable for :default # uses disassembler_default_bt{for/bind}_callback def disassembler_default_func esp = register_symbols[4] cp = new_cparser cp.parse 'void stdfunc(void);' f = decode_c_function_prototype(cp, 'stdfunc', :default) f.backtrace_binding[esp] = Expression[esp, :+, :unknown] f.btbind_callback = disassembler_default_btbind_callback f.btfor_callback = disassembler_default_btfor_callback f end # returns a hash { :retval => r, :changed => [] } def abi_funcall { :retval => register_symbols[0], :changed => register_symbols[0, 3] } end # computes the binding of the sequence of code starting at entry included # the binding is a hash showing the value of modified elements at the # end of the code sequence, relative to their value at entry # the elements are all the registers and the memory written to # if finish is nil, the binding will include :ip, which is the address # to be executed next (if it exists) # the binding will not include memory access from subfunctions # entry should be an entrypoint of the disassembler if finish is nil # the code sequence must have only one end, with no to_normal # options: # :include_flags => include EFLAGS in the returned binding def code_binding(dasm, entry, finish=nil, nargs={}) include_flags = nargs.delete :include_flags entry = dasm.normalize(entry) finish = dasm.normalize(finish) if finish lastdi = nil bd = {} bt = lambda { |from, expr, inc_start| ret = dasm.backtrace(Expression[expr], from, :snapshot_addr => entry, :include_start => inc_start) ret.length == 1 ? ret.first : Expression::Unknown } # walk blocks, search for finish, scan memory writes todo = [entry] done = [Expression::Unknown] while addr = todo.pop addr = dasm.normalize(addr) next if done.include? addr or addr == finish or not dasm.decoded[addr].kind_of? DecodedInstruction done << addr b = dasm.decoded[addr].block next if b.list.find { |di| a = di.address if a == finish lastdi = b.list[b.list.index(di) - 1] true else # check writes from the instruction get_xrefs_w(dasm, di).each { |waddr, len| # we want the ptr expressed with reg values at entry ptr = bt[a, waddr, false] bd[Indirection[ptr, len, a]] = bt[a, Indirection[waddr, len, a], true] } false end } hasnext = false b.each_to_samefunc(dasm) { |t| hasnext = true if t == finish lastdi = b.list.last else todo << t end } # check end of sequence if not hasnext raise "two-ended code_binding #{lastdi} & #{b.list.last}" if lastdi lastdi = b.list.last if lastdi.opcode.props[:setip] e = get_xrefs_x(dasm, lastdi) raise 'bad code_binding ending' if e.to_a.length != 1 or not lastdi.opcode.props[:stopexec] bd[:ip] = bt[lastdi.address, e.first, false] elsif not lastdi.opcode.props[:stopexec] bd[:ip] = lastdi.next_addr end end end bd.delete_if { |k, v| Expression[k] == Expression[v] } # add register binding raise "no code_binding end" if not lastdi and not finish register_symbols.each { |reg| val = if lastdi; bt[lastdi.address, reg, true] else bt[finish, reg, false] end next if val == Expression[reg] mask = 0xffff_ffff # dont use 1<<@size, because 16bit code may use e.g. edi (through opszoverride) mask = 0xffff_ffff_ffff_ffff if @size == 64 val = Expression[val, :&, mask].reduce bd[reg] = Expression[val] } # add EFLAGS binding if include_flags [:eflag_z, :eflag_s, :eflag_c, :eflag_o].each { |eflag| val = if lastdi; bt[lastdi.address, eflag, true] else bt[finish, eflag, false] end next if val == Expression[eflag] bd[eflag] = Expression[val.reduce] } end bd end # trace the stack pointer register across a function, rename occurences of esp+XX to esp+var_XX def name_local_vars(dasm, funcaddr) esp = register_symbols[4] func = dasm.function[funcaddr] subs = [] dasm.trace_function_register(funcaddr, esp => 0) { |di, r, off, trace| next if r.to_s =~ /flag/ if di.opcode.name == 'call' and tf = di.block.to_normal.find { |t| dasm.function[t] and dasm.function[t].localvars } subs << [trace[esp], dasm.function[tf].localvars] end di.instruction.args.grep(ModRM).each { |mrm| b = mrm.b || (mrm.i if mrm.s == 1) # its a modrm => b is read, so ignore r/off (not yet applied), use trace only stackoff = trace[b.symbolic] if b next if not stackoff imm = mrm.imm || Expression[0] frameoff = imm + stackoff if frameoff.kind_of?(::Integer) # XXX register args ? non-ABI standard register args ? (eg optimized x64) str = 'var_%X' % (-frameoff) str = 'arg_%X' % (frameoff-@size/8) if frameoff > 0 str = func.get_localvar_stackoff(frameoff, di, str) if func imm = imm.expr if imm.kind_of?(ExpressionString) mrm.imm = ExpressionString.new(imm, str, :stackvar) end } off = off.reduce if off.kind_of?(Expression) next unless off.kind_of?(Integer) off } # if subfunctions are called at a fixed stack offset, rename var_3c -> subarg_0 if func and func.localvars and not subs.empty? and subs.all? { |sb| sb[0] == subs.first[0] } func.localvars.each { |varoff, varname| subargnames = subs.map { |o, sb| sb[varoff-o+@size/8] }.compact if subargnames.uniq.length == 1 varname.replace 'sub'+subargnames[0] end } end end end end