metasm/cpu/ia32/decode.rb in metasm-1.0.3 vs metasm/cpu/ia32/decode.rb in metasm-1.0.4

- old
+ new

@@ -321,16 +321,10 @@ when 'ecxz'; Expression[:'!', register_symbols[1]] when 'cxz'; Expression[:'!', [register_symbols[1], :&, 0xffff]] end end - # hash opcode_name => lambda { |dasm, di, *symbolic_args| instr_binding } - def backtrace_binding - @backtrace_binding ||= init_backtrace_binding - end - def backtrace_binding=(b) @backtrace_binding = b end - def opsz(di, op=nil) if di and di.instruction.prefix and di.instruction.prefix[:opsz] and (op || di.opcode).props[:needpfx] != 0x66; 48-@size else @size end end @@ -441,20 +435,20 @@ } when 'popfd', 'popf', 'popfq' lambda { |di| bt = lambda { |pos| Expression[[Indirection[esp, opsz(di)/8, di.address], :>>, pos], :&, 1] } { esp => Expression[esp, :+, opsz(di)/8], :eflag_c => bt[0], :eflag_z => bt[6], :eflag_s => bt[7], :eflag_o => bt[11] } } when 'sahf' - lambda { |di| bt = lambda { |pos| Expression[[eax, :>>, pos], :&, 1] } + lambda { |di| bt = lambda { |pos| Expression[[eax, :>>, 8+pos], :&, 1] } { :eflag_c => bt[0], :eflag_z => bt[6], :eflag_s => bt[7] } } when 'lahf' lambda { |di| efl = Expression[2] bts = lambda { |pos, v| efl = Expression[efl, :|, [[v, :&, 1], :<<, pos]] } bts[0, :eflag_c] #bts[2, :eflag_p] #bts[4, :eflag_a] bts[6, :eflag_z] bts[7, :eflag_s] - { eax => efl } + { Expression[[eax, :>>, 8], :&, 0xff] => efl } } when 'pushad' lambda { |di| ret = {} st_off = 0 @@ -661,10 +655,28 @@ when 16 # bswap ax => mov ax, 0 { a0 => 0 } end } + when 'movdqa', 'movdqu', 'movaps', 'movups'; lambda { |di, a0, a1| { a0 => Expression[a1] } } + when 'cmpxchg'; lambda { |di, a0, a1| # eax == a0 ? a0 <= a1, zf <= 1 : eax <= a0, zf <= 0 + eax_ = self.class::Reg.new(0, opsz(di)).symbolic + cmp = Expression[eax_, :==, a0] + { :eflag_z => cmp, + eax_ => Expression[[cmp, :*, eax_], :|, [[1, :-, cmp], :*, a0]], + a0 => Expression[[cmp, :*, a1], :|, [[1, :-, cmp], :*, a0]] } } + when 'cmpxchg8b', 'cmpxchg16b'; lambda { |di, a0| # edx:eax == mem ? mem <= ecx:ebx, zf <= 1 : edx:eax <= mem, zf <= 0 + sz = (di.opcode.name =~ /8b/ ? 32 : 64) + eax_ = self.class::Reg.new(0, sz).symbolic + ecx_ = self.class::Reg.new(1, sz).symbolic + edx_ = self.class::Reg.new(2, sz).symbolic + ebx_ = self.class::Reg.new(3, sz).symbolic + cmp = Expression[[[edx_, :<<, sz], :|, eax_], :==, a0] + { :eflag_z => cmp, + eax_ => Expression[[cmp, :*, eax_], :|, [[1, :-, cmp], :*, [a0, :&, (1 << sz) - 1]]], + edx_ => Expression[[cmp, :*, edx_], :|, [[1, :-, cmp], :*, [a0, :>>, sz]]], + a0 => Expression[[cmp, :*, [[ecx_, :<<, sz], :|, ebx_]], :|, [[1, :-, cmp], :*, a0]] } } when 'nop', 'pause', 'wait', 'cmp', 'test'; lambda { |di, *a| {} } end # add eflags side-effects @@ -688,19 +700,21 @@ when :-; Expression[[sign[a0, di], :==, [:'!', sign[a1, di]]], :'&&', [sign[a0, di], :'!=', sign[res, di]]] else Expression[0] end ret } - when 'inc', 'dec', 'neg', 'shl', 'shr', 'sar', 'ror', 'rol', 'rcr', 'rcl', 'shld', 'shrd' + when 'inc', 'dec', 'neg', 'shl', 'shr', 'sal', 'sar', 'ror', 'rol', 'rcr', 'rcl', 'shld', 'shrd' lambda { |di, a0, *a| ret = (binding ? binding[di, a0, *a] : {}) res = ret[a0] || Expression::Unknown ret[:eflag_z] = Expression[[res, :&, mask[di]], :==, 0] ret[:eflag_s] = sign[res, di] case op when 'neg'; ret[:eflag_c] = Expression[[res, :&, mask[di]], :'!=', 0] when 'inc', 'dec' # don't touch carry flag + when 'shr', 'sar', 'shrd'; ret[:eflag_c] = Expression[[a0, :>>, [a[0], :-, 1]], :&, 1] # XXX shr 0 => no touch flag + when 'shl', 'sal', 'shld'; ret[:eflag_c] = Expression[[a0, :>>, [di.instruction.args[0].sz, :-, a[0]]], :&, 1] else ret[:eflag_c] = Expression::Unknown # :incomplete_binding ? end ret[:eflag_o] = case op when 'inc'; Expression[[a0, :&, mask[di]], :==, mask[di] >> 1] when 'dec'; Expression[[res , :&, mask[di]], :==, mask[di] >> 1] @@ -724,12 +738,12 @@ @backtrace_binding[op] ||= full_binding || binding if full_binding || binding } @backtrace_binding end - # returns the condition (bool Expression) under which a conditionnal jump is taken - # returns nil if not a conditionnal jump + # returns the condition (bool Expression) under which a conditional jump is taken + # returns nil if not a conditional jump # backtrace for the condition must include the jump itself (eg loop -> ecx--) def get_jump_condition(di) ecx = register_symbols[1] case di.opcode.name when /^j(.*)/ @@ -740,16 +754,11 @@ e end end def get_backtrace_binding(di) - a = di.instruction.args.map { |arg| - case arg - when ModRM, Reg, SimdReg; arg.symbolic(di) - else arg - end - } + a = di.instruction.args.map { |arg| symbolic(arg, di) } if binding = backtrace_binding[di.opcode.basename] bd = binding[di, *a] # handle modifications to al/ah etc bd.keys.grep(Expression).each { |e| @@ -796,18 +805,28 @@ fbd = fbd.dup fbd[:incomplete_binding] = Expression[1] end case di.opcode.name - when 'push', 'call' - fbd = fbd.dup + when /^push/, 'call' + ori = fbd + fbd = {} sz = opsz(di)/8 esp = register_symbols[4] - if i = fbd.delete(Indirection[esp, sz]) - fbd[Indirection[[esp, :-, sz], sz]] = i + if ori[esp] and ori[Indirection[esp, sz]] + ori.each { |k, v| + if k.kind_of?(Indirection) + fbd[k.bind(esp => ori[esp]).reduce_rec] = v + else + fbd[k] = v + end + } + else + fbd = ori.dup + fbd[:incomplete_binding] = Expression[1] # TODO end - when 'pop', 'ret' # nothing to do + when /^pop/, 'ret' # nothing to do when /^(push|pop|call|ret|enter|leave|stos|movs|lods|scas|cmps)/ fbd = fbd.dup fbd[:incomplete_binding] = Expression[1] # TODO end fbd @@ -818,13 +837,12 @@ sz = opsz(di) case di.opcode.basename when 'ret'; return [Indirection[register_symbols[4], sz/8, di.address]] when 'jmp', 'call' - a = di.instruction.args.first - if dasm and a.kind_of?(ModRM) and a.imm and (a.s == sz/8 or a.s == 4) and not a.b and dasm.get_section_at(a.imm) - return get_xrefs_x_jmptable(dasm, di, a, a.s*8) + if dasm and not di.instruction.args.first.kind_of?(Expression) and switch_table = get_xrefs_x_jmptable(dasm, di) + return switch_table end end case tg = di.instruction.args.first when ModRM @@ -832,74 +850,61 @@ [Expression[tg.symbolic(di)]] when Reg; [Expression[tg.symbolic(di)]] when Expression, ::Integer; [Expression[tg]] when Farptr; tg.seg.reduce < 0x30 ? [tg.addr] : [Expression[[tg.seg, :*, 0x10], :+, tg.addr]] else - puts "unhandled setip at #{di.address} #{di.instruction}" if $DEBUG + puts "unhandled setip at #{Expression[di.address]} #{di.instruction}" if $DEBUG [] end end - # we detected a jmp table (jmp [base+4*idx]) - # try to return an accurate dest list - def get_xrefs_x_jmptable(dasm, di, mrm, sz) - # include the symbolic dest for backtrack stuff - ret = [Expression[mrm.symbolic(di)]] - i = mrm.i - if di.block.list.length == 2 and di.block.list[0].opcode.name =~ /^mov/ and a0 = di.block.list[0].instruction.args[0] and - a0.respond_to? :symbolic and a0.symbolic == i.symbolic - i = di.block.list[0].instruction.args[1] - end - pb = di.block.from_normal.to_a - if pb.length == 1 and pdi = dasm.decoded[pb[0]] and pdi.opcode.name =~ /^jn?be?/ and ppdi = pdi.block.list[-2] and ppdi.opcode.name == 'cmp' and - ppdi.instruction.args[0].symbolic == i.symbolic and lim = Expression[ppdi.instruction.args[1]].reduce and lim.kind_of? Integer - # cmp eax, 42 ; jbe switch ; switch: jmp [base+4*eax] - s = dasm.get_section_at(mrm.imm) - lim += 1 if pdi.opcode.name[-1] == ?e - lim.times { |v| - dasm.add_xref(s[1]+s[0].ptr, Xref.new(:r, di.address, sz/8)) - ret << Indirection[[mrm.imm, :+, v*sz/8], sz/8, di.address] - s[0].read(sz/8) + # indirect call, try to match a switch table pattern (eg jmp [base+4*idx]) + # return a list of target addresses if found, nil otherwise + def get_xrefs_x_jmptable(dasm, di) + puts "search jmptable for #{Expression[di.address]} #{di.instruction}" if $DEBUG + arg0 = di.instruction.args.first.symbolic(di) + + bt_log = [] + dasm.backtrace(arg0, di.address, :maxdepth => 3, :log => bt_log) + + expr = nil + index = nil + index_max = nil + + bt_log.each { |btl| + next if btl[0] != :up + last = dasm.di_at(btl[4]) + break if not last or last.block.to_normal.length > 2 + next if last.block.to_normal.length != 2 + # search cmp eax, 42 ; ja too_big ; jmp [base+4*eax] + # XXX 256 cases switch => no cmp... + prelast = last.block.list.reverse.find { |pl| pl.opcode.name == 'cmp' } + break unless prelast and cmp_value = prelast.instruction.args.last and cmp_value.kind_of?(Expression) and cmp_value.reduce.kind_of?(::Integer) + cmp_value = cmp_value.reduce % (1 << prelast.instruction.args.first.sz) # cmp al, -12h ; jnbe => -12h is unsigned 0eeh + index = prelast.instruction.args.first.symbolic(prelast) + index = index.externals.first if index.kind_of?(Expression) # cmp bl, 13 => ebx + expr = Expression[btl[1], :&, ((1 << @size) - 1)] # XXX without the mask, additions may overflow (this breaks elsewhere too, need Expr32) + (expr.externals.grep(Symbol) - [index]).uniq.each { |r| + rv = dasm.backtrace(r, prelast.address, :maxdepth => 3) + expr = expr.bind(r => rv[0]) if rv.length == 1 } - l = dasm.auto_label_at(mrm.imm, 'jmp_table', 'xref') - replace_instr_arg_immediate(di.instruction, mrm.imm, Expression[l]) - # add 'case 1' comments - cases = {} - ret.each_with_index { |ind, idx| - idx -= 1 # ret[0] = symbolic - next if idx < 0 - a = dasm.backtrace(ind, di.address) - if a.length == 1 and a[0].kind_of?(Expression) and addr = a[0].reduce and addr.kind_of?(::Integer) - (cases[addr] ||= []) << idx - end - } - cases.each { |addr, list| - dasm.add_comment(addr, "case #{list.join(', ')}:") - } - return ret - end + cmp_value = prelast.instruction.args.last.reduce % (1 << prelast.instruction.args.first.sz) + case last.opcode.name + when 'jae', 'jb', 'jnae', 'jnb'; index_max = cmp_value-1 + when 'ja', 'jbe', 'jna', 'jnbe'; index_max = cmp_value + else; expr = nil + end + break + } - puts "unrecognized jmp table pattern, using wild guess for #{di}" if $VERBOSE - di.add_comment 'wildguess' - if s = dasm.get_section_at(mrm.imm - 3*sz/8) - v = -3 - else - s = dasm.get_section_at(mrm.imm) - v = 0 + if expr and expr.externals.grep(Symbol).uniq == [index] + # yay ! + # include the symbolic dest for backtrace stuff + puts "found jmptable for #{Expression[di.address]} #{di.instruction} (#{index_max+1} entries)" if $VERBOSE + # TODO add labels / tables / xrefs etc + [Expression[arg0]] + (0..index_max).map { |i| expr.bind(index => i) } end - while s[0].ptr < s[0].length - ptr = dasm.normalize s[0].decode_imm("u#{sz}".to_sym, @endianness) - diff = Expression[ptr, :-, di.address].reduce - if (diff.kind_of? ::Integer and diff.abs < 4096) or (di.opcode.basename == 'call' and ptr != 0 and dasm.get_section_at(ptr)) - dasm.add_xref(s[1]+s[0].ptr-sz/8, Xref.new(:r, di.address, sz/8)) - ret << Indirection[[mrm.imm, :+, v*sz/8], sz/8, di.address] - elsif v > 0 - break - end - v += 1 - end - ret end # checks if expr is a valid return expression matching the :saveip instruction def backtrace_is_function_return(expr, di=nil) expr = Expression[expr].reduce_rec @@ -1237,15 +1242,19 @@ # if finish is nil, the binding will include :ip, which is the address # to be executed next (if it exists) # the binding will not include memory access from subfunctions # entry should be an entrypoint of the disassembler if finish is nil # the code sequence must have only one end, with no to_normal - def code_binding(dasm, entry, finish=nil) + # options: + # :include_flags => include EFLAGS in the returned binding + def code_binding(dasm, entry, finish=nil, nargs={}) + include_flags = nargs.delete :include_flags + entry = dasm.normalize(entry) finish = dasm.normalize(finish) if finish lastdi = nil - binding = {} + bd = {} bt = lambda { |from, expr, inc_start| ret = dasm.backtrace(Expression[expr], from, :snapshot_addr => entry, :include_start => inc_start) ret.length == 1 ? ret.first : Expression::Unknown } @@ -1266,11 +1275,11 @@ else # check writes from the instruction get_xrefs_w(dasm, di).each { |waddr, len| # we want the ptr expressed with reg values at entry ptr = bt[a, waddr, false] - binding[Indirection[ptr, len, a]] = bt[a, Indirection[waddr, len, a], true] + bd[Indirection[ptr, len, a]] = bt[a, Indirection[waddr, len, a], true] } false end } @@ -1289,17 +1298,17 @@ raise "two-ended code_binding #{lastdi} & #{b.list.last}" if lastdi lastdi = b.list.last if lastdi.opcode.props[:setip] e = get_xrefs_x(dasm, lastdi) raise 'bad code_binding ending' if e.to_a.length != 1 or not lastdi.opcode.props[:stopexec] - binding[:ip] = bt[lastdi.address, e.first, false] + bd[:ip] = bt[lastdi.address, e.first, false] elsif not lastdi.opcode.props[:stopexec] - binding[:ip] = lastdi.next_addr + bd[:ip] = lastdi.next_addr end end end - binding.delete_if { |k, v| Expression[k] == Expression[v] } + bd.delete_if { |k, v| Expression[k] == Expression[v] } # add register binding raise "no code_binding end" if not lastdi and not finish register_symbols.each { |reg| val = @@ -1308,13 +1317,25 @@ end next if val == Expression[reg] mask = 0xffff_ffff # dont use 1<<@size, because 16bit code may use e.g. edi (through opszoverride) mask = 0xffff_ffff_ffff_ffff if @size == 64 val = Expression[val, :&, mask].reduce - binding[reg] = Expression[val] + bd[reg] = Expression[val] } - binding + # add EFLAGS binding + if include_flags + [:eflag_z, :eflag_s, :eflag_c, :eflag_o].each { |eflag| + val = + if lastdi; bt[lastdi.address, eflag, true] + else bt[finish, eflag, false] + end + next if val == Expression[eflag] + bd[eflag] = Expression[val.reduce] + } + end + + bd end # trace the stack pointer register across a function, rename occurences of esp+XX to esp+var_XX def name_local_vars(dasm, funcaddr) esp = register_symbols[4]