metasm/cpu/ia32/decode.rb in metasm-1.0.3 vs metasm/cpu/ia32/decode.rb in metasm-1.0.4
- old
+ new
@@ -321,16 +321,10 @@
when 'ecxz'; Expression[:'!', register_symbols[1]]
when 'cxz'; Expression[:'!', [register_symbols[1], :&, 0xffff]]
end
end
- # hash opcode_name => lambda { |dasm, di, *symbolic_args| instr_binding }
- def backtrace_binding
- @backtrace_binding ||= init_backtrace_binding
- end
- def backtrace_binding=(b) @backtrace_binding = b end
-
# returns the effective operand size (in bits) for the decoded instruction di
# di: decoded instruction (may be nil, in which case the cpu default @size is returned)
# op: optional opcode to check instead of di.opcode
# the default @size is returned unless the instruction carries an :opsz prefix
# and the opcode's :needpfx property is not 0x66
# (presumably needpfx == 0x66 means the prefix byte is part of the opcode encoding
# and not an operand-size override -- TODO confirm against the opcode table)
def opsz(di, op=nil)
  return @size unless di

  pfx = di.instruction.prefix
  return @size unless pfx && pfx[:opsz]

  opcode = op || di.opcode
  return @size if opcode.props[:needpfx] == 0x66

  # 48 - 32 = 16 and 48 - 16 = 32: swap between the two operand sizes
  48 - @size
end
@@ -441,20 +435,20 @@
}
when 'popfd', 'popf', 'popfq'
lambda { |di| bt = lambda { |pos| Expression[[Indirection[esp, opsz(di)/8, di.address], :>>, pos], :&, 1] }
{ esp => Expression[esp, :+, opsz(di)/8], :eflag_c => bt[0], :eflag_z => bt[6], :eflag_s => bt[7], :eflag_o => bt[11] } }
when 'sahf'
- lambda { |di| bt = lambda { |pos| Expression[[eax, :>>, pos], :&, 1] }
+ lambda { |di| bt = lambda { |pos| Expression[[eax, :>>, 8+pos], :&, 1] }
{ :eflag_c => bt[0], :eflag_z => bt[6], :eflag_s => bt[7] } }
when 'lahf'
lambda { |di|
efl = Expression[2]
bts = lambda { |pos, v| efl = Expression[efl, :|, [[v, :&, 1], :<<, pos]] }
bts[0, :eflag_c] #bts[2, :eflag_p] #bts[4, :eflag_a]
bts[6, :eflag_z]
bts[7, :eflag_s]
- { eax => efl }
+ { Expression[[eax, :>>, 8], :&, 0xff] => efl }
}
when 'pushad'
lambda { |di|
ret = {}
st_off = 0
@@ -661,10 +655,28 @@
when 16
# bswap ax => mov ax, 0
{ a0 => 0 }
end
}
+ when 'movdqa', 'movdqu', 'movaps', 'movups'; lambda { |di, a0, a1| { a0 => Expression[a1] } }
+ when 'cmpxchg'; lambda { |di, a0, a1| # eax == a0 ? a0 <= a1, zf <= 1 : eax <= a0, zf <= 0
+ eax_ = self.class::Reg.new(0, opsz(di)).symbolic
+ cmp = Expression[eax_, :==, a0]
+ { :eflag_z => cmp,
+ eax_ => Expression[[cmp, :*, eax_], :|, [[1, :-, cmp], :*, a0]],
+ a0 => Expression[[cmp, :*, a1], :|, [[1, :-, cmp], :*, a0]] } }
+ when 'cmpxchg8b', 'cmpxchg16b'; lambda { |di, a0| # edx:eax == mem ? mem <= ecx:ebx, zf <= 1 : edx:eax <= mem, zf <= 0
+ sz = (di.opcode.name =~ /8b/ ? 32 : 64)
+ eax_ = self.class::Reg.new(0, sz).symbolic
+ ecx_ = self.class::Reg.new(1, sz).symbolic
+ edx_ = self.class::Reg.new(2, sz).symbolic
+ ebx_ = self.class::Reg.new(3, sz).symbolic
+ cmp = Expression[[[edx_, :<<, sz], :|, eax_], :==, a0]
+ { :eflag_z => cmp,
+ eax_ => Expression[[cmp, :*, eax_], :|, [[1, :-, cmp], :*, [a0, :&, (1 << sz) - 1]]],
+ edx_ => Expression[[cmp, :*, edx_], :|, [[1, :-, cmp], :*, [a0, :>>, sz]]],
+ a0 => Expression[[cmp, :*, [[ecx_, :<<, sz], :|, ebx_]], :|, [[1, :-, cmp], :*, a0]] } }
when 'nop', 'pause', 'wait', 'cmp', 'test'; lambda { |di, *a| {} }
end
# add eflags side-effects
@@ -688,19 +700,21 @@
when :-; Expression[[sign[a0, di], :==, [:'!', sign[a1, di]]], :'&&', [sign[a0, di], :'!=', sign[res, di]]]
else Expression[0]
end
ret
}
- when 'inc', 'dec', 'neg', 'shl', 'shr', 'sar', 'ror', 'rol', 'rcr', 'rcl', 'shld', 'shrd'
+ when 'inc', 'dec', 'neg', 'shl', 'shr', 'sal', 'sar', 'ror', 'rol', 'rcr', 'rcl', 'shld', 'shrd'
lambda { |di, a0, *a|
ret = (binding ? binding[di, a0, *a] : {})
res = ret[a0] || Expression::Unknown
ret[:eflag_z] = Expression[[res, :&, mask[di]], :==, 0]
ret[:eflag_s] = sign[res, di]
case op
when 'neg'; ret[:eflag_c] = Expression[[res, :&, mask[di]], :'!=', 0]
when 'inc', 'dec' # don't touch carry flag
+ when 'shr', 'sar', 'shrd'; ret[:eflag_c] = Expression[[a0, :>>, [a[0], :-, 1]], :&, 1] # XXX shr 0 => no touch flag
+ when 'shl', 'sal', 'shld'; ret[:eflag_c] = Expression[[a0, :>>, [di.instruction.args[0].sz, :-, a[0]]], :&, 1]
else ret[:eflag_c] = Expression::Unknown # :incomplete_binding ?
end
ret[:eflag_o] = case op
when 'inc'; Expression[[a0, :&, mask[di]], :==, mask[di] >> 1]
when 'dec'; Expression[[res , :&, mask[di]], :==, mask[di] >> 1]
@@ -724,12 +738,12 @@
@backtrace_binding[op] ||= full_binding || binding if full_binding || binding
}
@backtrace_binding
end
- # returns the condition (bool Expression) under which a conditionnal jump is taken
- # returns nil if not a conditionnal jump
+ # returns the condition (bool Expression) under which a conditional jump is taken
+ # returns nil if not a conditional jump
# backtrace for the condition must include the jump itself (eg loop -> ecx--)
def get_jump_condition(di)
ecx = register_symbols[1]
case di.opcode.name
when /^j(.*)/
@@ -740,16 +754,11 @@
e
end
end
def get_backtrace_binding(di)
- a = di.instruction.args.map { |arg|
- case arg
- when ModRM, Reg, SimdReg; arg.symbolic(di)
- else arg
- end
- }
+ a = di.instruction.args.map { |arg| symbolic(arg, di) }
if binding = backtrace_binding[di.opcode.basename]
bd = binding[di, *a]
# handle modifications to al/ah etc
bd.keys.grep(Expression).each { |e|
@@ -796,18 +805,28 @@
fbd = fbd.dup
fbd[:incomplete_binding] = Expression[1]
end
case di.opcode.name
- when 'push', 'call'
- fbd = fbd.dup
+ when /^push/, 'call'
+ ori = fbd
+ fbd = {}
sz = opsz(di)/8
esp = register_symbols[4]
- if i = fbd.delete(Indirection[esp, sz])
- fbd[Indirection[[esp, :-, sz], sz]] = i
+ if ori[esp] and ori[Indirection[esp, sz]]
+ ori.each { |k, v|
+ if k.kind_of?(Indirection)
+ fbd[k.bind(esp => ori[esp]).reduce_rec] = v
+ else
+ fbd[k] = v
+ end
+ }
+ else
+ fbd = ori.dup
+ fbd[:incomplete_binding] = Expression[1] # TODO
end
- when 'pop', 'ret' # nothing to do
+ when /^pop/, 'ret' # nothing to do
when /^(push|pop|call|ret|enter|leave|stos|movs|lods|scas|cmps)/
fbd = fbd.dup
fbd[:incomplete_binding] = Expression[1] # TODO
end
fbd
@@ -818,13 +837,12 @@
sz = opsz(di)
case di.opcode.basename
when 'ret'; return [Indirection[register_symbols[4], sz/8, di.address]]
when 'jmp', 'call'
- a = di.instruction.args.first
- if dasm and a.kind_of?(ModRM) and a.imm and (a.s == sz/8 or a.s == 4) and not a.b and dasm.get_section_at(a.imm)
- return get_xrefs_x_jmptable(dasm, di, a, a.s*8)
+ if dasm and not di.instruction.args.first.kind_of?(Expression) and switch_table = get_xrefs_x_jmptable(dasm, di)
+ return switch_table
end
end
case tg = di.instruction.args.first
when ModRM
@@ -832,74 +850,61 @@
[Expression[tg.symbolic(di)]]
when Reg; [Expression[tg.symbolic(di)]]
when Expression, ::Integer; [Expression[tg]]
when Farptr; tg.seg.reduce < 0x30 ? [tg.addr] : [Expression[[tg.seg, :*, 0x10], :+, tg.addr]]
else
- puts "unhandled setip at #{di.address} #{di.instruction}" if $DEBUG
+ puts "unhandled setip at #{Expression[di.address]} #{di.instruction}" if $DEBUG
[]
end
end
- # we detected a jmp table (jmp [base+4*idx])
- # try to return an accurate dest list
- def get_xrefs_x_jmptable(dasm, di, mrm, sz)
- # include the symbolic dest for backtrack stuff
- ret = [Expression[mrm.symbolic(di)]]
- i = mrm.i
- if di.block.list.length == 2 and di.block.list[0].opcode.name =~ /^mov/ and a0 = di.block.list[0].instruction.args[0] and
- a0.respond_to? :symbolic and a0.symbolic == i.symbolic
- i = di.block.list[0].instruction.args[1]
- end
- pb = di.block.from_normal.to_a
- if pb.length == 1 and pdi = dasm.decoded[pb[0]] and pdi.opcode.name =~ /^jn?be?/ and ppdi = pdi.block.list[-2] and ppdi.opcode.name == 'cmp' and
- ppdi.instruction.args[0].symbolic == i.symbolic and lim = Expression[ppdi.instruction.args[1]].reduce and lim.kind_of? Integer
- # cmp eax, 42 ; jbe switch ; switch: jmp [base+4*eax]
- s = dasm.get_section_at(mrm.imm)
- lim += 1 if pdi.opcode.name[-1] == ?e
- lim.times { |v|
- dasm.add_xref(s[1]+s[0].ptr, Xref.new(:r, di.address, sz/8))
- ret << Indirection[[mrm.imm, :+, v*sz/8], sz/8, di.address]
- s[0].read(sz/8)
+ # indirect call, try to match a switch table pattern (eg jmp [base+4*idx])
+ # return a list of target addresses if found, nil otherwise
+ def get_xrefs_x_jmptable(dasm, di)
+ puts "search jmptable for #{Expression[di.address]} #{di.instruction}" if $DEBUG
+ arg0 = di.instruction.args.first.symbolic(di)
+
+ bt_log = []
+ dasm.backtrace(arg0, di.address, :maxdepth => 3, :log => bt_log)
+
+ expr = nil
+ index = nil
+ index_max = nil
+
+ bt_log.each { |btl|
+ next if btl[0] != :up
+ last = dasm.di_at(btl[4])
+ break if not last or last.block.to_normal.length > 2
+ next if last.block.to_normal.length != 2
+ # search cmp eax, 42 ; ja too_big ; jmp [base+4*eax]
+ # XXX 256 cases switch => no cmp...
+ prelast = last.block.list.reverse.find { |pl| pl.opcode.name == 'cmp' }
+ break unless prelast and cmp_value = prelast.instruction.args.last and cmp_value.kind_of?(Expression) and cmp_value.reduce.kind_of?(::Integer)
+ cmp_value = cmp_value.reduce % (1 << prelast.instruction.args.first.sz) # cmp al, -12h ; jnbe => -12h is unsigned 0eeh
+ index = prelast.instruction.args.first.symbolic(prelast)
+ index = index.externals.first if index.kind_of?(Expression) # cmp bl, 13 => ebx
+ expr = Expression[btl[1], :&, ((1 << @size) - 1)] # XXX without the mask, additions may overflow (this breaks elsewhere too, need Expr32)
+ (expr.externals.grep(Symbol) - [index]).uniq.each { |r|
+ rv = dasm.backtrace(r, prelast.address, :maxdepth => 3)
+ expr = expr.bind(r => rv[0]) if rv.length == 1
}
- l = dasm.auto_label_at(mrm.imm, 'jmp_table', 'xref')
- replace_instr_arg_immediate(di.instruction, mrm.imm, Expression[l])
- # add 'case 1' comments
- cases = {}
- ret.each_with_index { |ind, idx|
- idx -= 1 # ret[0] = symbolic
- next if idx < 0
- a = dasm.backtrace(ind, di.address)
- if a.length == 1 and a[0].kind_of?(Expression) and addr = a[0].reduce and addr.kind_of?(::Integer)
- (cases[addr] ||= []) << idx
- end
- }
- cases.each { |addr, list|
- dasm.add_comment(addr, "case #{list.join(', ')}:")
- }
- return ret
- end
+ cmp_value = prelast.instruction.args.last.reduce % (1 << prelast.instruction.args.first.sz)
+ case last.opcode.name
+ when 'jae', 'jb', 'jnae', 'jnb'; index_max = cmp_value-1
+ when 'ja', 'jbe', 'jna', 'jnbe'; index_max = cmp_value
+ else; expr = nil
+ end
+ break
+ }
- puts "unrecognized jmp table pattern, using wild guess for #{di}" if $VERBOSE
- di.add_comment 'wildguess'
- if s = dasm.get_section_at(mrm.imm - 3*sz/8)
- v = -3
- else
- s = dasm.get_section_at(mrm.imm)
- v = 0
+ if expr and expr.externals.grep(Symbol).uniq == [index]
+ # yay !
+ # include the symbolic dest for backtrace stuff
+ puts "found jmptable for #{Expression[di.address]} #{di.instruction} (#{index_max+1} entries)" if $VERBOSE
+ # TODO add labels / tables / xrefs etc
+ [Expression[arg0]] + (0..index_max).map { |i| expr.bind(index => i) }
end
- while s[0].ptr < s[0].length
- ptr = dasm.normalize s[0].decode_imm("u#{sz}".to_sym, @endianness)
- diff = Expression[ptr, :-, di.address].reduce
- if (diff.kind_of? ::Integer and diff.abs < 4096) or (di.opcode.basename == 'call' and ptr != 0 and dasm.get_section_at(ptr))
- dasm.add_xref(s[1]+s[0].ptr-sz/8, Xref.new(:r, di.address, sz/8))
- ret << Indirection[[mrm.imm, :+, v*sz/8], sz/8, di.address]
- elsif v > 0
- break
- end
- v += 1
- end
- ret
end
# checks if expr is a valid return expression matching the :saveip instruction
def backtrace_is_function_return(expr, di=nil)
expr = Expression[expr].reduce_rec
@@ -1237,15 +1242,19 @@
# if finish is nil, the binding will include :ip, which is the address
# to be executed next (if it exists)
# the binding will not include memory access from subfunctions
# entry should be an entrypoint of the disassembler if finish is nil
# the code sequence must have only one end, with no to_normal
- def code_binding(dasm, entry, finish=nil)
+ # options:
+ # :include_flags => include EFLAGS in the returned binding
+ def code_binding(dasm, entry, finish=nil, nargs={})
+ include_flags = nargs.delete :include_flags
+
entry = dasm.normalize(entry)
finish = dasm.normalize(finish) if finish
lastdi = nil
- binding = {}
+ bd = {}
bt = lambda { |from, expr, inc_start|
ret = dasm.backtrace(Expression[expr], from, :snapshot_addr => entry, :include_start => inc_start)
ret.length == 1 ? ret.first : Expression::Unknown
}
@@ -1266,11 +1275,11 @@
else
# check writes from the instruction
get_xrefs_w(dasm, di).each { |waddr, len|
# we want the ptr expressed with reg values at entry
ptr = bt[a, waddr, false]
- binding[Indirection[ptr, len, a]] = bt[a, Indirection[waddr, len, a], true]
+ bd[Indirection[ptr, len, a]] = bt[a, Indirection[waddr, len, a], true]
}
false
end
}
@@ -1289,17 +1298,17 @@
raise "two-ended code_binding #{lastdi} & #{b.list.last}" if lastdi
lastdi = b.list.last
if lastdi.opcode.props[:setip]
e = get_xrefs_x(dasm, lastdi)
raise 'bad code_binding ending' if e.to_a.length != 1 or not lastdi.opcode.props[:stopexec]
- binding[:ip] = bt[lastdi.address, e.first, false]
+ bd[:ip] = bt[lastdi.address, e.first, false]
elsif not lastdi.opcode.props[:stopexec]
- binding[:ip] = lastdi.next_addr
+ bd[:ip] = lastdi.next_addr
end
end
end
- binding.delete_if { |k, v| Expression[k] == Expression[v] }
+ bd.delete_if { |k, v| Expression[k] == Expression[v] }
# add register binding
raise "no code_binding end" if not lastdi and not finish
register_symbols.each { |reg|
val =
@@ -1308,13 +1317,25 @@
end
next if val == Expression[reg]
mask = 0xffff_ffff # dont use 1<<@size, because 16bit code may use e.g. edi (through opszoverride)
mask = 0xffff_ffff_ffff_ffff if @size == 64
val = Expression[val, :&, mask].reduce
- binding[reg] = Expression[val]
+ bd[reg] = Expression[val]
}
- binding
+ # add EFLAGS binding
+ if include_flags
+ [:eflag_z, :eflag_s, :eflag_c, :eflag_o].each { |eflag|
+ val =
+ if lastdi; bt[lastdi.address, eflag, true]
+ else bt[finish, eflag, false]
+ end
+ next if val == Expression[eflag]
+ bd[eflag] = Expression[val.reduce]
+ }
+ end
+
+ bd
end
# trace the stack pointer register across a function, rename occurrences of esp+XX to esp+var_XX
def name_local_vars(dasm, funcaddr)
esp = register_symbols[4]