# -*- coding: binary -*-

##
# $Id: jsobfu.rb 15548 2012-06-29 06:08:20Z rapid7 $
##

require 'rex/text'
require 'rkelly'

module Rex
module Exploitation

#
# Obfuscate JavaScript by randomizing as much as possible and removing
# easily-signaturable string constants.
#
# Example:
#   js = ::Rex::Exploitation::JSObfu.new %Q|
#     var a = "0\\612\\063\\x34\\x35\\x36\\x37\\x38\\u0039";
#     var b = { foo : "foo", bar : "bar" }
#     alert(a);
#     alert(b.foo);
#   |
#   js.obfuscate
#   puts js
# Example Output:
#   var VwxvESbCgv = String.fromCharCode(0x30,0x31,062,063,064,53,0x36,067,070,0x39);
#   var ToWZPn = {
#     "\146\157\x6f": (function () { var yDyv="o",YnCL="o",Qcsa="f"; return Qcsa+YnCL+yDyv })(),
#     "\142ar": String.fromCharCode(0142,97,0162)
#   };
#   alert(VwxvESbCgv);
#   alert(ToWZPn.foo);
#
# NOTE: Variables MUST be declared with a 'var' statement BEFORE first use (or
# not at all) for this to generate correct code!  If variables are not declared
# they will not be randomized but the generated code will be correct.
#
# Bad Example Javascript:
#   a = "asdf"; // this variable hasn't been declared and will not be randomized
#   var a;
#   alert(a); // real js engines will alert "asdf" here
# Bad Example Obfuscated:
#   a = (function () { var hpHu="f",oyTm="asd"; return oyTm+hpHu })();
#   var zSrnHpEfJZtg;
#   alert(zSrnHpEfJZtg);
# Notice that the first usage of +a+ (before it was declared) is not
# randomized.  Thus, the obfuscated version will alert 'undefined' instead of
# "asdf".
#
class JSObfu

  #
  # Single-character javascript escapes (the character following the
  # backslash) mapped to the character code they stand for.
  #
  SIMPLE_ESCAPES = {
    "b" => 0x08,
    "t" => 0x09,
    "n" => 0x0a,
    "v" => 0x0b,
    "f" => 0x0c,
    "r" => 0x0d
  }.freeze

  #
  # Abstract Syntax Tree generated by RKelly::Parser#parse
  #
  attr_reader :ast

  #
  # Saves +code+ for later obfuscation with #obfuscate
  #
  # A private copy of +code+ is kept so that #<< neither modifies the
  # caller's string nor fails when the caller passed a frozen string.
  #
  def initialize(code)
    @code  = code.dup
    @funcs = {}
    @vars  = {}
    @debug = false
  end

  #
  # Add +str+ to the un-obfuscated code.
  #
  # Calling this method after #obfuscate is undefined
  #
  def <<(str)
    @code << str
  end

  #
  # Return the (possibly obfuscated) code as a string.
  #
  # If #obfuscate has not been called before this, returns the parsed,
  # unobfuscated code.  This can be useful for example to remove comments and
  # standardize spacing.
  #
  def to_s
    parse unless @ast
    @ast.to_ecma
  end

  #
  # Return the obfuscated name of a symbol.  Variables are consulted first,
  # then functions; a name that was never randomized is returned unchanged.
  #
  # You MUST call #obfuscate before this method!
  #
  def sym(lookup)
    @vars[lookup] || @funcs[lookup] || lookup
  end

  #
  # Parse and obfuscate
  #
  def obfuscate
    parse
    obfuscate_r(@ast)
  end

protected

  #
  # Recursive method to obfuscate the given +ast+.
  #
  # +ast+ should be the result of RKelly::Parser#parse
  #
  # Always returns nil; the nodes are modified in place.
  #
  def obfuscate_r(ast)
    ast.each do |node|
      case node
      when nil
        nil

      when ::RKelly::Nodes::SourceElementsNode
        # Recurse
        obfuscate_r(node.value)

      # TODO: ::RKelly::Nodes::ObjectLiteralNode

      when ::RKelly::Nodes::PropertyNode
        # Property names must be bare words or string literals NOT
        # expressions!  Can't use transform_string() here
        if node.name =~ /\A[a-zA-Z_][a-zA-Z0-9_]*\z/
          node.name = quote_property_name(node.name)
        end

      # Variables
      when ::RKelly::Nodes::VarDeclNode
        @vars[node.name] = random_var_name if @vars[node.name].nil?
        node.name = @vars[node.name]

      when ::RKelly::Nodes::ParameterNode
        @vars[node.value] = random_var_name if @vars[node.value].nil?
        node.value = @vars[node.value]

      when ::RKelly::Nodes::ResolveNode
        node.value = @vars[node.value] if @vars[node.value]

      when ::RKelly::Nodes::DotAccessorNode
        # Only the object being accessed is renamed, and only when it is a
        # plain name that was randomized earlier.
        target = node.value
        if target.kind_of?(::RKelly::Nodes::ResolveNode) && @vars[target.value]
          target.value = @vars[target.value]
        end

      # Functions
      when ::RKelly::Nodes::FunctionDeclNode
        # Functions can also act as objects, so store them in the vars
        # and the functions list so we can replace them in both places.
        #
        # The second half of the condition skips declarations whose name is
        # already one of the names generated here.
        if @funcs[node.value].nil? && !@funcs.value?(node.value)
          @funcs[node.value] = random_var_name
          @vars[node.value]  = @funcs[node.value] if @vars[node.value].nil?
          node.value = @funcs[node.value]
        end

      when ::RKelly::Nodes::FunctionCallNode
        # The value of a FunctionCallNode is some sort of accessor node or
        # a ResolveNode so there is nothing to rename on the call itself.

      # Transformers
      when ::RKelly::Nodes::NumberNode
        node.value = transform_number(node.value)

      when ::RKelly::Nodes::StringNode
        node.value = transform_string(node.value)
      end
    end

    nil
  end

  #
  # Return a fresh random alphabetic identifier, 3 to 14 characters long.
  #
  def random_var_name
    Rex::Text.rand_text_alpha(3 + rand(12))
  end

  #
  # Turn the bare-word property name +name+ into a double-quoted javascript
  # string literal in which every byte is randomly written as a hex escape,
  # an octal escape, or itself.
  #
  def quote_property_name(name)
    quoted = '"'
    name.each_byte do |c|
      case rand(3)
      when 0 then quoted << ("\\x%02x" % c)
      when 1 then quoted << "\\#{c.to_s(8)}"
      when 2 then quoted << c.chr
      end
    end
    quoted << '"'
  end

  #
  # Generate an Abstract Syntax Tree (#ast) for later obfuscation
  #
  def parse
    parser = RKelly::Parser.new
    @ast = parser.parse(@code)
  end

  #
  # Convert a number to a random base (decimal, octal, or hexadecimal).
  #
  # Given 10 as input, the possible return values are:
  #   "10"
  #   "0xa"
  #   "012"
  #
  def rand_base(num)
    case rand(3)
    when 0 then num.to_s
    when 1 then "0%o" % num
    when 2 then "0x%x" % num
    end
  end

  #
  # Return a mathematical expression that will evaluate to the given number
  # +num+.
  #
  # +num+ can be a float or an int.  Negative numbers are emitted as the
  # negation of the expression for their magnitude.  Anything that is neither
  # an Integer nor a Float is returned as its plain string form.
  #
  def transform_number(num)
    # rand_base() can't represent negatives in octal/hex, so obfuscate the
    # magnitude and let javascript negate it.
    return "(-#{transform_number(-num)})" if num.kind_of?(Numeric) && num < 0

    case num
    when Integer
      if num == 0
        r = rand(10) + 1
        "('#{Rex::Text.rand_text_alpha(r)}'.length - #{r})"
      elsif num < 10
        # use a random string.length for small numbers
        "'#{Rex::Text.rand_text_alpha(num)}'.length"
      else
        # num == a * divisor + b
        divisor = rand(num) + 1
        a = num / divisor
        b = num - (a * divisor)
        # recurse half the time for a
        a = (rand(2) == 0) ? transform_number(a) : rand_base(a)
        # recurse half the time for divisor
        divisor = (rand(2) == 0) ? transform_number(divisor) : rand_base(divisor)
        "(#{a}*#{divisor}+#{b})"
      end
    when Float
      # fractional part stays literal, integral part gets a random base
      "(#{num - num.floor} + #{rand_base(num.floor)})"
    else
      num.to_s
    end
  end

  #
  # Convert a javascript string into something that will generate that string.
  #
  # +str+ is the string literal including its surrounding quotes.  An empty
  # literal is returned as a pair of quotes; otherwise one of the
  # +transform_string_*+ methods is chosen at random.
  #
  def transform_string(str)
    quote = str[0,1]
    # pull off the quotes
    str = str[1,str.length - 2]
    return quote*2 if str.length == 0

    case rand(2)
    when 0 then transform_string_split_concat(str, quote)
    when 1 then transform_string_fromCharCode(str)
    end
  end

  #
  # Split a javascript string, +str+, without breaking escape sequences.
  #
  # The maximum length of each piece of the string is half the total length
  # of the string, ensuring we (almost) always split into at least two
  # pieces.  This won't always be true when given a string like "AA\x41",
  # where escape sequences artificially increase the total length (escape
  # sequences are considered a single character).
  #
  # Returns an array of two-element arrays.  The zeroeth element is a
  # randomly generated variable name (unique within the returned array), the
  # first is a piece of the string contained in +quote+s.
  #
  # +str+ itself is left untouched.
  #
  # See #escape_length
  #
  def safe_split(str, quote)
    remaining = str.dup
    max_len   = remaining.length / 2
    names     = []
    parts     = []

    while remaining.length > 0
      len = 0
      loop do
        # step over one character or one whole escape sequence
        len += escape_length(remaining[len..-1]) || 1
        # if we've reached the end of the string, bail
        break if len >= remaining.length
        break if len > max_len
        # randomize the length of each part
        break if rand(4) == 0
      end

      piece = remaining.slice!(0, len)

      # Two pieces sharing a name would overwrite each other in the
      # generated javascript, so redraw on the (unlikely) collision.
      name = Rex::Text.rand_text_alpha(4)
      name = Rex::Text.rand_text_alpha(4) while names.include?(name)
      names << name

      parts << [ name, "#{quote}#{piece}#{quote}" ]
    end

    parts
  end

  #
  # Stolen from obfuscatejs.rb
  #
  # Determines the length of the escape sequence at the start of +str+.
  # Returns nil when +str+ does not start with a backslash.
  #
  def escape_length(str)
    return nil unless str[0,1] == "\\"

    case str[1,1]
    when "u" then 6 # unicode \u1234
    when "x" then 4 # hex, \x41
    when /[0-7]/
      # octal, \123, \0 -- at most three digits and never more than 0377
      digits = str[1,3][/\A[0-7]{1,3}/]
      digits = digits[0,2] if digits.to_i(8) > 255
      1 + digits.length
    else
      2 # \" \n, etc.
    end
  end

  #
  # Split a javascript string, +str+, into multiple randomly-ordered parts
  # and return an anonymous javascript function that joins them in the
  # correct order.  This method can be called safely on strings containing
  # escape sequences.  See #safe_split.
  #
  def transform_string_split_concat(str, quote)
    parts = safe_split(str, quote)

    # Declare the pieces in random order ...
    declarations = parts.sort_by { rand }.map { |name, value| "#{name}=#{value}" }
    # ... but concatenate them in their original order.
    names = parts.map { |name, _value| name }

    "(function () { var #{declarations.join(",")}; return #{names.join("+")} })()"
  end

  # TODO
  #def transform_string_unescape(str)
  #  str
  #end

  #
  # Return a call to String.fromCharCode() with each char of the input as arguments
  #
  # Example:
  #   input : "A\n"
  #   output: String.fromCharCode(0x41, 10)
  #
  # +str+ is the body of a javascript string literal (no surrounding quotes)
  # and is left untouched.  Line continuations (a backslash followed by a
  # line terminator) are not treated specially.
  #
  def transform_string_fromCharCode(str)
    rest  = str.dup
    codes = []

    while rest.length > 0
      if rest[0,1] == "\\"
        # then this is an escape sequence and we need to deal with all
        # the special cases
        rest.slice!(0,1)
        esc = rest[0,1]

        case esc
        when ""
          # dangling backslash at the very end; keep it as a literal
          char = 0x5c
        when "x"
          # Strip the x, then take the two hex digits
          rest.slice!(0,1)
          char = rest.slice!(0,2).to_i(16)
        when "u"
          # This can potentially lose information in the case of
          # characters like \u0041, but since regular ascii is stored
          # as unicode internally, String.fromCharCode(0x41) will be
          # represented as 00 41 in memory anyway, so it shouldn't
          # matter.
          rest.slice!(0,1)
          char = rest.slice!(0,4).to_i(16)
        when /[0-7]/
          # Octals are a bit harder since they are variable width and
          # don't necessarily mean what you might think. For example,
          # "\61" == "1" and "\610" == "10".  610 is a valid octal
          # number, but not a valid ascii character.  Javascript will
          # interpret as much as it can as a char and use the rest
          # as a literal.  Boo.
          digits = rest[/\A[0-7]{1,3}/]
          digits = digits[0,2] if digits.to_i(8) > 255
          char = digits.to_i(8)
          rest.slice!(0, digits.length)
        else
          # Symbolic escapes (\n, \t, \r, ...) use their known value.
          # Every other escaped char (\", \', \\, ...) contains its
          # non-escaped self, so step past the backslash and let
          # rand_base() decide how to represent the character.
          char = SIMPLE_ESCAPES[esc] || esc.unpack("C").first
          rest.slice!(0,1)
        end
      else
        char = rest.slice!(0,1).unpack("C").first
      end

      codes << rand_base(char)
    end

    "String.fromCharCode(#{codes.join(",")})"
  end

end

end
end

=begin
if __FILE__ == $0
  if ARGV[0]
    code = File.read(ARGV[0])
  else
    #require 'rex/exploitation/javascriptosdetect'
    #code = Rex::Exploitation::JavascriptOSDetect.new.to_s
    code = <<-EOS
      // Should alert "0123456789"
      var a = "0\\612\\063\\x34\\x35\\x36\\x37\\x38\\u0039";
      var a,b=2,c=3;
      alert(a);
      // should alert "asdfjkl;"
      var d = (function() { var foo = "jkl;", blah = "asdf"; return blah + foo; })();
      alert(d);
    EOS
  end
  js = Rex::Exploitation::JSObfu.new(code)
  js.obfuscate
  puts js.to_s
end
=end