# Copyright 2009 emonti at matasano.com # See README.rdoc for license information # require "stringio" require 'zlib' require 'open3' require 'enumerator' module Rbkb DEFAULT_BYTE_ORDER=:big HEXCHARS = [("0".."9").to_a, ("a".."f").to_a].flatten end # Generates a random alphanumeric string of 'size' bytes (8 by default) def random_alphanum(size = 8) chars = ('A'..'Z').to_a + ('a'..'z').to_a + ('0'..'9').to_a (1..size).collect{|a| chars[rand(chars.size)]}.join end # Generates a random string of 'size' bytes (8 by default) def random_string(size = 8) chars = (0..255).map {|c| c.chr } (1..size).collect {|a| chars[rand(chars.size)]}.join end # Simple syntactic sugar to pass any object to a block def with(x) yield x if block_given?; x end if not defined? with #----------------------------------------------------------------------------- # Mixins and class-specific items class String # fake the ruby 1.9 String#bytes method if we don't have one def bytes ::Enumerable::Enumerator.new(self, :each_byte) end if not defined?("".bytes) # fake the ruby 1.9 String#getbyte method if we don't have one def getbyte(i) self[i] end if RUBY_VERSION.to_f < 1.9 and not defined?("".getbyte) # fake the ruby 1.9 String#ord method if we don't have one def ord getbyte(0) end if not defined?("".ord) # Works just like each_with_index, but with each_byte def each_byte_with_index bytes.each_with_index {|b,i| yield(b,i) } end # shortcut for hex sanity with regex def ishex? ; (self =~ /^[a-f0-9]+$/i) != nil ; end # Encode into percent-hexify url encoding format def urlenc(opts={}) s=self plus = opts[:plus] unless (opts[:rx] ||= /[^A-Za-z0-9_\.~-]/).kind_of? Regexp raise "rx must be a regular expression for a character class" end hx = Rbkb::HEXCHARS s.gsub(opts[:rx]) do |c| c=c.ord (plus and c==32)? '+' : "%" + (hx[(c >> 4)] + hx[(c & 0xf )]) end end # Undo percent-hexified url encoding data def urldec(opts={}) s=self s.gsub!('+', ' ') unless opts[:noplus] s.gsub(/%([A-Fa-f0-9]{2})/) {$1.hex.chr} end # Base64 encode def b64(len=nil) ret = [self].pack("m").gsub("\n", "") if len and Numeric === len ret.scan(/.{1,#{len}}/).join("\n") + "\n" else ret end end # Base64 decode def d64; self.unpack("m").first ; end # right-align to 'a' alignment padded with 'p' def ralign(a, p=' ') p ||= ' ' l = self.length pad = l.pad(a) self.rjust(pad+l, p) end # left-align to 'a' alignment padded with 'p' def lalign(a, p=' ') p ||= ' ' l = self.length pad = l.pad(a) self.ljust(pad+l, p) end # Convert a string to ASCII hex string. Supports a few options for format: # # :delim - delimter between each hex byte # :prefix - prefix before each hex byte # :suffix - suffix after each hex byte # def hexify(opts={}) delim = opts[:delim] pre = (opts[:prefix] || "") suf = (opts[:suffix] || "") if (rx=opts[:rx]) and not rx.kind_of? Regexp raise "rx must be a regular expression for a character class" end hx=Rbkb::HEXCHARS out=Array.new self.each_byte do |c| hc = if (rx and not rx.match c.chr) c.chr else pre + (hx[(c >> 4)] + hx[(c & 0xf )]) + suf end out << (hc) end out.join(delim) end # Convert ASCII hex string to raw. # # Parameters: # # d = optional 'delimiter' between hex bytes (zero+ spaces by default) def unhexify(d=/\s*/) self.strip.gsub(/([A-Fa-f0-9]{1,2})#{d}?/) { $1.hex.chr } end # Converts a hex value to numeric. # # Parameters: # # order => :big or :little endian (default is :big) # def hex_to_num(order=:big) s=self raise "invalid hex value: '#{s.inspect}'" unless s.ishex? r = if order == :little s.scan(/.{2}/).reverse.join elsif order == :big s else raise "Invalid byte order #{order.inspect}" end.hex end # A "generalized" lazy bytestring -> numeric converter. # # Parameters: # # order => :big or :little endian (default is :big) # # Bonus: should work seamlessly with really large strings. # # >> ("\xFF"*10).dat_to_num # => 1208925819614629174706175 # >> ("\xFF"*20).dat_to_num # => 1461501637330902918203684832716283019655932542975 # def dat_to_num(order=:big) s=self s.reverse! if order == :little r = 0 s.each_byte {|c| r = ((r << 8) | c)} r end alias lazy_to_n dat_to_num alias lazy_to_num dat_to_num alias dat_to_n dat_to_num #### Crypto'ey stuff # calculates entropy in string # # TQBF's description: # "I also added a chi-squared test to quickly figure out entropy of a # string, in "bits of randomness per byte". This is useful, so..." def entropy e = 0 0.upto(255) do |i| x = count(i.chr)/size.to_f if x > 0 e += - x * x.log2 end end e end # Produces a character frequency distribution histogram in descending # order. Example: # # pp some_english_text.char_frequency() # # [[" ", 690], # ["e", 354], # ["t", 242], # ["o", 233], # ["i", 218], # ... # ] # def char_frequency hits = {} self.each_byte {|c| hits[c.chr] ||= 0; hits[c.chr] += 1 } hits.to_a.sort {|a,b| b[1] <=> a[1] } end # xor against a key. key will be repeated or truncated to self.size. def xor(k) i=0 self.bytes.map do |b| x = k.getbyte(i) || k.getbyte(i=0) i+=1 (b ^ x).chr end.join end # (en|de)ciphers using a substition cipher en/decoder ring in the form of a # hash with orig => substitute mappings def substitution(keymap) split('').map {|c| (sub=keymap[c]) ? sub : c }.join end # (en|de)crypts using a substition xor en/decoder ring in the form of # a hash with orig => substitute mappings. Used in conjunction with # char_frequency, this sometimes provides a shorter way to derive a single # character xor key used in conjunction with char_frequency. def substitution_xor(keymap) split('').map {|c| (sub=keymap[c]) ? sub.xor(c) : c }.join end # convert bytes to number then xor against another byte-string or number def ^(x) x = x.dat_to_num unless x.is_a? Numeric (self.dat_to_num ^ x)#.to_bytes end # Byte rotation as found in lame ciphers. def rotate_bytes(k=0) k = (256 + k) if k < 0 self.bytes.map {|c| ((c + k) & 0xff).chr }.join end # String randomizer def randomize ; self.split('').randomize.to_s ; end # In-place string randomizer def randomize! ; self.replace(randomize) end # Returns or prints a hexdump in the style of 'hexdump -C' # # :len => optionally specify a length other than 16 for a wider or thinner # dump. If length is an odd number, it will be rounded up. # # :out => optionally specify an alternate IO object for output. By default, # hexdump will output to STDOUT. Pass a StringIO object and it will return # it as a string. # # Example: # # Here's the default behavior done explicitely: # # >> xxd = dat.hexdump(:len => 16, :out => StringIO.new) # => # # Here's how to change it to STDERR # # >> xxd = dat.hexdump(:len => 16, :out => STDERR) # # -> nil # return value is nil! # def hexdump(opt={}) s=self out = opt[:out] || StringIO.new len = (opt[:len] and opt[:len] > 0)? opt[:len] + (opt[:len] % 2) : 16 off = opt[:start_addr] || 0 offlen = opt[:start_len] || 8 hlen=len/2 s.scan(/(?:.|\n){1,#{len}}/) do |m| out.write(off.to_s(16).rjust(offlen, "0") + ' ') i=0 m.each_byte do |c| out.write c.to_s(16).rjust(2,"0") + " " out.write(' ') if (i+=1) == hlen end out.write(" " * (len-i) ) # pad out.write(" ") if i < hlen out.write(" |#{m.tr("\0-\37\177-\377", '.')}|\n") off += m.length end out.write(off.to_s(16).rjust(offlen,'0') + "\n") if out.class == StringIO out.string end end # Converts a hexdump back to binary - takes the same options as hexdump(). # Fairly flexible. Should work both with 'xxd' and 'hexdump -C' style dumps. def dehexdump(opt={}) s=self out = opt[:out] || StringIO.new len = (opt[:len] and opt[:len] > 0)? opt[:len] : 16 hcrx = /[A-Fa-f0-9]/ dumprx = /^(#{hcrx}+):?\s*((?:#{hcrx}{2}\s*){0,#{len}})/ off = opt[:start_addr] || 0 i=1 # iterate each line of hexdump s.split(/\r?\n/).each do |hl| # match and check offset if m = dumprx.match(hl) and $1.hex == off i+=1 # take the data chunk and unhexify it raw = $2.unhexify off += out.write(raw) else raise "Hexdump parse error on line #{i} #{s}" end end if out.class == StringIO out.string end end alias dedump dehexdump alias undump dehexdump alias unhexdump dehexdump # Binary grep # # Parameters: # # find : A Regexp or string to search for in self # align : nil | numeric alignment (matches only made if aligned) def bgrep(find, align=nil) if align and (not align.is_a?(Integer) or align < 0) raise "alignment must be a integer >= 0" end dat=self if find.kind_of? Regexp search = lambda do |m, buf| if m = m.match(buf) mtch = m[0] off,endoff = m.offset(0) return off, endoff, mtch end end else search = lambda do |s, buf| if off = buf.index(s) return off, off+s.size, s end end end ret=[] pos = 0 while (res = search.call(find, dat[pos..-1])) off, endoff, match = res if align and ( pad = (pos+off).pad(align) ) != 0 pos += pad else hit = [pos+off, pos+endoff, match] if not block_given? or yield([pos+off, pos+endoff, match]) ret << hit end pos += endoff end end return ret end # A 'strings' method a-la unix strings utility. Finds printable strings in # a binary blob. # Supports ASCII and little endian unicode (though only for ASCII printable # character.) # # === Parameters and options: # # * Use the :minimum parameter to specify minimum number of characters # to match. (default = 6) # # * Use the :encoding parameter as one of :ascii, :unicode, or :both # (default = :ascii) # # * The 'strings' method uses Regexp under the hood. Therefore # you can pass a character class for "valid characters" with :valid # (default = /[\r\n [:print:]]/) # # * Supports an optional block, which will be passed |offset, type, string| # for each match. # The block's boolean return value also determines whether the match # passes or fails (true or false/nil) and gets returned by the function. # # === Return Value: # # Returns an array consisting of matches with the following elements: # # [[start_offset, end_offset, string_type, string], ...] # # * string_type will be one of :ascii or :unicode # * end_offset will include the terminating null character # * end_offset will include all null bytes in unicode strings (including # * both terminating nulls) # # If strings are null terminated, the trailing null *IS* included # in the end_offset. Unicode matches will also include null bytes. # # Todos? # - better unicode support (i.e. not using half-assed unicode) # - support other encodings such as all those the binutils strings does? def strings(opts={}) opts[:encoding] ||= :both prx = (opts[:valid] || /[\r\n [:print:]]/) min = (opts[:minimum] || 6) align = opts[:align] raise "Minimum must be numeric and > 0" unless min.kind_of? Numeric and min > 0 arx = /(#{prx}{#{min}}?#{prx}*\x00?)/ urx = /((?:#{prx}\x00){#{min}}(?:#{prx}\x00)*(?:\x00\x00)?)/ rx = case (opts[:encoding] || :both).to_sym when :ascii arx when :unicode urx when :both Regexp.union( arx, urx ) else raise "Encoding must be :unicode, :ascii, or :both" end off=0 ret = [] while mtch = rx.match(self[off..-1]) # calculate relative offsets rel_off = mtch.offset(0) startoff = off + rel_off[0] endoff = off + rel_off[1] off += rel_off[1] if align and (pad=startoff.pad(align)) != 0 off = startoff + pad next end stype = if mtch[1] :ascii elsif mtch[2] :unicode end mret = [startoff, endoff, stype, mtch[0] ] # yield to a block for additional criteria next if block_given? and not yield( *mret ) ret << mret end return ret end # Does string "start with" dat? # No clue whether/when this is faster than a regex, but it is easier to type. def starts_with?(dat) self[0,dat.size] == dat end # Returns a single null-terminated ascii string from beginning of self. # This will return the entire string if no null is encountered. # # Parameters: # # off = specify an optional beggining offset # def cstring(off=0) self[ off, self.index("\x00") || self.size ] end # returns CRC32 checksum for the string object def crc32 ## pure ruby version. slower, but here for reference (found on some forum) # r = 0xFFFFFFFF # self.each_byte do |b| # r ^= b # 8.times do # r = (r>>1) ^ (0xEDB88320 * (r & 1)) # end # end # r ^ 0xFFFFFFFF ## or... we can just use: Zlib.crc32 self end # This attempts to identify a blob of data using 'file(1)' via popen3 # (using popen3 because IO.popen blows) # Tried doing this with a fmagic ruby extention to libmagic, but it was # a whole lot slower. def pipe_magick(arg="") ret="" Open3.popen3("file #{arg} -") do |w,r,e| w.write self; w.close ret = r.read ; r.close ret.sub!(/^\/dev\/stdin: /, "") end ret end # Converts a '_' delimited string to CamelCase like 'foo_class' into # 'FooClass'. # See also: camelize_meth, decamelize def camelize self.gsub(/(^|_)([a-z])/) { $2.upcase } end # Converts a '_' delimited string to method style camelCase like 'foo_method' # into 'fooMethod'. # See also: camelize, decamelize def camelize_meth self.gsub(/_([a-z])/) { $1.upcase } end # Converts a CamelCase or camelCase string into '_' delimited form like # 'FooBar' or 'fooBar' into 'foo_bar'. # # Note: This method only handles camel humps. Strings with consecutive # uppercase chars like 'FooBAR' will be converted to 'foo_bar' # # See also: camelize, camelize_meth def decamelize self.gsub(/(^|[a-z])([A-Z])/) do ($1.empty?)? $2 : "#{$1}_#{$2}" end.downcase end # convert a string to its idiomatic ruby class name def class_name r = "" up = true each_byte do |c| if c == 95 if up r << "::" else up = true end else m = up ? :upcase : :to_s r << (c.chr.send(m)) up = false end end r end # Returns a reference to actual constant for a given name in namespace # can be used to lookup classes from enums and such def const_lookup(ns=Object) if c=ns.constants.select {|n| n == self.class_name } and not c.empty? ns.const_get(c.first) end end # Return a self encapsulated in a StringIO object. This is handy. def to_stringio StringIO.new(self) end end # class String class Symbol # looks up this symbol as a constant defined in 'ns' (Object by default) def const_lookup(ns=Object) self.to_s.const_lookup(ns) end end class Array # Should be in the std library. # # keys = [:one, :two, :three] # vals = [1, 2, 3] # # keys.zip(vals).to_hash # #=> {:two=>2, :three=>3, :one=>1}}) # # keys.to_hash(vals) # #=> {:two=>2, :three=>3, :one=>1}}) def to_hash(vals=nil) a = vals ? self.zip(vals) : self a.inject({}) {|hash, i| hash[i[0]] = i[1]; hash} end # randomizes the order of contents in the Array (self) def randomize ; self.sort_by { rand } ; end # Returns a randomly chosen element from self. # Drew *is* sparta. def rand_elem; self[rand(self.length)] ; end end class Float def log2; Math.log(self)/Math.log(2); end end class Numeric # calculate padding based on alignment(a) def pad(a) raise "bad alignment #{a.inspect}" unless a.kind_of? Numeric and a > 0 return self < 1 ? a + self : (a-1) - (self-1) % a end # tells you whether a number is within printable range def printable?; self >= 0x20 and self <= 0x7e; end # just to go with the flow def randomize ; rand(self) ; end # shortcut for packing a single number... wtf... def pack(arg) ; [self].pack(arg) ; end def clear_bits(c) ; (self ^ (self & c)) ; end # Returns an array of chars per 8-bit break-up. # Accepts a block for some transformation on each byte. # (used by to_bytes and to_hex under the hood) # # args: # order: byte order - :big or :little # (only :big has meaning) # siz: pack to this size. larger numbers will wrap def to_chars(order=nil, siz=nil) order ||= Rbkb::DEFAULT_BYTE_ORDER n=self siz ||= self.size ret=[] siz.times do c = (n % 256) if block_given? then (c = yield(c)) end ret << c n=(n >> 8) end return ((order == :big)? ret.reverse : ret) end # "packs" a number into bytes using bit-twiddling instead of pack() # # Uses to_chars under the hood. See also: to_hex # # args: # siz: pack to this size. larger numbers will wrap # order: byte order - :big or :little # (only :big has meaning) def to_bytes(order=nil, siz=nil) to_chars(order,siz) {|c| c.chr }.join end # Converts a number to hex string with width and endian options. # "packs" a number into bytes using bit-twiddling instead of pack() # # Uses to_chars under the hood. See also: to_bytes # # args: # siz: pack to this size. larger numbers will wrap # order: byte order - :big or :little # (only :big has meaning) # def to_hex(o=nil, s=nil) to_chars(o,s) {|c| Rbkb::HEXCHARS[c.clear_bits(0xf) >> 4]+Rbkb::HEXCHARS[c.clear_bits(0xf0)] }.join end # TODO Fix Numeric.to_guid for new to_bytes/char etc. # def to_guid(order=Rbkb::DEFAULT_BYTE_ORDER) # raw = self.to_bytes(order, 16) # a,b,c,d,*e = raw.unpack("VvvnC6").map{|x| x.to_hex} # e = e.join # [a,b,c,d,e].join("-").upcase # end end # class Numeric # some extra features for zlib... more to come? module Zlib OSMAP = { OS_MSDOS => :msdos, OS_AMIGA => :amiga, OS_VMS => :vms, OS_UNIX => :unix, OS_ATARI => :atari, OS_OS2 => :os2, OS_TOPS20 => :tops20, OS_WIN32 => :win32, OS_VMCMS => :vmcms, OS_ZSYSTEM => :zsystem, OS_CPM => :cpm, OS_RISCOS => :riscos, OS_UNKNOWN => :unknown } # Helpers for Zlib::GzipFile... more to come? class GzipFile ## extra info dump for gzipped files def get_xtra_info info = { :file_crc => crc.to_hex, :file_comment => comment, :file_name => orig_name, :level => level, :mtime => mtime, :os => (Zlib::OSMAP[os_code] || os_code) } end end end class Object ## This is from Topher Cyll's Stupd IRB tricks def mymethods (self.methods - self.class.superclass.methods).sort end end module Enumerable def each_recursive(&block) self.each do |n| block.call(n) n.each_recursive(&block) if n.kind_of? Array or n.kind_of? Hash end end end