# frozen_string_literal: true # # Copyright (c) 2006-2022 Hal Brodigan (postmodern.mod3 at gmail.com) # # ronin-support is free software: you can redistribute it and/or modify # it under the terms of the GNU Lesser General Public License as published # by the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # ronin-support is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public License # along with ronin-support. If not, see . # require 'ronin/support/binary/ctypes' require 'chars' module Ronin module Support module Binary module Hexdump # # @since 0.5.0 # # @api semipublic # class Parser # The character type. # # @note it uses `'C'` as the pack string. CHAR_TYPE = CTypes::CharType.new(signed: false, pack_string: 'C') # Supported types. # # @note The `:char` and `:uchar` types to a custom char type that uses # `'C'` as it's pack string. TYPES = CTypes::TYPES.merge( char: CHAR_TYPE, uchar: CHAR_TYPE ) # Visible characters VISIBLE_CHARS = Hash[ Chars::VISIBLE.chars.sort.zip(Chars::VISIBLE.bytes.sort) ] # Escaped characters CHARS = { '\0' => 0x00, '\a' => 0x07, '\b' => 0x08, '\t' => 0x09, '\n' => 0x0a, '\v' => 0x0b, '\f' => 0x0c, '\r' => 0x0d, ' ' => 0x20 }.merge(VISIBLE_CHARS) # od named characters NAMED_CHARS = { 'nul' => 0x00, 'soh' => 0x01, 'stx' => 0x02, 'etx' => 0x03, 'eot' => 0x04, 'enq' => 0x05, 'ack' => 0x06, 'bel' => 0x07, 'bs' => 0x08, 'ht' => 0x09, 'lf' => 0x0a, 'nl' => 0x0a, 'vt' => 0x0b, 'ff' => 0x0c, 'cr' => 0x0d, 'so' => 0x0e, 'si' => 0x0f, 'dle' => 0x10, 'dc1' => 0x11, 'dc2' => 0x12, 'dc3' => 0x13, 'dc4' => 0x14, 'nak' => 0x15, 'syn' => 0x16, 'etb' => 0x17, 'can' => 0x18, 'em' => 0x19, 'sub' => 0x1a, 'esc' => 0x1b, 'fs' => 0x1c, 'gs' => 0x1d, 'rs' => 0x1e, 'us' => 0x1f, 'sp' => 0x20, 'del' => 0x7f }.merge(VISIBLE_CHARS) # The format to parse. # # @return [:hexdump, :od] attr_reader :format # The type of data to parse. # # @return [:integer, :float] attr_reader :type # The base of all addresses to parse # # @return [2, 8, 10, 16] attr_reader :address_base # The base of all words to parse # # @return [2, 8, 10, 16] attr_reader :base # # Initializes the hexdump parser. # # @param [:od, :hexdump] format # The expected format of the hexdump. Must be either `:od` or # `:hexdump`. # # @param [Hash{Symbol => Object}] kwargs # Additional keyword arguments. # # @option kwargs [Symbol] :type # Denotes the encoding used for the bytes within the hexdump. # Must be one of the following: # * `:byte` (default for `format: :hexdump`) # * `:char` # * `:uint8` # * `:uint16` # * `:uint32` # * `:uint64` # * `:int8` # * `:int16` # * `:int32` # * `:int64` # * `:uchar` # * `:ushort` # * `:uint` # * `:ulong` # * `:ulong_long` # * `:short` # * `:int` # * `:long` # * `:long_long` # * `:float` # * `:double` # * `:float_le` # * `:double_le` # * `:float_be` # * `:double_be` # * `:uint16_le` (default for `format: :od`) # * `:uint32_le` # * `:uint64_le` # * `:int16_le` # * `:int32_le` # * `:int64_le` # * `:uint16_be` # * `:uint32_be` # * `:uint64_be` # * `:int16_be` # * `:int32_be` # * `:int64_be` # * `:ushort_le` # * `:uint_le` # * `:ulong_le` # * `:ulong_long_le` # * `:short_le` # * `:int_le` # * `:long_le` # * `:long_long_le` # * `:ushort_be` # * `:uint_be` # * `:ulong_be` # * `:ulong_long_be` # * `:short_be` # * `:int_be` # * `:long_be` # * `:long_long_be` # # @option kwargs [2, 8, 10, 16, nil] :address_base # The numerical base that the offset addresses are encoded in. # Defaults to 16 when `format: :hexdump` and 8 when `format: :od`. # # @option kwargs [2, 8, 10, 16, nil] :base # The numerical base that the hexdumped numbers are encoded in. # Defaults to 16 when `format: :hexdump` and 8 when `format: :od`. # # @option kwargs [Boolean] :named_chars # Indicates to parse `od`-style named characters (ex: `nul`, # `del`, etc). Only recognized when `format: :od` is also given. # # @raise [ArgumentError] # Unsupported `type:` value, the `type:` value was not a scalar # type, or the `format:` was not `:hexdump` or `:od`. # def initialize(format: :hexdump, **kwargs) case format when :od then initialize_od(**kwargs) when :hexdump then initialize_hexdump(**kwargs) else raise(ArgumentError,"format: must be either :hexdump or :od, was #{format.inspect}") end case @type when CTypes::FloatType @parse_method = method(:parse_float) when CTypes::CharType @parse_method = method(:parse_char_or_int) when CTypes::ScalarType @parse_method = method(:parse_int) else raise(ArgumentError,"only scalar types are support: #{kwargs[:type].inspect}") end end private # # Initializes instance variables for the `od` hexdump format. # def initialize_od(type: :uint16_le, base: nil, address_base: nil, named_chars: nil) @format = :od @type = TYPES[type] @base = base || 8 @address_base = address_base || 8 case @type when CTypes::CharType @chars = if named_chars then NAMED_CHARS else CHARS end end end # # Initializes instance variables for the `hexdump` hexdump format. # def initialize_hexdump(type: :byte, base: nil, address_base: nil) @format = :hexdump @type = TYPES[type] @base = base || 16 @address_base = address_base || 16 case @type when CTypes::CharType @base = 8 @chars = CHARS end end public # # Parses a hexdump. # # @param [String, IO] hexdump # The hexdump output. # # @yield [address, values] # If a block is given, it will be passed each parsed line of the # hexdump. # # @yieldparam [Integer] address # The parsed address from the hexdump line. # # @yieldparam [Array] values # The parsed values from a line in the hexdump. # # @return [Integer, Enumerator] # If a block is given, then the last address will be returned # representing the total length of the hexdump. # If no block is given, an Enumerator will be returned. # def parse(hexdump) return enum_for(__method__,hexdump) unless block_given? previous_address = nil first_address = nil previous_row = nil previous_row_repeats = false previous_row_size = nil starts_repeating_at = nil hexdump.each_line do |line| line.chomp! # remove GNU hexdump's ASCII column line.sub!(/\s+\|.{1,16}\|\s*$/,'') if @format == :hexdump if line == '*' previous_row_repeats = true previous_row_size = (previous_row.length * @type.size) starts_repeating_at = previous_address + previous_row_size else address, row = parse_line(line) first_address ||= address if previous_row_repeats # fill in the omitted repeating rows range = starts_repeating_at...address addresses = range.step(previous_row_size) addresses.each do |address| yield address, previous_row end previous_row_repeats = false end yield address, row if row previous_address = address previous_row = row end end # return the last address as the length return previous_address - first_address end # # Unhexdumps a hexdump and returns the unpacked values. # # @return [Array, Array, Array] # The Array of unpacked values from the hexdump. # # @since 1.0.0 # def unpack(hexdump) values = [] parse(hexdump) do |address,row| values.concat(row) end return values end # # Unhexdumps a hexdump and returns the raw data. # # @param [String, IO] hexdump # The contents of the hexdump. # # @return [String] # The raw data from the hexdump. # # @since 1.0.0 # def unhexdump(hexdump) buffer = String.new(encoding: Encoding::ASCII_8BIT) length = parse(hexdump) do |address,row| first_address ||= address buffer << pack(row) end return buffer.byteslice(0,length) end # # Parses an address. # # @param [String] address # The text of the address. # # @return [Integer] # The parsed address. # # @api private # def parse_address(address) address.to_i(@address_base) end # # Parses an Integer. # # @param [String] string # The text of the Integer. # # @return [Integer] # The parsed Integer. # # @api private # def parse_int(string) string.to_i(@base) end # # Parses an integer or a ASCII character. # # @param [String] string # The text of the integer or character. # # @return [Integer] # The parsed integer or byte value of the character. # def parse_char_or_int(string) @chars.fetch(string) do |string| string.to_i(@base) end end # # Parses a float. # # @param [String] string # The text of the float. # # @return [Float] # The parsed float. # def parse_float(string) string.to_f end # # Parses a line from the hexdump. # # @param [String] line # A line from a hexdump. # # @return [(Integer, Array)] # The parse address and the parsed numbers from the line. # def parse_line(line) if @type.kind_of?(CTypes::CharType) # because od/hexdump print the ' ' char as white space, # we need special parsing logic here. if (start_index = line.index(' ')) address = parse_address(line[0,start_index]) rest = line[start_index..] numbers = rest.scan(/ ( )|([^\s]+)/) numbers.map! { |(sp,char)| sp || char } numbers.map!(&@parse_method) return address, numbers else return parse_address(line) end else address, *numbers = line.split address = parse_address(address) numbers.map!(&@parse_method) unless numbers.empty? return address, numbers else return address end end end # # Packs a segment back into bytes. # # @param [Array] values # A segment of words. # # @return [String] # The packed segment. # # @api private # def pack(values) values.pack(@type.pack_string * values.length) end end end end end end