# Copyright (c) 2023 Contrast Security, Inc. See https://www.contrastsecurity.com/enduser-terms-0317a for more details.
# frozen_string_literal: true

require 'base64'
require 'cgi'
require 'uri'

module Contrast
  module Agent
    module Protect
      module Rule
        module InputClassification
          # Utilities for detecting and decoding Base64 encoded input values.
          module Encoding
            include Contrast::Components::Logger::InstanceMethods

            # Values that must never be treated as Base64 even though the heuristic below would accept them.
            # A list is still needed here because these values cannot be told apart from encoded input;
            # as long as the list stays short the heuristic keeps a good success rate.
            KNOWN_DECODING_EXCEPTIONS = %w[cmd version if_modified_since].cs__freeze

            # Heuristically determine whether the value is Base64 encoded.
            #
            # This method is not fast, but it is safer against false positives. Detecting Base64 is not a
            # trivial task: plain words such as 'stringdw' or 'pass' satisfy a regexp like
            #
            #   ^([A-Za-z0-9+/]{4})*([A-Za-z0-9+/]{3}=|[A-Za-z0-9+/]{2}==)?$
            #
            # and also the round trip check
            #
            #   Base64.strict_encode64(Base64.decode64(value)) == value
            #
            # although they are not encoded. Without additional context neither approach can tell the
            # difference, and the regexp does not account for whitespace either.
            #
            # Instead, the value is decoded repeatedly until the decoding result is empty and the number of
            # decoding levels is counted. Example for 'pass', encoded as 'cGFzcw==':
            #
            #   decode level 0 => 'pass'
            #   decode level 1 => '\xA5\xAB,'
            #   decode level 2 => ''
            #
            # A plain value runs out after a single counted level, an encoded one yields at least two.
            # The outcome is reported to the InputAnalyzer base64 statistic for the given input type.
            # Any StandardError raised during the check is logged and false is returned.
            #
            # @param value [String] input to check for encoding status.
            # @param input_type [Symbol] input type.
            # @return [Boolean] true if value is base64 encoded, false otherwise.
            def cs__base64? value, input_type
              return false unless value.is_a?(String)
              return false if Contrast::Utils::DuckUtils.empty_duck?(value)
              return false if KNOWN_DECODING_EXCEPTIONS.include?(value)

              # check_value is only ever reassigned, never mutated, so the input needs no copy.
              check_value = value
              level = 0
              iteration = 0
              # Decode once per pass and reuse the result for both the loop test and the next pass.
              decoded = Base64.decode64(check_value)
              until Contrast::Utils::DuckUtils.empty_duck?(decoded)
                iteration += 1
                # Handle cases like 'command' or 'injection', which check out as encoded regarding the
                # level of decoding but produce escape characters on the first iteration rather than a
                # decoded value: the second pass only counts when CGI.escape leaves the first decoding
                # result unchanged.
                level += 1 unless iteration == 2 && ::CGI.escape(check_value) != check_value
                check_value = decoded
                decoded = Base64.decode64(check_value)
              end

              # Two or more counted levels mean the value is encoded.
              base64 = level > 1
              # Call base64 statistics:
              if base64
                Contrast::Agent::Protect::InputAnalyzer.base64_statistic.match!(input_type)
              else
                Contrast::Agent::Protect::InputAnalyzer.base64_statistic.mismatch!(input_type)
              end
              base64
            rescue StandardError => e
              logger.error('Error while checking for base64 encoding', error: e, message: e.message, backtrace: e.backtrace)
              false
            end

            # Decode the value with Base64.decode64, but only if it was detected as encoded.
            # The original value is returned when it is not detected as encoded, when the decoded
            # bytes contain a null byte, when the decoded value cannot be normalized, or when any
            # StandardError is raised (the error is logged, not propagated).
            #
            # @param value [String] input to decode.
            # @param input_type [Symbol] input type.
            # @return [String] decoded or original value.
            def cs__decode64 value, input_type
              return value unless cs__base64?(value, input_type)

              decoded = try_base64_decode(value)
              # try_base64_decode hands back the very same object when it rejects the decoding.
              # Return it untouched instead of transcoding the caller's original input.
              return value if decoded.equal?(value)

              normalized, success = normalize_encoding(decoded)
              return normalized if success

              value
            rescue StandardError => e
              logger.error('Error while decoding base64', error: e, message: e.message, backtrace: e.backtrace)
              value
            end

            private

            # Try to decode the value; the decoding is discarded if the result contains null bytes.
            #
            # @param value [String] input to decode.
            # @return [String] the decoded value, or the given value itself when the decoded
            #   bytes contain a null byte.
            def try_base64_decode value
              new_value = Base64.decode64(value)
              # check for null bytes:
              return value if new_value.include?("\0")

              new_value
            end

            # Detecting encoded Base64 is not perfect. In some cases certain inputs are detected as
            # encoded and decoded even though they are not. Base64.decode64 returns an ASCII-8BIT
            # string, and AgentLib will raise a UTF-8 error for such values.
            # This method transcodes a copy of the value to Windows-1252 and then labels the result
            # as UTF-8. If the transcoding raises, the decoding is considered unsuccessful.
            #
            # @param value [String] input to normalize.
            # @return [Array] normalized value and success flag; [nil, false] on failure.
            def normalize_encoding value
              new_value = value.dup.encode!('Windows-1252').force_encoding('UTF-8')
              [new_value, true]
            rescue StandardError
              # encoding failed, or the decoding from base64 failed.
              [nil, false]
            end
          end
        end
      end
    end
  end
end