# frozen_string_literal: true require "erb" require "active_support/core_ext/module/redefine_method" require "active_support/multibyte/unicode" class ERB module Util HTML_ESCAPE = { "&" => "&", ">" => ">", "<" => "<", '"' => """, "'" => "'" } JSON_ESCAPE = { "&" => '\u0026', ">" => '\u003e', "<" => '\u003c', "\u2028" => '\u2028', "\u2029" => '\u2029' } HTML_ESCAPE_ONCE_REGEXP = /["><']|&(?!([a-zA-Z]+|(#\d+)|(#[xX][\dA-Fa-f]+));)/ JSON_ESCAPE_REGEXP = /[\u2028\u2029&><]/u # A utility method for escaping HTML tag characters. # This method is also aliased as h. # # puts html_escape('is a > 0 & a < 10?') # # => is a > 0 & a < 10? def html_escape(s) unwrapped_html_escape(s).html_safe end silence_redefinition_of_method :h alias h html_escape module_function :h singleton_class.silence_redefinition_of_method :html_escape module_function :html_escape # HTML escapes strings but doesn't wrap them with an ActiveSupport::SafeBuffer. # This method is not for public consumption! Seriously! def unwrapped_html_escape(s) # :nodoc: s = s.to_s if s.html_safe? s else CGI.escapeHTML(ActiveSupport::Multibyte::Unicode.tidy_bytes(s)) end end module_function :unwrapped_html_escape # A utility method for escaping HTML without affecting existing escaped entities. # # html_escape_once('1 < 2 & 3') # # => "1 < 2 & 3" # # html_escape_once('<< Accept & Checkout') # # => "<< Accept & Checkout" def html_escape_once(s) result = ActiveSupport::Multibyte::Unicode.tidy_bytes(s.to_s).gsub(HTML_ESCAPE_ONCE_REGEXP, HTML_ESCAPE) s.html_safe? ? result.html_safe : result end module_function :html_escape_once # A utility method for escaping HTML entities in JSON strings. Specifically, the # &, > and < characters are replaced with their equivalent unicode escaped form - # \u0026, \u003e, and \u003c. The Unicode sequences \u2028 and \u2029 are also # escaped as they are treated as newline characters in some JavaScript engines. # These sequences have identical meaning as the original characters inside the # context of a JSON string, so assuming the input is a valid and well-formed # JSON value, the output will have equivalent meaning when parsed: # # json = JSON.generate({ name: ""}) # # => "{\"name\":\"\"}" # # json_escape(json) # # => "{\"name\":\"\\u003C/script\\u003E\\u003Cscript\\u003Ealert('PWNED!!!')\\u003C/script\\u003E\"}" # # JSON.parse(json) == JSON.parse(json_escape(json)) # # => true # # The intended use case for this method is to escape JSON strings before including # them inside a script tag to avoid XSS vulnerability: # # # # It is necessary to +raw+ the result of +json_escape+, so that quotation marks # don't get converted to " entities. +json_escape+ doesn't # automatically flag the result as HTML safe, since the raw value is unsafe to # use inside HTML attributes. # # If your JSON is being used downstream for insertion into the DOM, be aware of # whether or not it is being inserted via html(). Most jQuery plugins do this. # If that is the case, be sure to +html_escape+ or +sanitize+ any user-generated # content returned by your JSON. # # If you need to output JSON elsewhere in your HTML, you can just do something # like this, as any unsafe characters (including quotation marks) will be # automatically escaped for you: # #
...
# # WARNING: this helper only works with valid JSON. Using this on non-JSON values # will open up serious XSS vulnerabilities. For example, if you replace the # +current_user.to_json+ in the example above with user input instead, the browser # will happily eval() that string as JavaScript. # # The escaping performed in this method is identical to those performed in the # Active Support JSON encoder when +ActiveSupport.escape_html_entities_in_json+ is # set to true. Because this transformation is idempotent, this helper can be # applied even if +ActiveSupport.escape_html_entities_in_json+ is already true. # # Therefore, when you are unsure if +ActiveSupport.escape_html_entities_in_json+ # is enabled, or if you are unsure where your JSON string originated from, it # is recommended that you always apply this helper (other libraries, such as the # JSON gem, do not provide this kind of protection by default; also some gems # might override +to_json+ to bypass Active Support's encoder). def json_escape(s) result = s.to_s.gsub(JSON_ESCAPE_REGEXP, JSON_ESCAPE) s.html_safe? ? result.html_safe : result end module_function :json_escape end end class Object def html_safe? false end end class Numeric def html_safe? true end end module ActiveSupport #:nodoc: class SafeBuffer < String UNSAFE_STRING_METHODS = %w( capitalize chomp chop delete delete_prefix delete_suffix downcase lstrip next reverse rstrip scrub slice squeeze strip succ swapcase tr tr_s unicode_normalize upcase ) UNSAFE_STRING_METHODS_WITH_BACKREF = %w(gsub sub) alias_method :original_concat, :concat private :original_concat # Raised when ActiveSupport::SafeBuffer#safe_concat is called on unsafe buffers. class SafeConcatError < StandardError def initialize super "Could not concatenate to the buffer because it is not html safe." end end def [](*args) if html_safe? new_string = super return unless new_string new_safe_buffer = new_string.is_a?(SafeBuffer) ? new_string : SafeBuffer.new(new_string) new_safe_buffer.instance_variable_set :@html_safe, true new_safe_buffer else to_str[*args] end end def safe_concat(value) raise SafeConcatError unless html_safe? original_concat(value) end def initialize(str = "") @html_safe = true super end def initialize_copy(other) super @html_safe = other.html_safe? end def clone_empty self[0, 0] end def concat(value) super(html_escape_interpolated_argument(value)) end alias << concat def insert(index, value) super(index, html_escape_interpolated_argument(value)) end def prepend(value) super(html_escape_interpolated_argument(value)) end def replace(value) super(html_escape_interpolated_argument(value)) end def []=(*args) if args.length == 3 super(args[0], args[1], html_escape_interpolated_argument(args[2])) else super(args[0], html_escape_interpolated_argument(args[1])) end end def +(other) dup.concat(other) end def *(*) new_string = super new_safe_buffer = new_string.is_a?(SafeBuffer) ? new_string : SafeBuffer.new(new_string) new_safe_buffer.instance_variable_set(:@html_safe, @html_safe) new_safe_buffer end def %(args) case args when Hash escaped_args = args.transform_values { |arg| html_escape_interpolated_argument(arg) } else escaped_args = Array(args).map { |arg| html_escape_interpolated_argument(arg) } end self.class.new(super(escaped_args)) end def html_safe? defined?(@html_safe) && @html_safe end def to_s self end def to_param to_str end def encode_with(coder) coder.represent_object nil, to_str end UNSAFE_STRING_METHODS.each do |unsafe_method| if unsafe_method.respond_to?(unsafe_method) class_eval <<-EOT, __FILE__, __LINE__ + 1 def #{unsafe_method}(*args, &block) # def capitalize(*args, &block) to_str.#{unsafe_method}(*args, &block) # to_str.capitalize(*args, &block) end # end def #{unsafe_method}!(*args) # def capitalize!(*args) @html_safe = false # @html_safe = false super # super end # end EOT end end UNSAFE_STRING_METHODS_WITH_BACKREF.each do |unsafe_method| if unsafe_method.respond_to?(unsafe_method) class_eval <<-EOT, __FILE__, __LINE__ + 1 def #{unsafe_method}(*args, &block) # def gsub(*args, &block) if block # if block to_str.#{unsafe_method}(*args) { |*params| # to_str.gsub(*args) { |*params| set_block_back_references(block, $~) # set_block_back_references(block, $~) block.call(*params) # block.call(*params) } # } else # else to_str.#{unsafe_method}(*args) # to_str.gsub(*args) end # end end # end def #{unsafe_method}!(*args, &block) # def gsub!(*args, &block) @html_safe = false # @html_safe = false if block # if block super(*args) { |*params| # super(*args) { |*params| set_block_back_references(block, $~) # set_block_back_references(block, $~) block.call(*params) # block.call(*params) } # } else # else super # super end # end end # end EOT end end private def html_escape_interpolated_argument(arg) (!html_safe? || arg.html_safe?) ? arg : CGI.escapeHTML(arg.to_s) end def set_block_back_references(block, match_data) block.binding.eval("proc { |m| $~ = m }").call(match_data) rescue ArgumentError # Can't create binding from C level Proc end end end class String # Marks a string as trusted safe. It will be inserted into HTML with no # additional escaping performed. It is your responsibility to ensure that the # string contains no malicious content. This method is equivalent to the # +raw+ helper in views. It is recommended that you use +sanitize+ instead of # this method. It should never be called on user input. def html_safe ActiveSupport::SafeBuffer.new(self) end end