# frozen_string_literal: true module Rails module HTML class Sanitizer class << self def html5_support? return @html5_support if defined?(@html5_support) @html5_support = Loofah.respond_to?(:html5_support?) && Loofah.html5_support? end def best_supported_vendor html5_support? ? Rails::HTML5::Sanitizer : Rails::HTML4::Sanitizer end end def sanitize(html, options = {}) raise NotImplementedError, "subclasses must implement sanitize method." end private def remove_xpaths(node, xpaths) node.xpath(*xpaths).remove node end def properly_encode(fragment, options) fragment.xml? ? fragment.to_xml(options) : fragment.to_html(options) end end module Concern module ComposedSanitize def sanitize(html, options = {}) return unless html return html if html.empty? serialize(scrub(parse_fragment(html), options)) end end module Parser module HTML4 def parse_fragment(html) Loofah.html4_fragment(html) end end module HTML5 def parse_fragment(html) Loofah.html5_fragment(html) end end if Rails::HTML::Sanitizer.html5_support? end module Scrubber module Full def scrub(fragment, options = {}) fragment.scrub!(TextOnlyScrubber.new) end end module Link def initialize super @link_scrubber = TargetScrubber.new @link_scrubber.tags = %w(a) @link_scrubber.attributes = %w(href) end def scrub(fragment, options = {}) fragment.scrub!(@link_scrubber) end end module SafeList # The default safe list for tags DEFAULT_ALLOWED_TAGS = Set.new([ "a", "abbr", "acronym", "address", "b", "big", "blockquote", "br", "cite", "code", "dd", "del", "dfn", "div", "dl", "dt", "em", "h1", "h2", "h3", "h4", "h5", "h6", "hr", "i", "img", "ins", "kbd", "li", "ol", "p", "pre", "samp", "small", "span", "strong", "sub", "sup", "time", "tt", "ul", "var", ]).freeze # The default safe list for attributes DEFAULT_ALLOWED_ATTRIBUTES = Set.new([ "abbr", "alt", "cite", "class", "datetime", "height", "href", "lang", "name", "src", "title", "width", "xml:lang", ]).freeze def self.included(klass) class << klass attr_accessor :allowed_tags attr_accessor :allowed_attributes end klass.allowed_tags = DEFAULT_ALLOWED_TAGS.dup klass.allowed_attributes = DEFAULT_ALLOWED_ATTRIBUTES.dup end def initialize(prune: false) @permit_scrubber = PermitScrubber.new(prune: prune) end def scrub(fragment, options = {}) if scrubber = options[:scrubber] # No duck typing, Loofah ensures subclass of Loofah::Scrubber fragment.scrub!(scrubber) elsif allowed_tags(options) || allowed_attributes(options) @permit_scrubber.tags = allowed_tags(options) @permit_scrubber.attributes = allowed_attributes(options) fragment.scrub!(@permit_scrubber) else fragment.scrub!(:strip) end end def sanitize_css(style_string) Loofah::HTML5::Scrub.scrub_css(style_string) end private def allowed_tags(options) options[:tags] || self.class.allowed_tags end def allowed_attributes(options) options[:attributes] || self.class.allowed_attributes end end end module Serializer module UTF8Encode def serialize(fragment) properly_encode(fragment, encoding: "UTF-8") end end end end end module HTML4 module Sanitizer module VendorMethods def full_sanitizer Rails::HTML4::FullSanitizer end def link_sanitizer Rails::HTML4::LinkSanitizer end def safe_list_sanitizer Rails::HTML4::SafeListSanitizer end def white_list_sanitizer # :nodoc: safe_list_sanitizer end end extend VendorMethods end # == Rails::HTML4::FullSanitizer # # Removes all tags from HTML4 but strips out scripts, forms and comments. # # full_sanitizer = Rails::HTML4::FullSanitizer.new # full_sanitizer.sanitize("Bold no more! See more here...") # # => "Bold no more! See more here..." # class FullSanitizer < Rails::HTML::Sanitizer include HTML::Concern::ComposedSanitize include HTML::Concern::Parser::HTML4 include HTML::Concern::Scrubber::Full include HTML::Concern::Serializer::UTF8Encode end # == Rails::HTML4::LinkSanitizer # # Removes +a+ tags and +href+ attributes from HTML4 leaving only the link text. # # link_sanitizer = Rails::HTML4::LinkSanitizer.new # link_sanitizer.sanitize('Only the link text will be kept.') # # => "Only the link text will be kept." # class LinkSanitizer < Rails::HTML::Sanitizer include HTML::Concern::ComposedSanitize include HTML::Concern::Parser::HTML4 include HTML::Concern::Scrubber::Link include HTML::Concern::Serializer::UTF8Encode end # == Rails::HTML4::SafeListSanitizer # # Sanitizes HTML4 and CSS from an extensive safe list. # # === Whitespace # # We can't make any guarantees about whitespace being kept or stripped. Loofah uses Nokogiri, # which wraps either a C or Java parser for the respective Ruby implementation. Those two # parsers determine how whitespace is ultimately handled. # # When the stripped markup will be rendered the users browser won't take whitespace into account # anyway. It might be better to suggest your users wrap their whitespace sensitive content in # pre tags or that you do so automatically. # # === Options # # Sanitizes both html and css via the safe lists found in # Rails::HTML::Concern::Scrubber::SafeList # # SafeListSanitizer also accepts options to configure the safe list used when sanitizing html. # There's a class level option: # # Rails::HTML4::SafeListSanitizer.allowed_tags = %w(table tr td) # Rails::HTML4::SafeListSanitizer.allowed_attributes = %w(id class style) # # Tags and attributes can also be passed to +sanitize+. Passed options take precedence over the # class level options. # # === Examples # # safe_list_sanitizer = Rails::HTML4::SafeListSanitizer.new # # # default: sanitize via a extensive safe list of allowed elements # safe_list_sanitizer.sanitize(@article.body) # # # sanitize via the supplied tags and attributes # safe_list_sanitizer.sanitize( # @article.body, # tags: %w(table tr td), # attributes: %w(id class style), # ) # # # sanitize via a custom Loofah scrubber # safe_list_sanitizer.sanitize(@article.body, scrubber: ArticleScrubber.new) # # # prune nodes from the tree instead of stripping tags and leaving inner content # safe_list_sanitizer = Rails::HTML4::SafeListSanitizer.new(prune: true) # # # the sanitizer can also sanitize CSS # safe_list_sanitizer.sanitize_css('background-color: #000;') # class SafeListSanitizer < Rails::HTML::Sanitizer include HTML::Concern::ComposedSanitize include HTML::Concern::Parser::HTML4 include HTML::Concern::Scrubber::SafeList include HTML::Concern::Serializer::UTF8Encode end end module HTML5 class Sanitizer class << self def full_sanitizer Rails::HTML5::FullSanitizer end def link_sanitizer Rails::HTML5::LinkSanitizer end def safe_list_sanitizer Rails::HTML5::SafeListSanitizer end def white_list_sanitizer # :nodoc: safe_list_sanitizer end end end # == Rails::HTML5::FullSanitizer # # Removes all tags from HTML5 but strips out scripts, forms and comments. # # full_sanitizer = Rails::HTML5::FullSanitizer.new # full_sanitizer.sanitize("Bold no more! See more here...") # # => "Bold no more! See more here..." # class FullSanitizer < Rails::HTML::Sanitizer include HTML::Concern::ComposedSanitize include HTML::Concern::Parser::HTML5 include HTML::Concern::Scrubber::Full include HTML::Concern::Serializer::UTF8Encode end # == Rails::HTML5::LinkSanitizer # # Removes +a+ tags and +href+ attributes from HTML5 leaving only the link text. # # link_sanitizer = Rails::HTML5::LinkSanitizer.new # link_sanitizer.sanitize('Only the link text will be kept.') # # => "Only the link text will be kept." # class LinkSanitizer < Rails::HTML::Sanitizer include HTML::Concern::ComposedSanitize include HTML::Concern::Parser::HTML5 include HTML::Concern::Scrubber::Link include HTML::Concern::Serializer::UTF8Encode end # == Rails::HTML5::SafeListSanitizer # # Sanitizes HTML5 and CSS from an extensive safe list. # # === Whitespace # # We can't make any guarantees about whitespace being kept or stripped. Loofah uses Nokogiri, # which wraps either a C or Java parser for the respective Ruby implementation. Those two # parsers determine how whitespace is ultimately handled. # # When the stripped markup will be rendered the users browser won't take whitespace into account # anyway. It might be better to suggest your users wrap their whitespace sensitive content in # pre tags or that you do so automatically. # # === Options # # Sanitizes both html and css via the safe lists found in # Rails::HTML::Concern::Scrubber::SafeList # # SafeListSanitizer also accepts options to configure the safe list used when sanitizing html. # There's a class level option: # # Rails::HTML5::SafeListSanitizer.allowed_tags = %w(table tr td) # Rails::HTML5::SafeListSanitizer.allowed_attributes = %w(id class style) # # Tags and attributes can also be passed to +sanitize+. Passed options take precedence over the # class level options. # # === Examples # # safe_list_sanitizer = Rails::HTML5::SafeListSanitizer.new # # # default: sanitize via a extensive safe list of allowed elements # safe_list_sanitizer.sanitize(@article.body) # # # sanitize via the supplied tags and attributes # safe_list_sanitizer.sanitize( # @article.body, # tags: %w(table tr td), # attributes: %w(id class style), # ) # # # sanitize via a custom Loofah scrubber # safe_list_sanitizer.sanitize(@article.body, scrubber: ArticleScrubber.new) # # # prune nodes from the tree instead of stripping tags and leaving inner content # safe_list_sanitizer = Rails::HTML5::SafeListSanitizer.new(prune: true) # # # the sanitizer can also sanitize CSS # safe_list_sanitizer.sanitize_css('background-color: #000;') # class SafeListSanitizer < Rails::HTML::Sanitizer include HTML::Concern::ComposedSanitize include HTML::Concern::Parser::HTML5 include HTML::Concern::Scrubber::SafeList include HTML::Concern::Serializer::UTF8Encode end end if Rails::HTML::Sanitizer.html5_support? module HTML Sanitizer.extend(HTML4::Sanitizer::VendorMethods) # :nodoc: FullSanitizer = HTML4::FullSanitizer # :nodoc: LinkSanitizer = HTML4::LinkSanitizer # :nodoc: SafeListSanitizer = HTML4::SafeListSanitizer # :nodoc: WhiteListSanitizer = SafeListSanitizer # :nodoc: end end