module Rails
module Html
# XPath expressions for the nodes (script elements, form elements and
# comments) that the sanitizers remove from a fragment before scrubbing it.
# Frozen so the shared list cannot be altered in place at runtime.
XPATHS_TO_REMOVE = %w{.//script .//form comment()}.freeze
class Sanitizer # :nodoc:
  # Abstract entry point: always raises NotImplementedError, so concrete
  # sanitizers have to provide their own +sanitize+.
  def sanitize(html, options = {})
    raise NotImplementedError, "subclasses must implement sanitize method."
  end

  private

  # Removes every node matched by any of +xpaths+ from +node+ and hands
  # +node+ itself back to the caller.
  def remove_xpaths(node, xpaths)
    matched = node.xpath(*xpaths)
    matched.remove
    node
  end

  # Serializes +fragment+ with +options+, choosing +to_xml+ when the fragment
  # reports itself as XML and +to_html+ otherwise.
  def properly_encode(fragment, options)
    if fragment.xml?
      fragment.to_xml(options)
    else
      fragment.to_html(options)
    end
  end
end
# === Rails::Html::FullSanitizer
# Removes all tags, keeping their text, but strips out scripts, forms and
# comments entirely.
#
# full_sanitizer = Rails::Html::FullSanitizer.new
# full_sanitizer.sanitize("<b>Bold</b> no more! <a href='more.html'>See more here</a>...")
# # => Bold no more! See more here...
class FullSanitizer < Sanitizer
  # Parses +html+ as a Loofah fragment, drops the nodes matched by
  # XPATHS_TO_REMOVE, scrubs what is left with a TextOnlyScrubber and
  # serializes the result as UTF-8.
  #
  # Returns nil when +html+ is nil or false, and returns +html+ itself when
  # it is empty. +options+ is accepted but not consulted here.
  def sanitize(html, options = {})
    return nil unless html
    return html if html.empty?

    text_fragment = Loofah.fragment(html)
    remove_xpaths(text_fragment, XPATHS_TO_REMOVE)
    text_fragment.scrub!(TextOnlyScrubber.new)

    properly_encode(text_fragment, { encoding: 'UTF-8' })
  end
end
# === Rails::Html::LinkSanitizer
# Removes +a+ tags and +href+ attributes leaving only the link text.
#
# link_sanitizer = Rails::Html::LinkSanitizer.new
# link_sanitizer.sanitize('<a href="example.com">Only the link text will be kept.</a>')
#
# => 'Only the link text will be kept.'
class LinkSanitizer < Sanitizer
  # Builds the scrubber once per sanitizer instance, targeting +a+ tags and
  # +href+ attributes.
  def initialize
    @link_scrubber = TargetScrubber.new.tap do |scrubber|
      scrubber.tags = %w(a)
      scrubber.attributes = %w(href)
    end
  end

  # Runs the link scrubber over +html+ parsed as a fragment and returns the
  # result as a string. +options+ is accepted but not consulted here.
  def sanitize(html, options = {})
    scrubbed = Loofah.scrub_fragment(html, @link_scrubber)
    scrubbed.to_s
  end
end
# === Rails::Html::SafeListSanitizer
# Sanitizes html and css from an extensive safe list (see link further down).
#
# === Whitespace
# We can't make any guarantees about whitespace being kept or stripped.
# Loofah uses Nokogiri, which wraps either a C or Java parser for the
# respective Ruby implementation.
# Those two parsers determine how whitespace is ultimately handled.
#
# When the stripped markup is rendered, the user's browser won't take
# whitespace into account anyway. It might be better to suggest that your users
# wrap their whitespace-sensitive content in pre tags, or that you do
# so automatically.
#
# === Options
# Sanitizes both html and css via the safe lists found here:
# https://github.com/flavorjones/loofah/blob/master/lib/loofah/html5/whitelist.rb
#
# SafeListSanitizer also accepts options to configure
# the safe list used when sanitizing html.
# There are class-level options:
# Rails::Html::SafeListSanitizer.allowed_tags = %w(table tr td)
# Rails::Html::SafeListSanitizer.allowed_attributes = %w(id class style)
#
# Tags and attributes can also be passed to +sanitize+.
# Passed options take precedence over the class level options.
#
# === Examples
# safe_list_sanitizer = Rails::Html::SafeListSanitizer.new
#
# Sanitize css doesn't take options
# safe_list_sanitizer.sanitize_css('background-color: #000;')
#
# Default: sanitize via an extensive safe list of allowed elements
# safe_list_sanitizer.sanitize(@article.body)
#
# Safe list via the supplied tags and attributes
# safe_list_sanitizer.sanitize(@article.body, tags: %w(table tr td),
# attributes: %w(id class style))
#
# Safe list via a custom scrubber
# safe_list_sanitizer.sanitize(@article.body, scrubber: ArticleScrubber.new)
class SafeListSanitizer < Sanitizer
  class << self
    # Class-level defaults used by +sanitize+ whenever the caller passes no
    # :tags / :attributes option of its own.
    attr_accessor :allowed_tags, :allowed_attributes
  end

  self.allowed_tags = Set.new(%w[
    strong em b i p code pre tt samp kbd var sub sup dfn cite big small
    address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dl dt dd abbr
    acronym a img blockquote del ins
  ])

  self.allowed_attributes = Set.new(%w[
    href src width height alt cite datetime title class name xml:lang abbr
  ])

  def initialize
    @permit_scrubber = PermitScrubber.new
  end

  # Sanitizes +html+ and returns it serialized as UTF-8.
  #
  # Returns nil when +html+ is nil or false, and returns +html+ itself when
  # it is empty. Otherwise the fragment is scrubbed in one of three ways:
  #
  # * with options[:scrubber], when one is given;
  # * with the permit scrubber, configured from the :tags / :attributes
  #   options or, failing that, the class-level lists;
  # * with the XPath removal plus Loofah's :strip scrubber, when neither
  #   list is available.
  def sanitize(html, options = {})
    return nil unless html
    return html if html.empty?

    fragment = Loofah.fragment(html)
    custom_scrubber = options[:scrubber]

    if custom_scrubber
      # No duck typing, Loofah ensures subclass of Loofah::Scrubber
      fragment.scrub!(custom_scrubber)
    elsif allowed_tags(options) || allowed_attributes(options)
      @permit_scrubber.tags = allowed_tags(options)
      @permit_scrubber.attributes = allowed_attributes(options)
      fragment.scrub!(@permit_scrubber)
    else
      # Only reached when no lists were passed in and both class-level
      # lists have been cleared.
      remove_xpaths(fragment, XPATHS_TO_REMOVE)
      fragment.scrub!(:strip)
    end

    properly_encode(fragment, { encoding: 'UTF-8' })
  end

  # Hands +style_string+ to Loofah's CSS scrubber and returns its result.
  def sanitize_css(style_string)
    Loofah::HTML5::Scrub.scrub_css(style_string)
  end

  private

  # The :tags option when given, otherwise the class-level tag list.
  def allowed_tags(options)
    requested = options[:tags]
    requested || self.class.allowed_tags
  end

  # The :attributes option when given, otherwise the class-level attribute list.
  def allowed_attributes(options)
    requested = options[:attributes]
    requested || self.class.allowed_attributes
  end
end
# Second constant name bound to the very same class as SafeListSanitizer
# (presumably kept for callers written against the older name; confirm).
WhiteListSanitizer = SafeListSanitizer
end
end