Sha256: a5f4005aaec4c4335a6d13efcf32bb7d469291b0b9655d6c7be8b3a1a31a7ee1

Contents?: true

Size: 1.5 KB

Versions: 2

Compression:

Stored size: 1.5 KB

Contents

require 'addressable/uri'
require 'sanitize'
require 'with_deep_merge'

# Cleans untrusted HTML with the Sanitize gem, using a configuration derived
# from Sanitize::Config::RELAXED. Optionally restricts <img> sources to a
# list of permitted hosts.
class Govspeak::HtmlSanitizer
  include WithDeepMerge

  # Sanitize transformer that removes <img> nodes whose source points at a
  # host that is not in the list of allowed hosts.
  class ImageSourceWhitelister
    # @param allowed_image_hosts [Array<String>] hosts images may be loaded from
    def initialize(allowed_image_hosts)
      @allowed_image_hosts = allowed_image_hosts
    end

    # Called by Sanitize with a context hash; only <img> nodes are inspected.
    #
    # @param sanitize_context [Hash] must provide :node_name and :node
    def call(sanitize_context)
      return unless sanitize_context[:node_name] == "img"

      node = sanitize_context[:node]
      src = node['src']
      # An <img> without a src attribute references no remote resource, and
      # Addressable::URI.parse(nil) returns nil, so there is nothing to check.
      return if src.nil?

      # the node isn't sanitary. Remove it from the document.
      node.unlink unless allowed_source?(src)
    end

    private

    # Whether +src+ is a host-less relative reference or points at an
    # allowed host.
    def allowed_source?(src)
      image_uri = Addressable::URI.parse(src)
      host = image_uri.host

      # `relative?` only means "no scheme", which is also true for
      # protocol-relative sources such as "//other.host/x.png". Those carry a
      # host, so they must pass the host check like any absolute URI.
      return image_uri.relative? if host.nil?

      @allowed_image_hosts.include?(host)
    rescue Addressable::URI::InvalidURIError
      # A source that cannot be parsed cannot be verified; treat it as unsafe
      # instead of letting the error abort the whole sanitize run.
      false
    end
  end

  # @param dirty_html [String] the untrusted HTML to clean
  # @param options [Hash] supports :allowed_image_hosts, a list of hosts
  #   images may be loaded from; when absent or empty, image hosts are not
  #   restricted
  def initialize(dirty_html, options = {})
    @dirty_html = dirty_html
    @allowed_image_hosts = options[:allowed_image_hosts]
  end

  # Cleans the HTML, additionally removing images from disallowed hosts when
  # a non-empty list of allowed image hosts was supplied.
  #
  # @return [String] the sanitized HTML
  def sanitize
    transformers = []
    if @allowed_image_hosts && @allowed_image_hosts.any?
      transformers << ImageSourceWhitelister.new(@allowed_image_hosts)
    end
    Sanitize.clean(@dirty_html, sanitize_config.merge(transformers: transformers))
  end

  # Cleans the HTML with <img> removed from the list of permitted elements.
  #
  # @return [String] the sanitized HTML
  def sanitize_without_images
    config = sanitize_config
    # Build a new element list rather than deleting in place, so no array
    # reachable from the base configuration is ever mutated.
    elements_without_images = config[:elements] - ['img']
    Sanitize.clean(@dirty_html, config.merge(elements: elements_without_images))
  end

  # The Sanitize configuration: Sanitize::Config::RELAXED extended with a few
  # extra elements and attributes.
  #
  # @return [Hash]
  def sanitize_config
    deep_merge(Sanitize::Config::RELAXED, {
      attributes: {
        :all => Sanitize::Config::RELAXED[:attributes][:all] + [ "id", "class", "role", "aria-label" ],
        "a"  => Sanitize::Config::RELAXED[:attributes]["a"] + [ "rel" ],
      },
      elements: Sanitize::Config::RELAXED[:elements] + [ "div", "span", "aside" ],
    })
  end
end

Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
govspeak-3.5.2 lib/govspeak/html_sanitizer.rb
govspeak-3.5.1 lib/govspeak/html_sanitizer.rb