lib/sanitize.rb in sanitize-4.6.6 vs lib/sanitize.rb in sanitize-5.0.0

- old
+ new

@@ -119,23 +119,11 @@ # Returns a sanitized copy of the given _html_ fragment. def fragment(html) return '' unless html html = preprocess(html) - doc = Nokogiri::HTML5.parse("<html><body>#{html}") - - # Hack to allow fragments containing <body>. Borrowed from - # Nokogiri::HTML::DocumentFragment. - if html =~ /\A<body(?:\s|>)/i - path = '/html/body' - else - path = '/html/body/node()' - end - - frag = doc.fragment - frag << doc.xpath(path) - + frag = Nokogiri::HTML5.fragment(html) node!(frag) to_html(frag) end # @deprecated Use {#fragment} instead. @@ -182,40 +170,10 @@ html.gsub!(REGEX_UNSUITABLE_CHARS, '') html end def to_html(node) - replace_meta = false - - # Hacky workaround for a libxml2 bug that adds an undesired Content-Type - # meta tag to all serialized HTML documents. - # - # https://github.com/sparklemotion/nokogiri/issues/1008 - if node.type == Nokogiri::XML::Node::DOCUMENT_NODE || - node.type == Nokogiri::XML::Node::HTML_DOCUMENT_NODE - - regex_meta = %r|(<html[^>]*>\s*<head[^>]*>\s*)<meta http-equiv="Content-Type" content="text/html; charset=utf-8">|i - - # Only replace the content-type meta tag if <meta> isn't whitelisted or - # the original document didn't actually include a content-type meta tag. - replace_meta = !@config[:elements].include?('meta') || - node.xpath('/html/head/meta[@http-equiv]').none? do |meta| - meta['http-equiv'].casecmp('content-type').zero? - end - end - - so = Nokogiri::XML::Node::SaveOptions - - # Serialize to HTML without any formatting to prevent Nokogiri from adding - # newlines after certain tags. - html = node.to_html( - :encoding => 'utf-8', - :indent => 0, - :save_with => so::NO_DECLARATION | so::NO_EMPTY_TAGS | so::AS_HTML - ) - - html.gsub!(regex_meta, '\1') if replace_meta - html + node.to_html(preserve_newline: true) end def transform_node!(node, node_whitelist) @transformers.each do |transformer| # Since transform_node! may be called in a tight loop to process thousands