sanitize.rb in sanitize-5.0.0

- old
+ new

@@ -119,23 +119,11 @@
   # Returns a sanitized copy of the given _html_ fragment.
   def fragment(html)
     return '' unless html
 
     html = preprocess(html)
-    doc  = Nokogiri::HTML5.parse("<html><body>#{html}")
-
-    # Hack to allow fragments containing <body>. Borrowed from
-    # Nokogiri::HTML::DocumentFragment.
-    if html =~ /\A<body(?:\s|>)/i
-      path = '/html/body'
-    else
-      path = '/html/body/node()'
-    end
-
-    frag = doc.fragment
-    frag << doc.xpath(path)
-
+    frag  = Nokogiri::HTML5.fragment(html)
     node!(frag)
     to_html(frag)
   end
 
   # @deprecated Use {#fragment} instead.
@@ -182,40 +170,10 @@
     html.gsub!(REGEX_UNSUITABLE_CHARS, '')
     html
   end
 
   def to_html(node)
-    replace_meta = false
-
-    # Hacky workaround for a libxml2 bug that adds an undesired Content-Type
-    # meta tag to all serialized HTML documents.
-    #
-    # https://github.com/sparklemotion/nokogiri/issues/1008
-    if node.type == Nokogiri::XML::Node::DOCUMENT_NODE ||
-        node.type == Nokogiri::XML::Node::HTML_DOCUMENT_NODE
-
-      regex_meta   = %r|(<html[^>]*>\s*<head[^>]*>\s*)<meta http-equiv="Content-Type" content="text/html; charset=utf-8">|i
-
-      # Only replace the content-type meta tag if <meta> isn't whitelisted or
-      # the original document didn't actually include a content-type meta tag.
-      replace_meta = !@config[:elements].include?('meta') ||
-        node.xpath('/html/head/meta[@http-equiv]').none? do |meta|
-          meta['http-equiv'].casecmp('content-type').zero?
-        end
-    end
-
-    so = Nokogiri::XML::Node::SaveOptions
-
-    # Serialize to HTML without any formatting to prevent Nokogiri from adding
-    # newlines after certain tags.
-    html = node.to_html(
-      :encoding  => 'utf-8',
-      :indent    => 0,
-      :save_with => so::NO_DECLARATION | so::NO_EMPTY_TAGS | so::AS_HTML
-    )
-
-    html.gsub!(regex_meta, '\1') if replace_meta
-    html
+    node.to_html(preserve_newline: true)
   end
 
   def transform_node!(node, node_whitelist)
     @transformers.each do |transformer|
       # Since transform_node! may be called in a tight loop to process thousands