lib/sanitize.rb in sanitize-2.0.3 vs lib/sanitize.rb in sanitize-2.0.4
- old
+ new
@@ -1,8 +1,8 @@
# encoding: utf-8
#--
-# Copyright (c) 2011 Ryan Grove <ryan@wonko.com>
+# Copyright (c) 2013 Ryan Grove <ryan@wonko.com>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the 'Software'), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
@@ -57,10 +57,23 @@
# were made.
def self.clean!(html, config = {})
  # Build a sanitizer for this config and hand the string to its
  # instance-level clean!; that call's return value is passed straight through.
  sanitizer = Sanitize.new(config)
  sanitizer.clean!(html)
end
+ # Performs a Sanitize#clean using a full-document HTML parser instead of
+ # the default fragment parser. This will add a DOCTYPE and html tag
+ # unless they are already present.
+ def self.clean_document(html, config = {})
+ Sanitize.new(config).clean_document(html)
+ end
+
+ # Performs Sanitize#clean_document in place, returning _html_, or +nil+ if no
+ # changes were made.
+ def self.clean_document!(html, config = {})
+ Sanitize.new(config).clean_document!(html)
+ end
+
# Sanitizes the specified Nokogiri::XML::Node and all its children.
def self.clean_node!(node, config = {})
  # Build a sanitizer for this config and delegate to its instance-level
  # clean_node!, returning whatever that call returns.
  sanitizer = Sanitize.new(config)
  sanitizer.clean_node!(node)
end
@@ -94,12 +107,12 @@
end
end
# Performs clean in place, returning _html_, or +nil+ if no changes were
# made.
- def clean!(html)
- fragment = Nokogiri::HTML::DocumentFragment.parse(html)
+ def clean!(html, parser = Nokogiri::HTML::DocumentFragment)
+ fragment = parser.parse(html)
clean_node!(fragment)
output_method_params = {:encoding => @config[:output_encoding], :indent => 0}
if @config[:output] == :xhtml
@@ -112,9 +125,25 @@
end
result = output_method.call(output_method_params)
return result == html ? nil : html[0, html.length] = result
+ end
+
+ def clean_document(html)
+ unless html.nil?
+ clean_document!(html.dup) || html
+ end
+ end
+
+ def clean_document!(html)
+ if !@config[:elements].include?('html') && !@config[:remove_contents]
+ raise 'You must have the HTML element whitelisted to call #clean_document unless remove_contents is set to true'
+ # otherwise Nokogiri will raise for having multiple root nodes when
+ # it moves its children to the root document context
+ end
+
+ clean!(html, Nokogiri::HTML::Document)
end
# Sanitizes the specified Nokogiri::XML::Node and all its children.
def clean_node!(node)
raise ArgumentError unless node.is_a?(Nokogiri::XML::Node)