lib/sanitize.rb in sanitize-2.0.3 vs lib/sanitize.rb in sanitize-2.0.4

- old
+ new

@@ -1,8 +1,8 @@
 # encoding: utf-8
 #--
-# Copyright (c) 2011 Ryan Grove <ryan@wonko.com>
+# Copyright (c) 2013 Ryan Grove <ryan@wonko.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the 'Software'), to deal
 # in the Software without restriction, including without limitation the rights
 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
@@ -57,10 +57,23 @@
   # were made.
   def self.clean!(html, config = {})
     Sanitize.new(config).clean!(html)
   end

+  # Performs a Sanitize#clean using a full-document HTML parser instead of
+  # the default fragment parser. This will add a DOCTYPE and html tag
+  # unless they are already present
+  def self.clean_document(html, config = {})
+    Sanitize.new(config).clean_document(html)
+  end
+
+  # Performs Sanitize#clean_document in place, returning _html_, or +nil+ if no
+  # changes were made.
+  def self.clean_document!(html, config = {})
+    Sanitize.new(config).clean_document!(html)
+  end
+
   # Sanitizes the specified Nokogiri::XML::Node and all its children.
   def self.clean_node!(node, config = {})
     Sanitize.new(config).clean_node!(node)
   end

@@ -94,12 +107,12 @@
     end
   end

   # Performs clean in place, returning _html_, or +nil+ if no changes were
   # made.
-  def clean!(html)
-    fragment = Nokogiri::HTML::DocumentFragment.parse(html)
+  def clean!(html, parser = Nokogiri::HTML::DocumentFragment)
+    fragment = parser.parse(html)
     clean_node!(fragment)

     output_method_params = {:encoding => @config[:output_encoding], :indent => 0}

     if @config[:output] == :xhtml
@@ -112,9 +125,25 @@
     end

     result = output_method.call(output_method_params)

     return result == html ? nil : html[0, html.length] = result
+  end
+
+  def clean_document(html)
+    unless html.nil?
+      clean_document!(html.dup) || html
+    end
+  end
+
+  def clean_document!(html)
+    if !@config[:elements].include?('html') && !@config[:remove_contents]
+      raise 'You must have the HTML element whitelisted to call #clean_document unless remove_contents is set to true'
+      # otherwise Nokogiri will raise for having multiple root nodes when
+      # it moves its children to the root document context
+    end
+
+    clean!(html, Nokogiri::HTML::Document)
   end

   # Sanitizes the specified Nokogiri::XML::Node and all its children.
   def clean_node!(node)
     raise ArgumentError unless node.is_a?(Nokogiri::XML::Node)