lib/sanitize.rb in sanitize-1.2.1.dev.20100122 vs lib/sanitize.rb in sanitize-1.2.1.dev.20100124

- old
+ new

@@ -70,10 +70,19 @@ def initialize(config = {}) # Sanitize configuration. @config = Config::DEFAULT.merge(config) @config[:transformers] = Array(@config[:transformers]) + # :remove_contents takes precedence over :escape_only. + if @config[:remove_contents] && @config[:escape_only] + @config[:escape_only] = false + end + + # Convert the list of allowed elements to a Hash for faster lookup. + @allowed_elements = {} + @config[:elements].each {|el| @allowed_elements[el] = true } + # Specific nodes to whitelist (along with all their attributes). This array # is generated at runtime by transformers, and is cleared before and after # a fragment is cleaned (so it applies only to a specific fragment). @whitelist_nodes = [] end @@ -85,14 +94,12 @@ end # Performs clean in place, returning _html_, or +nil+ if no changes were # made. def clean!(html) - @whitelist_nodes = [] fragment = Nokogiri::HTML::DocumentFragment.parse(html) clean_node!(fragment) - @whitelist_nodes = [] output_method_params = {:encoding => 'utf-8', :indent => 0} if @config[:output] == :xhtml output_method = fragment.method(:to_xhtml) @@ -114,21 +121,30 @@ # Sanitizes the specified Nokogiri::XML::Node and all its children. def clean_node!(node) raise ArgumentError unless node.is_a?(Nokogiri::XML::Node) - node.traverse do |traversed_node| - if traversed_node.element? - clean_element!(traversed_node) - elsif traversed_node.comment? - traversed_node.unlink unless @config[:allow_comments] - elsif traversed_node.cdata? - traversed_node.replace(Nokogiri::XML::Text.new(traversed_node.text, - traversed_node.document)) + @whitelist_nodes = [] + + node.traverse do |child| + if child.element? + clean_element!(child) + elsif child.comment? + unless @config[:allow_comments] + if @config[:escape_only] + child.replace(Nokogiri::XML::Text.new(child.to_s, child.document)) + else + child.unlink + end + end + elsif child.cdata? + child.replace(Nokogiri::XML::Text.new(child.text, child.document)) end end + @whitelist_nodes = [] + node end private @@ -141,15 +157,20 @@ return if @whitelist_nodes.include?(node) name = node.name.to_s.downcase # Delete any element that isn't in the whitelist. - unless transform[:whitelist] || @config[:elements].include?(name) - unless @config[:remove_contents] - node.children.each { |n| node.add_previous_sibling(n) } + unless transform[:whitelist] || @allowed_elements[name] + if @config[:escape_only] + node.replace(Nokogiri::XML::Text.new(node.to_s, node.document)) + else + unless @config[:remove_contents] + node.children.each { |n| node.add_previous_sibling(n) } + end + + node.unlink end - node.unlink return end attr_whitelist = (transform[:attr_whitelist] + (@config[:attributes][name] || []) +