lib/sanitize.rb in sanitize-2.0.0.dev.20101225 vs lib/sanitize.rb in sanitize-2.0.0.dev.20110105

- old
+ new

@@ -1,8 +1,8 @@
 # encoding: utf-8
 #--
-# Copyright (c) 2010 Ryan Grove <ryan@wonko.com>
+# Copyright (c) 2011 Ryan Grove <ryan@wonko.com>
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the 'Software'), to deal
 # in the Software without restriction, including without limitation the rights
 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
@@ -68,18 +68,22 @@
   # Instance Methods
   #++

   # Returns a new Sanitize object initialized with the settings in _config_.
   def initialize(config = {})
-    @config = Config::DEFAULT.merge(config)
-    @transformers = Array(@config[:transformers].dup)
+    @config = Config::DEFAULT.merge(config)

-    # Default transformers. These always run at the end of the transformer
-    # chain, after any custom transformers.
-    @transformers << Transformers::CleanComment unless @config[:allow_comments]
+    @transformers = {
+      :breadth => Array(@config[:transformers_breadth].dup),
+      :depth => Array(@config[:transformers]) + Array(@config[:transformers_depth])
+    }

-    @transformers <<
+    # Default depth transformers. These always run at the end of the chain,
+    # after any custom transformers.
+    @transformers[:depth] << Transformers::CleanComment unless @config[:allow_comments]
+
+    @transformers[:depth] <<
       Transformers::CleanCDATA <<
       Transformers::CleanElement.new(@config)
   end

   # Returns a sanitized copy of _html_.
@@ -115,36 +119,51 @@
   # Sanitizes the specified Nokogiri::XML::Node and all its children.
   def clean_node!(node)
     raise ArgumentError unless node.is_a?(Nokogiri::XML::Node)

     node_whitelist = Set.new

-    node.traverse {|child| transform_node!(child, node_whitelist) }
+    unless @transformers[:breadth].empty?
+      traverse_breadth(node) {|n| transform_node!(n, node_whitelist, :breadth) }
+    end
+
+    traverse_depth(node) {|n| transform_node!(n, node_whitelist, :depth) }
     node
   end

   private

-  def transform_node!(node, node_whitelist)
-    @transformers.each do |transformer|
+  def transform_node!(node, node_whitelist, mode)
+    @transformers[mode].each do |transformer|
       result = transformer.call({
         :config => @config,
         :is_whitelisted => node_whitelist.include?(node),
         :node => node,
         :node_name => node.name.downcase,
-        :node_whitelist => node_whitelist
+        :node_whitelist => node_whitelist,
+        :traversal_mode => mode
       })

       if result.is_a?(Hash) && result[:node_whitelist].respond_to?(:each)
         node_whitelist.merge(result[:node_whitelist])
       end
-
-      # If the node has been unlinked or replaced, there's no point running
-      # subsequent transformers.
-      break if node.parent.nil? && !node.fragment?
     end

     node
+  end
+
+  # Performs breadth-first traversal, operating first on the root node, then
+  # traversing downwards.
+  def traverse_breadth(node, &block)
+    block.call(node)
+    node.children.each {|child| traverse_breadth(child, &block) }
+  end
+
+  # Performs depth-first traversal, operating first on the deepest nodes in the
+  # document, then traversing upwards to the root.
+  def traverse_depth(node, &block)
+    node.children.each {|child| traverse_depth(child, &block) }
+    block.call(node)
   end

   class Error < StandardError; end
 end