lib/sanitize.rb in sanitize-1.2.1.dev.20100122 vs lib/sanitize.rb in sanitize-1.2.1.dev.20100124
- old
+ new
@@ -70,10 +70,19 @@
# Sets up a sanitizer instance from the given configuration Hash.
#
# +config+ is merged over Config::DEFAULT, so any key present in +config+
# overrides the corresponding default; keys that are absent keep their
# default value.
def initialize(config = {})
# Sanitize configuration.
@config = Config::DEFAULT.merge(config)
# Kernel#Array normalizes :transformers so that nil or a single value ends
# up stored as an Array.
@config[:transformers] = Array(@config[:transformers])
+ # :remove_contents takes precedence over :escape_only.
+ if @config[:remove_contents] && @config[:escape_only]
+ @config[:escape_only] = false
+ end
+
+ # Convert the list of allowed elements to a Hash for faster lookup.
+ @allowed_elements = {}
+ @config[:elements].each {|el| @allowed_elements[el] = true }
+
# Specific nodes to whitelist (along with all their attributes). This array
# is generated at runtime by transformers, and is cleared before and after
# a fragment is cleaned (so it applies only to a specific fragment).
@whitelist_nodes = []
end
@@ -85,14 +94,12 @@
end
# Performs clean in place, returning _html_, or +nil+ if no changes were
# made.
def clean!(html)
- @whitelist_nodes = []
fragment = Nokogiri::HTML::DocumentFragment.parse(html)
clean_node!(fragment)
- @whitelist_nodes = []
output_method_params = {:encoding => 'utf-8', :indent => 0}
if @config[:output] == :xhtml
output_method = fragment.method(:to_xhtml)
@@ -114,21 +121,30 @@
# Sanitizes the specified Nokogiri::XML::Node and all its children.
#
# Raises ArgumentError unless +node+ is a Nokogiri::XML::Node. Returns the
# same +node+ that was passed in.
#
# In the new (+) revision shown in this hunk:
# * @whitelist_nodes is reset before and after the traversal here, instead
#   of in clean!.
# * Element nodes are handed to clean_element!.
# * Comment nodes are left alone when :allow_comments is set; otherwise they
#   are replaced by a text node holding their serialized markup when
#   :escape_only is set, or unlinked when it is not.
# * CDATA nodes are replaced by a text node holding their text.
def clean_node!(node)
raise ArgumentError unless node.is_a?(Nokogiri::XML::Node)
- node.traverse do |traversed_node|
- if traversed_node.element?
- clean_element!(traversed_node)
- elsif traversed_node.comment?
- traversed_node.unlink unless @config[:allow_comments]
- elsif traversed_node.cdata?
- traversed_node.replace(Nokogiri::XML::Text.new(traversed_node.text,
- traversed_node.document))
+ @whitelist_nodes = []
+
+ node.traverse do |child|
+ if child.element?
+ clean_element!(child)
+ elsif child.comment?
+ unless @config[:allow_comments]
+ if @config[:escape_only]
+ child.replace(Nokogiri::XML::Text.new(child.to_s, child.document))
+ else
+ child.unlink
+ end
+ end
+ elsif child.cdata?
+ child.replace(Nokogiri::XML::Text.new(child.text, child.document))
end
end
+ @whitelist_nodes = []
+
node
end
private
@@ -141,15 +157,20 @@
return if @whitelist_nodes.include?(node)
name = node.name.to_s.downcase
# Delete any element that isn't in the whitelist.
- unless transform[:whitelist] || @config[:elements].include?(name)
- unless @config[:remove_contents]
- node.children.each { |n| node.add_previous_sibling(n) }
+ unless transform[:whitelist] || @allowed_elements[name]
+ if @config[:escape_only]
+ node.replace(Nokogiri::XML::Text.new(node.to_s, node.document))
+ else
+ unless @config[:remove_contents]
+ node.children.each { |n| node.add_previous_sibling(n) }
+ end
+
+ node.unlink
end
- node.unlink
return
end
attr_whitelist = (transform[:attr_whitelist] +
(@config[:attributes][name] || []) +