lib/sanitize.rb in sanitize-1.0.0 vs lib/sanitize.rb in sanitize-1.0.1
- old
+ new
@@ -1,18 +1,18 @@
#--
-# Copyright (c) 2008 Ryan Grove <ryan@wonko.com>
-#
+# Copyright (c) 2009 Ryan Grove <ryan@wonko.com>
+#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the 'Software'), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
-#
+#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
-#
+#
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
@@ -23,17 +23,21 @@
# Append this file's directory to the include path if it's not there already.
$:.unshift(File.dirname(File.expand_path(__FILE__)))
$:.uniq!
require 'rubygems'
-gem 'hpricot', '~> 0.6'
+gem 'hpricot', '~> 0.6'
+gem 'htmlentities', '~> 4.0.0'
+
require 'hpricot'
+require 'htmlentities'
require 'sanitize/config'
require 'sanitize/config/restricted'
require 'sanitize/config/basic'
require 'sanitize/config/relaxed'
+require 'sanitize/monkeypatch/hpricot'
class Sanitize
#--
# Class Methods
#++
@@ -98,12 +102,17 @@
# Delete remaining attributes that use unacceptable protocols.
if @config[:protocols].has_key?(name)
protocol = @config[:protocols][name]
node.raw_attributes.delete_if do |key, value|
- protocol.has_key?(key) && (!(value.downcase =~ /^([^:]+):/) ||
- !protocol[key].include?($1.downcase))
+ next false unless protocol.has_key?(key)
+
+ if value.downcase =~ /^([^:]+)(?:\:|&#0*58;|&#x0*3a;)/
+ !protocol[key].include?($1.downcase)
+ else
+ !protocol[key].include?(:relative)
+ end
end
end
else
# Delete all attributes from elements with no whitelisted
# attributes.
@@ -115,15 +124,18 @@
node.raw_attributes.merge!(@config[:add_attributes][name])
end
end
end
- # Make one last pass through the fragment and replace angle brackets with
- # entities in all text nodes. This helps eliminate certain types of
- # maliciously-malformed nested tags.
+ # Make one last pass through the fragment and encode all special HTML chars
+ # and non-ASCII chars as entities. This eliminates certain types of
+ # maliciously-malformed nested tags and also compensates for Hpricot's
+ # burning desire to decode all entities.
+ coder = HTMLEntities.new
+
fragment.traverse_element do |node|
if node.text?
- node.swap(node.inner_text.gsub('<', '&lt;').gsub('>', '&gt;'))
+ node.swap(coder.encode(node.inner_text, :named))
end
end
result = fragment.to_s
return result == html ? nil : html[0, html.length] = result