lib/sanitize.rb in sanitize-1.0.0 vs lib/sanitize.rb in sanitize-1.0.1

- old
+ new

@@ -1,18 +1,18 @@ #-- -# Copyright (c) 2008 Ryan Grove <ryan@wonko.com> -# +# Copyright (c) 2009 Ryan Grove <ryan@wonko.com> +# # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the 'Software'), to deal # in the Software without restriction, including without limitation the rights # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell # copies of the Software, and to permit persons to whom the Software is # furnished to do so, subject to the following conditions: -# +# # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. -# +# # THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, @@ -23,17 +23,21 @@ # Append this file's directory to the include path if it's not there already. $:.unshift(File.dirname(File.expand_path(__FILE__))) $:.uniq! require 'rubygems' -gem 'hpricot', '~> 0.6' +gem 'hpricot', '~> 0.6' +gem 'htmlentities', '~> 4.0.0' + require 'hpricot' +require 'htmlentities' require 'sanitize/config' require 'sanitize/config/restricted' require 'sanitize/config/basic' require 'sanitize/config/relaxed' +require 'sanitize/monkeypatch/hpricot' class Sanitize #-- # Class Methods #++ @@ -98,12 +102,17 @@ # Delete remaining attributes that use unacceptable protocols. if @config[:protocols].has_key?(name) protocol = @config[:protocols][name] node.raw_attributes.delete_if do |key, value| - protocol.has_key?(key) && (!(value.downcase =~ /^([^:]+):/) || - !protocol[key].include?($1.downcase)) + next false unless protocol.has_key?(key) + + if value.downcase =~ /^([^:]+)(?:\:|&#0*58;|&#x0*3a;)/ + !protocol[key].include?($1.downcase) + else + !protocol[key].include?(:relative) + end end end else # Delete all attributes from elements with no whitelisted # attributes. @@ -115,15 +124,18 @@ node.raw_attributes.merge!(@config[:add_attributes][name]) end end end - # Make one last pass through the fragment and replace angle brackets with - # entities in all text nodes. This helps eliminate certain types of - # maliciously-malformed nested tags. + # Make one last pass through the fragment and encode all special HTML chars + # and non-ASCII chars as entities. This eliminates certain types of + # maliciously-malformed nested tags and also compensates for Hpricot's + # burning desire to decode all entities. + coder = HTMLEntities.new + fragment.traverse_element do |node| if node.text? - node.swap(node.inner_text.gsub('<', '&lt;').gsub('>', '&gt;')) + node.swap(coder.encode(node.inner_text, :named)) end end result = fragment.to_s return result == html ? nil : html[0, html.length] = result