lib/sanitize.rb in sanitize-1.0.2 vs lib/sanitize.rb in sanitize-1.0.3

- old
+ new

@@ -36,10 +36,18 @@
 require 'sanitize/config/basic'
 require 'sanitize/config/relaxed'
 require 'sanitize/monkeypatch/hpricot'
 
 class Sanitize
+
+  # Matches an attribute value that could be treated by a browser as a URL
+  # with a protocol prefix, such as "http:" or "javascript:". Any string of one
+  # or more characters followed by a colon is considered a match, even if the
+  # colon is encoded as an entity and even if it's an incomplete entity (which
+  # IE6 and Opera will still parse).
+  REGEX_PROTOCOL = /^([^:]+)(?:\:|&#0*58|&#x0*3a)(?:[^0-9a-f]|$)/i
+
   #--
   # Class Methods
   #++
 
   # Returns a sanitized copy of _html_, using the settings in _config_ if
@@ -48,11 +56,11 @@
     sanitize = Sanitize.new(config)
     sanitize.clean(html)
   end
 
   # Performs Sanitize#clean in place, returning _html_, or +nil+ if no changes
-  # were necessary.
+  # were made.
   def self.clean!(html, config = {})
     sanitize = Sanitize.new(config)
     sanitize.clean!(html)
   end
 
@@ -70,11 +78,11 @@
     dupe = html.dup
     clean!(dupe) || dupe
   end
 
   # Performs clean in place, returning _html_, or +nil+ if no changes were
-  # necessary.
+  # made.
   def clean!(html)
     fragment = Hpricot(html)
 
     fragment.traverse_element do |node|
       if node.bogusetag? || node.doctype? || node.procins? || node.xmldecl?
@@ -105,10 +113,10 @@
             node.raw_attributes.delete_if do |key, value|
               next false unless protocol.has_key?(key)
               next true if value.nil?
 
-              if value.to_s.downcase =~ /^([^:]+)(?:\:|&#0*58;|&#x0*3a;)/
+              if value.to_s.downcase =~ REGEX_PROTOCOL
                 !protocol[key].include?($1.downcase)
               else
                 !protocol[key].include?(:relative)
               end
             end