lib/sanitize.rb in sanitize-1.0.2 vs lib/sanitize.rb in sanitize-1.0.3
- old
+ new
@@ -36,10 +36,18 @@
require 'sanitize/config/basic'
require 'sanitize/config/relaxed'
require 'sanitize/monkeypatch/hpricot'
class Sanitize
+
+ # Matches an attribute value that could be treated by a browser as a URL
+ # with a protocol prefix, such as "http:" or "javascript:". Any string of one
+ # or more characters followed by a colon is considered a match, even if the
+ # colon is encoded as an entity and even if it's an incomplete entity (which
+ # IE6 and Opera will still parse).
+ REGEX_PROTOCOL = /^([^:]+)(?:\:|&#0*58|&#x0*3a)(?:[^0-9a-f]|$)/i
+
#--
# Class Methods
#++
# Returns a sanitized copy of _html_, using the settings in _config_ if
@@ -48,11 +56,11 @@
sanitize = Sanitize.new(config)
sanitize.clean(html)
end
# Performs Sanitize#clean in place, returning _html_, or +nil+ if no changes
- # were necessary.
+ # were made.
def self.clean!(html, config = {})
  # Build a one-off sanitizer from _config_ and hand _html_ to its
  # instance-level clean!; that call's result is returned unchanged.
  Sanitize.new(config).clean!(html)
end
@@ -70,11 +78,11 @@
dupe = html.dup
clean!(dupe) || dupe
end
# Performs clean in place, returning _html_, or +nil+ if no changes were
- # necessary.
+ # made.
def clean!(html)
fragment = Hpricot(html)
fragment.traverse_element do |node|
if node.bogusetag? || node.doctype? || node.procins? || node.xmldecl?
@@ -105,10 +113,10 @@
node.raw_attributes.delete_if do |key, value|
next false unless protocol.has_key?(key)
next true if value.nil?
- if value.to_s.downcase =~ /^([^:]+)(?:\:|&#0*58;|&#x0*3a;)/
+ if value.to_s.downcase =~ REGEX_PROTOCOL
!protocol[key].include?($1.downcase)
else
!protocol[key].include?(:relative)
end
end