Sha256: 3fc0895f4f801b51984171afa79f486fd102f1234aa31f2781310720d3878c24

Contents?: true

Size: 1.32 KB

Versions: 4

Compression:

Stored size: 1.32 KB

Contents

# Code from rubyonrails project (http://www.rubyonrails.com)
# Temporarily here.

require 'html/tokenizer'
require 'html/node'

# Names of the tags that String.sanitize turns into regular text instead of
# leaving in place as markup. Frozen so the list cannot be altered by accident
# at runtime. The "unless defined?" guard keeps a definition made before this
# file is loaded.
VERBOTEN_TAGS = %w(form script).freeze unless defined?(VERBOTEN_TAGS)

# Pattern for attribute names that String.sanitize removes: anything starting
# with "on", in any letter case (onclick, onLoad, ...). Same "unless defined?"
# guard as above.
VERBOTEN_ATTRS = /^on/i unless defined?(VERBOTEN_ATTRS)

class String
  # Sanitizes the given HTML so that it cannot execute arbitrary Javascript.
  #
  # * Tags named in VERBOTEN_TAGS (compared case-insensitively) are turned
  #   into regular text by escaping their "<" as "&lt;".
  # * On every other tag that is not a closing tag, attributes whose name
  #   matches VERBOTEN_ATTRS (the "onxxx" handlers) are removed.
  # * On those same tags, href attributes holding a "javascript:" URL are
  #   removed. The scheme is recognised in any letter case, and also when it
  #   is preceded or interrupted by whitespace/control characters.
  # * Any node that is not an HTML::Tag has its "<" characters escaped.
  #
  # NOTE: character entities inside attribute values are not decoded here, so
  # an entity-encoded scheme is not detected by this method.
  #
  # html:: the markup to sanitize. It is returned as-is when it contains
  #        no "<" at all.
  #
  # Returns the sanitized text.
  def self.sanitize(html)
    # Only pay for tokenizing when there is markup to inspect.
    return html unless html.index("<")

    tokenizer = HTML::Tokenizer.new(html)
    # Collect the pieces and join once at the end; this avoids mutating a
    # string literal, which fails when string literals are frozen.
    pieces = []

    while token = tokenizer.next
      node = HTML::Node.parse(nil, 0, 0, token, false)

      # Compare the lowercased name so that "<SCRIPT>" is caught even if the
      # parser preserves the case used in the input.
      if node.is_a?(HTML::Tag) && !VERBOTEN_TAGS.include?(node.name.to_s.downcase)
        unless node.closing == :close
          node.attributes.delete_if do |attr, value|
            attr_name = attr.to_s
            next true if attr_name =~ VERBOTEN_ATTRS
            next false unless attr_name.downcase == "href"

            # to_s keeps the match safe for nil or non-String values
            # (NOTE(review): whether the parser ever stores a non-String
            # value is not visible from this file).
            href = value.to_s
            # Browsers ignore whitespace and control characters before and
            # inside the scheme, so also test the value with them removed.
            href =~ /^javascript:/i ||
              href.gsub(/[\x00-\x20]+/, "") =~ /\Ajavascript:/i
          end
        end
        pieces << node.to_s
      else
        # Forbidden tags and non-tag nodes are rendered as plain text.
        pieces << node.to_s.gsub(/</, "&lt;")
      end
    end

    pieces.join
  end
end

Version data entries

4 entries across 4 versions & 1 rubygem

Version Path
glue-0.18.0 lib/glue/sanitize.rb
glue-0.19.0 lib/glue/sanitize.rb
glue-0.20.0 lib/glue/sanitize.rb
glue-0.18.1 lib/glue/sanitize.rb