#
# these tests taken from the HTML5 sanitization project and modified for use with Loofah
# see the original here: http://code.google.com/p/html5lib/source/browse/ruby/test/test_sanitizer.rb
#
# license text at the bottom of this file
#
require "helper"
class Html5TestSanitizer < Loofah::TestCase
include Loofah
# Parses +stream+ as a Loofah fragment, applies the :escape scrubber, and
# returns the scrubbed result serialized with to_xhtml.
def sanitize_xhtml(stream)
  fragment = Loofah.fragment(stream)
  scrubbed = fragment.scrub!(:escape)
  scrubbed.to_xhtml
end
# Parses +stream+ as a Loofah fragment, applies the :escape scrubber, and
# returns the scrubbed result serialized with to_html.
def sanitize_html(stream)
  fragment = Loofah.fragment(stream)
  scrubbed = fragment.scrub!(:escape)
  scrubbed.to_html
end
# Sanitizes +input+ with sanitize_html and asserts that the result equals at
# least one of the three expected strings.
#
# libxml emits double-quoted attributes, so every double quote in the actual
# output and in each expectation is replaced by a single quote before the
# comparison. The expectations are normalized into local copies; the caller's
# strings are never modified, so frozen literals and strings that the caller
# reuses afterwards are safe to pass in.
#
# input       - markup handed to sanitize_html
# htmloutput  - first accepted expectation; also the one shown in the failure message
# xhtmloutput - second accepted expectation
# rexmloutput - third accepted expectation
def check_sanitization(input, htmloutput, xhtmloutput, rexmloutput)
  sane = sanitize_html(input).tr('"', "'")
  expected_html, expected_xhtml, expected_rexml =
    [htmloutput, xhtmloutput, rexmloutput].map { |expected| expected.tr('"', "'") }

  # The reference outputs are inconsistent about details such as which
  # elements get closing tags, and hand-tuning each expectation to libxml's
  # serializer would take a lot of manual work. Matching any one of the three
  # described outputs is therefore accepted.
  assert(
    [expected_html, expected_rexml, expected_xhtml].include?(sane),
    %Q{given: "#{input}"\nexpected: "#{expected_html}"\ngot: "#{sane}"}
  )
end
# Runs the given block and asserts that it finished within 0.1 seconds.
#
# The duration is read from the monotonic clock rather than the wall clock,
# so a system clock adjustment while the block is running cannot shrink or
# inflate the measured time.
def assert_completes_in_reasonable_time(&block)
  started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  block.call
  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
  assert_in_delta 0.0, elapsed, 0.1 # arbitrary seconds
end
# Generates one "test_should_allow_<tag>_tag" method per entry in
# HTML5::WhiteList::ALLOWED_ELEMENTS.
#
# NOTE(review): this block looks damaged and should be restored from the
# upstream test before it is relied on:
#   * `output` is passed to check_sanitization below but is never assigned
#     in these lines;
#   * the `do` blocks opened by `each` and `define_method` are not closed
#     here (the only `end` closes the `if`);
#   * the `col` branch assigns a string holding just a newline, and the
#     closing tags in the literals end in `name>` with no `</` — presumably
#     markup was stripped from the strings; verify against the original.
(HTML5::WhiteList::ALLOWED_ELEMENTS).each do |tag_name|
define_method "test_should_allow_#{tag_name}_tag" do
input = "<#{tag_name} title='1'>foo bar baz#{tag_name}>"
htmloutput = "<#{tag_name.downcase} title='1'>foo <bad>bar</bad> baz#{tag_name.downcase}>"
xhtmloutput = "<#{tag_name} title='1'>foo <bad>bar</bad> baz#{tag_name}>"
rexmloutput = xhtmloutput
# For these tags the expected HTML/XHTML output is only the inner content.
if %w[caption colgroup optgroup option tbody td tfoot th thead tr].include?(tag_name)
htmloutput = "foo <bad>bar</bad> baz"
xhtmloutput = htmloutput
elsif tag_name == 'col'
htmloutput = "
"
check_sanitization(input, htmloutput, output, output)
end
##
## libxml2 downcases attributes, so this is moot.
##
# HTML5::WhiteList::ALLOWED_ATTRIBUTES.each do |attribute_name|
# define_method "test_should_forbid_#{attribute_name.upcase}_attribute" do
# input = "foo bar baz"
# output = "foo <bad>bar</bad> baz"
# check_sanitization(input, output, output, output)
# end
# end
# Defines a "test_should_allow_<protocol>_uris" method for every entry in
# HTML5::WhiteList::ALLOWED_PROTOCOLS.
#
# NOTE(review): the block variable only feeds the method name; the literals
# below never mention it, so every generated test checks the same strings.
# Presumably markup was lost from them — confirm against the upstream test.
HTML5::WhiteList::ALLOWED_PROTOCOLS.each do |scheme|
  define_method("test_should_allow_#{scheme}_uris") do
    markup = %(foo)
    expected = "foo"
    check_sanitization(markup, expected, expected, expected)
  end
end
# Defines a "test_should_allow_uppercase_<protocol>_uris" method for every
# entry in HTML5::WhiteList::ALLOWED_PROTOCOLS.
#
# NOTE(review): nothing in the body upcases or even references the block
# variable, so each generated test checks the same fixed strings. Presumably
# markup was lost from the literals — confirm against the upstream test.
HTML5::WhiteList::ALLOWED_PROTOCOLS.each do |scheme|
  test_name = "test_should_allow_uppercase_#{scheme}_uris"
  define_method(test_name) do
    expected = "foo"
    check_sanitization(%(foo), expected, expected, expected)
  end
end
# Defines a "test_should_allow_data_<mediatype>_uris" method for every entry
# in HTML5::WhiteList::ALLOWED_URI_DATA_MEDIATYPES. Each generated test runs
# two sanitization checks in sequence.
#
# NOTE(review): both checks use the same strings and neither references the
# block variable. Presumably the two cases originally differed and markup was
# lost from the literals — confirm against the upstream test.
HTML5::WhiteList::ALLOWED_URI_DATA_MEDIATYPES.each do |media_type|
  define_method("test_should_allow_data_#{media_type}_uris") do
    first_markup = %(foo)
    first_expected = "foo"
    check_sanitization(first_markup, first_expected, first_expected, first_expected)

    second_markup = %(foo)
    second_expected = "foo"
    check_sanitization(second_markup, second_expected, second_expected, second_expected)
  end
end
# Defines a "test_should_allow_uppercase_data_<mediatype>_uris" method for
# every entry in HTML5::WhiteList::ALLOWED_URI_DATA_MEDIATYPES.
#
# NOTE(review): the body never references the block variable, so each
# generated test checks the same fixed strings. Presumably markup was lost
# from the literals — confirm against the upstream test.
HTML5::WhiteList::ALLOWED_URI_DATA_MEDIATYPES.each do |media_type|
  test_name = "test_should_allow_uppercase_data_#{media_type}_uris"
  define_method(test_name) do
    expected = "foo"
    check_sanitization(%(foo), expected, expected, expected)
  end
end
# Runs three sanitization checks, each comparing an input against a single
# expected string that is used for all three accepted outputs.
#
# NOTE(review): the three cases are currently identical; presumably they
# originally carried different markup that was lost from the literals —
# confirm against the upstream test. They are kept as separate rows so each
# case can be restored individually.
def test_should_disallow_other_uri_mediatypes
  cases = [
    [%(foo), "foo"],
    [%(foo), "foo"],
    [%(foo), "foo"]
  ]

  cases.each do |markup, expected|
    check_sanitization(markup, expected, expected, expected)
  end
end
# For every entry of HTML5::WhiteList::SVG_ALLOW_LOCAL_HREF that is also in
# ALLOWED_ELEMENTS, defines four tests around the xlink:href attribute:
#
#   * a local reference ("#foo") is expected to be kept;
#   * a local reference preceded by a newline is expected to be kept;
#   * a non-local reference ("http://bad.com/foo") is expected to be dropped;
#   * a non-local reference preceded by a newline is expected to be dropped.
#
# Each test passes a lower-cased expectation as the HTML output and an
# expectation using the tag name as given for the other two outputs.
#
# The expected strings close the element with "</name>". They previously
# ended in "name>" with no "</", which is not a closing tag and so could
# never equal serialized markup.
HTML5::WhiteList::SVG_ALLOW_LOCAL_HREF.each do |tag_name|
  # Entries that are not allowed elements get no tests.
  next unless HTML5::WhiteList::ALLOWED_ELEMENTS.include?(tag_name)

  define_method "test_#{tag_name}_should_allow_local_href" do
    input = %(<#{tag_name} xlink:href="#foo"/>)
    output = "<#{tag_name.downcase} xlink:href='#foo'></#{tag_name.downcase}>"
    xhtmloutput = "<#{tag_name} xlink:href='#foo'></#{tag_name}>"
    check_sanitization(input, output, xhtmloutput, xhtmloutput)
  end

  define_method "test_#{tag_name}_should_allow_local_href_with_newline" do
    input = %(<#{tag_name} xlink:href="\n#foo"/>)
    output = "<#{tag_name.downcase} xlink:href='\n#foo'></#{tag_name.downcase}>"
    xhtmloutput = "<#{tag_name} xlink:href='\n#foo'></#{tag_name}>"
    check_sanitization(input, output, xhtmloutput, xhtmloutput)
  end

  define_method "test_#{tag_name}_should_forbid_nonlocal_href" do
    input = %(<#{tag_name} xlink:href="http://bad.com/foo"/>)
    output = "<#{tag_name.downcase}></#{tag_name.downcase}>"
    xhtmloutput = "<#{tag_name}></#{tag_name}>"
    check_sanitization(input, output, xhtmloutput, xhtmloutput)
  end

  define_method "test_#{tag_name}_should_forbid_nonlocal_href_with_newline" do
    input = %(<#{tag_name} xlink:href="\nhttp://bad.com/foo"/>)
    output = "<#{tag_name.downcase}></#{tag_name.downcase}>"
    xhtmloutput = "<#{tag_name}></#{tag_name}>"
    check_sanitization(input, output, xhtmloutput, xhtmloutput)
  end
end
def test_figure_element_is_valid
fragment = Loofah.scrub_fragment("hello ", :prune)
assert fragment.at_css("figure"), "