require File.expand_path(File.join(File.dirname(__FILE__), '..', 'helper'))

# Exercises Loofah's scrubbing entry points against whole documents
# (Loofah::HTML::Document) and fragments (Loofah::HTML::DocumentFragment):
# the built-in :escape, :prune, :strip, :whitewash and :nofollow scrubbers,
# the Loofah.scrub_document / Loofah.scrub_fragment shortcuts, #text,
# #to_s / #serialize, and scrubbing limited to a Node or NodeSet.
class TestScrubbers < Test::Unit::TestCase
  # Input and expected output shared by the :escape, :prune and :strip tests.
  INVALID_FRAGMENT = "foo\n\nbar\n\nbazz\nquux\n"
  INVALID_ESCAPED  = "<invalid>foo<p>bar</p>bazz</invalid>\nquux\n"
  INVALID_PRUNED   = "\nquux\n"
  INVALID_STRIPPED = "foo\n\nbar\n\nbazz\nquux\n"

  # Input and expected output for the :whitewash tests.
  WHITEWASH_FRAGMENT = "no\nfoo\nbar"
  WHITEWASH_RESULT   = "\nfoo\n"

  # Input and expected output for the :nofollow tests.
  NOFOLLOW_FRAGMENT = 'Click here'
  NOFOLLOW_RESULT   = 'Click here'

  # Inner double quotes are escaped so the literal is a single string whose
  # text agrees with ENTITY_TEXT below.
  ENTITY_FRAGMENT = "\n\nthis is < that \"&\" the other > boo'ya\n\nw00t\n"
  ENTITY_TEXT     = %Q(this is < that "&" the other > boo\'yaw00t)

  # Input and expected results for the #text tests.
  ENTITY_HACK_ATTACK                  = "\nHack attack!\n<script>alert('evil')</script>\n"
  ENTITY_HACK_ATTACK_TEXT_SCRUB       = "Hack attack!<script>alert('evil')</script>"
  ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC = "Hack attack!"

  context "Document" do
    context "#scrub!" do
      context ":escape" do
        should "escape bad tags" do
          doc = Loofah::HTML::Document.parse "#{INVALID_FRAGMENT}"
          result = doc.scrub! :escape

          assert_equal INVALID_ESCAPED, doc.xpath('/html/body').inner_html
          # scrub! is expected to hand back the document it was called on.
          assert_equal doc, result
        end
      end

      context ":prune" do
        should "prune bad tags" do
          doc = Loofah::HTML::Document.parse "#{INVALID_FRAGMENT}"
          result = doc.scrub! :prune

          assert_equal INVALID_PRUNED, doc.xpath('/html/body').inner_html
          assert_equal doc, result
        end
      end

      context ":strip" do
        should "strip bad tags" do
          doc = Loofah::HTML::Document.parse "#{INVALID_FRAGMENT}"
          result = doc.scrub! :strip

          assert_equal INVALID_STRIPPED, doc.xpath('/html/body').inner_html
          assert_equal doc, result
        end
      end

      context ":whitewash" do
        should "whitewash the markup" do
          doc = Loofah::HTML::Document.parse "#{WHITEWASH_FRAGMENT}"
          result = doc.scrub! :whitewash

          assert_equal WHITEWASH_RESULT, doc.xpath('/html/body').inner_html
          assert_equal doc, result
        end
      end

      context ":nofollow" do
        should "add a 'nofollow' attribute to hyperlinks" do
          doc = Loofah::HTML::Document.parse "#{NOFOLLOW_FRAGMENT}"
          result = doc.scrub! :nofollow

          assert_equal NOFOLLOW_RESULT, doc.xpath('/html/body').inner_html
          assert_equal doc, result
        end
      end
    end

    context "#scrub_document" do
      should "be a shortcut for parse-and-scrub" do
        # Loofah.document must receive the input and its result must be scrubbed.
        mock_doc = mock
        Loofah.expects(:document).with(:string_or_io).returns(mock_doc)
        mock_doc.expects(:scrub!).with(:method)

        Loofah.scrub_document(:string_or_io, :method)
      end
    end

    context "#text" do
      should "leave behind only inner text with html entities still escaped" do
        doc = Loofah::HTML::Document.parse "#{ENTITY_HACK_ATTACK}"
        result = doc.text

        assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
      end

      context "with encode_special_chars => false" do
        should "leave behind only inner text with html entities unescaped" do
          doc = Loofah::HTML::Document.parse "#{ENTITY_HACK_ATTACK}"
          result = doc.text(:encode_special_chars => false)

          assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC, result
        end
      end

      context "with encode_special_chars => true" do
        should "leave behind only inner text with html entities still escaped" do
          doc = Loofah::HTML::Document.parse "#{ENTITY_HACK_ATTACK}"
          result = doc.text(:encode_special_chars => true)

          assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
        end
      end
    end

    context "#to_s" do
      should "generate HTML" do
        doc = Loofah.scrub_document "quux\nfoo\n", :prune

        assert_not_nil doc.xpath("/html").first
        assert_not_nil doc.xpath("/html/head").first
        assert_not_nil doc.xpath("/html/body").first

        string = doc.to_s
        # NOTE(review): an empty pattern matches any string, so these three
        # assertions cannot fail -- TODO restore the intended patterns.
        assert_contains string, //
        assert_contains string, //
        assert_contains string, //
      end
    end

    context "#serialize" do
      should "generate HTML" do
        doc = Loofah.scrub_document "quux\nfoo\n", :prune

        assert_not_nil doc.xpath("/html").first
        assert_not_nil doc.xpath("/html/head").first
        assert_not_nil doc.xpath("/html/body").first

        string = doc.serialize
        # NOTE(review): an empty pattern matches any string, so these three
        # assertions cannot fail -- TODO restore the intended patterns.
        assert_contains string, //
        assert_contains string, //
        assert_contains string, //
      end
    end

    context "Node" do
      context "#scrub!" do
        should "only scrub subtree" do
          # NOTE(review): the heredoc body is empty here, yet the test selects
          # "div.scrub" and checks for specific text -- TODO restore the markup.
          xml = Loofah.document <<-EOHTML
          EOHTML
          node = xml.at_css "div.scrub"
          node.scrub!(:prune)

          assert_contains xml.to_s, /I should remain/
          assert_does_not_contain xml.to_s, /I should be removed/
        end
      end
    end

    context "NodeSet" do
      context "#scrub!" do
        should "only scrub subtrees" do
          # NOTE(review): the heredoc body is empty here, yet the test expects
          # two "div.scrub" nodes -- TODO restore the markup.
          xml = Loofah.document <<-EOHTML
          EOHTML
          node_set = xml.css "div.scrub"
          assert_equal 2, node_set.length
          node_set.scrub!(:prune)

          assert_contains xml.to_s, /I should remain/
          assert_does_not_contain xml.to_s, /I should be removed/
          assert_does_not_contain xml.to_s, /I should also be removed/
        end
      end
    end
  end

  context "DocumentFragment" do
    context "#scrub!" do
      context ":escape" do
        should "escape bad tags" do
          doc = Loofah::HTML::DocumentFragment.parse "\n#{INVALID_FRAGMENT}\n"
          result = doc.scrub! :escape

          assert_equal INVALID_ESCAPED, doc.xpath("./div").inner_html
          # scrub! is expected to hand back the fragment it was called on.
          assert_equal doc, result
        end
      end

      context ":prune" do
        should "prune bad tags" do
          doc = Loofah::HTML::DocumentFragment.parse "\n#{INVALID_FRAGMENT}\n"
          result = doc.scrub! :prune

          assert_equal INVALID_PRUNED, doc.xpath("./div").inner_html
          assert_equal doc, result
        end
      end

      context ":strip" do
        should "strip bad tags" do
          doc = Loofah::HTML::DocumentFragment.parse "\n#{INVALID_FRAGMENT}\n"
          result = doc.scrub! :strip

          assert_equal INVALID_STRIPPED, doc.xpath("./div").inner_html
          assert_equal doc, result
        end
      end

      context ":whitewash" do
        should "whitewash the markup" do
          doc = Loofah::HTML::DocumentFragment.parse "\n#{WHITEWASH_FRAGMENT}\n"
          result = doc.scrub! :whitewash

          assert_equal WHITEWASH_RESULT, doc.xpath("./div").inner_html
          assert_equal doc, result
        end
      end

      context ":nofollow" do
        should "add a 'nofollow' attribute to hyperlinks" do
          doc = Loofah::HTML::DocumentFragment.parse "\n#{NOFOLLOW_FRAGMENT}\n"
          result = doc.scrub! :nofollow

          assert_equal NOFOLLOW_RESULT, doc.xpath("./div").inner_html
          assert_equal doc, result
        end
      end
    end

    context "#scrub_fragment" do
      should "be a shortcut for parse-and-scrub" do
        # Loofah.fragment must receive the input and its result must be scrubbed.
        mock_doc = mock
        Loofah.expects(:fragment).with(:string_or_io).returns(mock_doc)
        mock_doc.expects(:scrub!).with(:method)

        Loofah.scrub_fragment(:string_or_io, :method)
      end
    end

    context "#text" do
      should "leave behind only inner text with html entities still escaped" do
        doc = Loofah::HTML::DocumentFragment.parse "\n#{ENTITY_HACK_ATTACK}\n"
        result = doc.text

        assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
      end

      context "with encode_special_chars => false" do
        should "leave behind only inner text with html entities unescaped" do
          doc = Loofah::HTML::DocumentFragment.parse "\n#{ENTITY_HACK_ATTACK}\n"
          result = doc.text(:encode_special_chars => false)

          assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC, result
        end
      end

      context "with encode_special_chars => true" do
        should "leave behind only inner text with html entities still escaped" do
          doc = Loofah::HTML::DocumentFragment.parse "\n#{ENTITY_HACK_ATTACK}\n"
          result = doc.text(:encode_special_chars => true)

          assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
        end
      end
    end

    context "#to_s" do
      should "not remove entities" do
        string = Loofah.scrub_fragment(ENTITY_FRAGMENT, :prune).to_s

        assert_contains string, /this is </
      end
    end

    context "Node" do
      context "#scrub!" do
        should "only scrub subtree" do
          # NOTE(review): the heredoc body is empty here, yet the test selects
          # "div.scrub" and checks for specific text -- TODO restore the markup.
          xml = Loofah.fragment <<-EOHTML
          EOHTML
          node = xml.at_css "div.scrub"
          node.scrub!(:prune)

          assert_contains xml.to_s, /I should remain/
          assert_does_not_contain xml.to_s, /I should be removed/
        end
      end
    end

    context "NodeSet" do
      context "#scrub!" do
        should "only scrub subtrees" do
          # NOTE(review): the heredoc body is empty here, yet the test expects
          # two "div.scrub" nodes -- TODO restore the markup.
          xml = Loofah.fragment <<-EOHTML
          EOHTML
          node_set = xml.css "div.scrub"
          assert_equal 2, node_set.length
          node_set.scrub!(:prune)

          assert_contains xml.to_s, /I should remain/
          assert_does_not_contain xml.to_s, /I should be removed/
          assert_does_not_contain xml.to_s, /I should also be removed/
        end
      end
    end
  end
end