require "helper"

#
#  Integration tests for the built-in scrubbers, addressed by symbol
#  (:escape, :prune, :strip, :whitewash, :nofollow, :noopener, :unprintable).
#
#  The same scenarios are exercised twice: once against
#  Loofah::HTML::Document and once against Loofah::HTML::DocumentFragment.
#  Each group also covers the parse-and-scrub shortcuts, #text, #to_s and
#  scrubbing a single Node / a NodeSet.
#
#  NOTE(review): several fixtures below (empty heredocs, empty regexps,
#  "Click here" used as both input and expected output) look like they
#  lost their HTML markup at some point -- confirm against the upstream
#  version of this file before relying on these assertions.
#
class IntegrationTestScrubbers < Loofah::TestCase
  # Fixture and expected results for :escape / :prune / :strip.
  INVALID_FRAGMENT = "foo\n\nbar\n\nbazz\nquux\n"
  INVALID_ESCAPED  = "<invalid>foo<p>bar</p>bazz</invalid>\nquux\n"
  INVALID_PRUNED   = "\nquux\n"
  INVALID_STRIPPED = "foo\n\nbar\n\nbazz\nquux\n"

  # Fixture and expected result for :whitewash.
  WHITEWASH_FRAGMENT = "no\nfoo\nbar"
  WHITEWASH_RESULT   = "\nfoo\n"

  # Fixtures and expected results for :nofollow.
  NOFOLLOW_FRAGMENT          = 'Click here'
  NOFOLLOW_RESULT            = 'Click here'
  NOFOLLOW_WITH_REL_FRAGMENT = 'Click here'
  NOFOLLOW_WITH_REL_RESULT   = 'Click here'

  # Fixtures and expected results for :noopener.
  NOOPENER_FRAGMENT          = 'Click here'
  NOOPENER_RESULT            = 'Click here'
  NOOPENER_WITH_REL_FRAGMENT = 'Click here'
  NOOPENER_WITH_REL_RESULT   = 'Click here'

  # U+2029 / U+2028 are embedded in the input and absent from the result.
  UNPRINTABLE_FRAGMENT = "Lo\u2029ofah ro\u2028cks!"
  UNPRINTABLE_RESULT   = "Loofah rocks!"

  # %Q() is required here: the text contains double quotes, which would
  # terminate a "..." literal and turn the assignment into `String & String`
  # (a NoMethodError while the class body is being evaluated).
  ENTITY_FRAGMENT = %Q(\n\nthis is < that "&" the other > boo'ya\n\nw00t\n)
  ENTITY_TEXT     = %Q(this is < that "&" the other > boo\'yaw00t)

  # Fixture and expected results for the #text tests.
  ENTITY_HACK_ATTACK                  = "\nHack attack!\n<script>alert('evil')</script>\n"
  ENTITY_HACK_ATTACK_TEXT_SCRUB       = "Hack attack!<script>alert('evil')</script>"
  ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC = "Hack attack!"

  context "Document" do
    context "#scrub!" do
      # Every test in this group also asserts that scrub! hands back a value
      # equal to the document it was called on.
      # NOTE(review): the "#{CONST}" interpolations are redundant as written;
      # presumably wrapper markup around the constant was lost -- confirm.
      context ":escape" do
        it "escape bad tags" do
          doc = Loofah::HTML::Document.parse "#{INVALID_FRAGMENT}"
          result = doc.scrub! :escape

          assert_equal INVALID_ESCAPED, doc.xpath('/html/body').inner_html
          assert_equal doc, result
        end
      end

      context ":prune" do
        it "prune bad tags" do
          doc = Loofah::HTML::Document.parse "#{INVALID_FRAGMENT}"
          result = doc.scrub! :prune

          assert_equal INVALID_PRUNED, doc.xpath('/html/body').inner_html
          assert_equal doc, result
        end
      end

      context ":strip" do
        it "strip bad tags" do
          doc = Loofah::HTML::Document.parse "#{INVALID_FRAGMENT}"
          result = doc.scrub! :strip

          assert_equal INVALID_STRIPPED, doc.xpath('/html/body').inner_html
          assert_equal doc, result
        end
      end

      context ":whitewash" do
        it "whitewash the markup" do
          doc = Loofah::HTML::Document.parse "#{WHITEWASH_FRAGMENT}"
          result = doc.scrub! :whitewash

          assert_equal WHITEWASH_RESULT, doc.xpath('/html/body').inner_html
          assert_equal doc, result
        end
      end

      context ":nofollow" do
        it "add a 'nofollow' attribute to hyperlinks" do
          doc = Loofah::HTML::Document.parse "#{NOFOLLOW_FRAGMENT}"
          result = doc.scrub! :nofollow

          assert_equal NOFOLLOW_RESULT, doc.xpath('/html/body').inner_html
          assert_equal doc, result
        end
      end

      context ":unprintable" do
        it "removes unprintable unicode characters" do
          doc = Loofah::HTML::Document.parse "#{UNPRINTABLE_FRAGMENT}"
          result = doc.scrub! :unprintable

          assert_equal UNPRINTABLE_RESULT, doc.xpath("/html/body").inner_html
          assert_equal doc, result
        end
      end
    end

    context "#scrub_document" do
      it "be a shortcut for parse-and-scrub" do
        # Expect Loofah.document to be called with the input and scrub! to be
        # called on whatever it returns.
        # NOTE(review): `mock` is presumably supplied by the mocking library
        # loaded through "helper" -- confirm.
        mock_doc = Object.new
        mock(Loofah).document(:string_or_io) { mock_doc }
        mock(mock_doc).scrub!(:method)

        Loofah.scrub_document(:string_or_io, :method)
      end
    end

    context "#text" do
      it "leave behind only inner text with html entities still escaped" do
        doc = Loofah::HTML::Document.parse "#{ENTITY_HACK_ATTACK}"
        result = doc.text

        assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
      end

      context "with encode_special_chars => false" do
        it "leave behind only inner text with html entities unescaped" do
          doc = Loofah::HTML::Document.parse "#{ENTITY_HACK_ATTACK}"
          result = doc.text(:encode_special_chars => false)

          assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC, result
        end
      end

      context "with encode_special_chars => true" do
        it "leave behind only inner text with html entities still escaped" do
          doc = Loofah::HTML::Document.parse "#{ENTITY_HACK_ATTACK}"
          result = doc.text(:encode_special_chars => true)

          assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
        end
      end
    end

    context "#to_s" do
      it "generate HTML" do
        doc = Loofah.scrub_document "quux\nfoo\n", :prune

        refute_nil doc.xpath("/html").first
        refute_nil doc.xpath("/html/head").first
        refute_nil doc.xpath("/html/body").first

        string = doc.to_s

        # NOTE(review): these patterns are empty and therefore match any
        # string; presumably each once named a distinct piece of markup --
        # confirm and restore.
        assert_match %r//, string
        assert_match %r//, string
        assert_match %r//, string
      end
    end

    context "#serialize" do
      it "generate HTML" do
        doc = Loofah.scrub_document "quux\nfoo\n", :prune

        refute_nil doc.xpath("/html").first
        refute_nil doc.xpath("/html/head").first
        refute_nil doc.xpath("/html/body").first

        string = doc.serialize

        # NOTE(review): empty patterns match any string -- see "#to_s" above
        # in this same group; confirm the intended markup and restore.
        assert_match %r//, string
        assert_match %r//, string
        assert_match %r//, string
      end
    end

    context "Node" do
      context "#scrub!" do
        it "only scrub subtree" do
          # NOTE(review): the heredoc body is empty, so at_css below has no
          # "div.scrub" element to find; the fixture markup was presumably
          # lost -- confirm and restore.
          xml = Loofah.document <<-EOHTML
          EOHTML

          node = xml.at_css "div.scrub"
          node.scrub!(:prune)

          assert_match %r/I should remain/, xml.to_s
          refute_match %r/I should be removed/, xml.to_s
        end
      end
    end

    context "NodeSet" do
      context "#scrub!" do
        it "only scrub subtrees" do
          # NOTE(review): empty heredoc body; the length assertion below
          # expects two "div.scrub" elements -- confirm and restore fixture.
          xml = Loofah.document <<-EOHTML
          EOHTML

          node_set = xml.css "div.scrub"
          assert_equal 2, node_set.length

          node_set.scrub!(:prune)

          assert_match %r/I should remain/, xml.to_s
          refute_match %r/I should be removed/, xml.to_s
          refute_match %r/I should also be removed/, xml.to_s
        end
      end
    end
  end

  context "DocumentFragment" do
    context "#scrub!" do
      # Results are read back through "./div", so each input is expected to be
      # wrapped in a div.
      # NOTE(review): the wrappers are currently bare newlines; the div markup
      # was presumably lost -- confirm.
      context ":escape" do
        it "escape bad tags" do
          doc = Loofah::HTML::DocumentFragment.parse "\n#{INVALID_FRAGMENT}\n"
          result = doc.scrub! :escape

          assert_equal INVALID_ESCAPED, doc.xpath("./div").inner_html
          assert_equal doc, result
        end
      end

      context ":prune" do
        it "prune bad tags" do
          doc = Loofah::HTML::DocumentFragment.parse "\n#{INVALID_FRAGMENT}\n"
          result = doc.scrub! :prune

          assert_equal INVALID_PRUNED, doc.xpath("./div").inner_html
          assert_equal doc, result
        end
      end

      context ":strip" do
        it "strip bad tags" do
          doc = Loofah::HTML::DocumentFragment.parse "\n#{INVALID_FRAGMENT}\n"
          result = doc.scrub! :strip

          assert_equal INVALID_STRIPPED, doc.xpath("./div").inner_html
          assert_equal doc, result
        end
      end

      context ":whitewash" do
        it "whitewash the markup" do
          doc = Loofah::HTML::DocumentFragment.parse "\n#{WHITEWASH_FRAGMENT}\n"
          result = doc.scrub! :whitewash

          assert_equal WHITEWASH_RESULT, doc.xpath("./div").inner_html
          assert_equal doc, result
        end
      end

      context ":nofollow" do
        context "for a hyperlink that does not have a rel attribute" do
          it "add a 'nofollow' attribute to hyperlinks" do
            doc = Loofah::HTML::DocumentFragment.parse "\n#{NOFOLLOW_FRAGMENT}\n"
            result = doc.scrub! :nofollow

            assert_equal NOFOLLOW_RESULT, doc.xpath("./div").inner_html
            assert_equal doc, result
          end
        end

        context "for a hyperlink that does have a rel attribute" do
          it "appends nofollow to rel attribute" do
            doc = Loofah::HTML::DocumentFragment.parse "\n#{NOFOLLOW_WITH_REL_FRAGMENT}\n"
            result = doc.scrub! :nofollow

            assert_equal NOFOLLOW_WITH_REL_RESULT, doc.xpath("./div").inner_html
            assert_equal doc, result
          end
        end
      end

      context ":noopener" do
        context "for a hyperlink without a 'rel' attribute" do
          it "add a 'noopener' attribute to hyperlinks" do
            doc = Loofah::HTML::DocumentFragment.parse "\n#{NOOPENER_FRAGMENT}\n"
            result = doc.scrub! :noopener

            assert_equal NOOPENER_RESULT, doc.xpath("./div").inner_html
            assert_equal doc, result
          end
        end

        context "for a hyperlink that does have a rel attribute" do
          it "appends 'noopener' to 'rel' attribute" do
            doc = Loofah::HTML::DocumentFragment.parse "\n#{NOOPENER_WITH_REL_FRAGMENT}\n"
            result = doc.scrub! :noopener

            assert_equal NOOPENER_WITH_REL_RESULT, doc.xpath("./div").inner_html
            assert_equal doc, result
          end
        end
      end

      context ":unprintable" do
        it "removes unprintable unicode characters" do
          doc = Loofah::HTML::DocumentFragment.parse "\n#{UNPRINTABLE_FRAGMENT}\n"
          result = doc.scrub! :unprintable

          assert_equal UNPRINTABLE_RESULT, doc.xpath("./div").inner_html
          assert_equal doc, result
        end
      end
    end

    context "#scrub_fragment" do
      it "be a shortcut for parse-and-scrub" do
        # Expect Loofah.fragment to be called with the input and scrub! to be
        # called on whatever it returns.
        mock_doc = Object.new
        mock(Loofah).fragment(:string_or_io) { mock_doc }
        mock(mock_doc).scrub!(:method)

        Loofah.scrub_fragment(:string_or_io, :method)
      end
    end

    context "#text" do
      it "leave behind only inner text with html entities still escaped" do
        doc = Loofah::HTML::DocumentFragment.parse "\n#{ENTITY_HACK_ATTACK}\n"
        result = doc.text

        assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
      end

      context "with encode_special_chars => false" do
        it "leave behind only inner text with html entities unescaped" do
          doc = Loofah::HTML::DocumentFragment.parse "\n#{ENTITY_HACK_ATTACK}\n"
          result = doc.text(:encode_special_chars => false)

          assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC, result
        end
      end

      context "with encode_special_chars => true" do
        it "leave behind only inner text with html entities still escaped" do
          doc = Loofah::HTML::DocumentFragment.parse "\n#{ENTITY_HACK_ATTACK}\n"
          result = doc.text(:encode_special_chars => true)

          assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
        end
      end
    end

    context "#to_s" do
      it "not remove entities" do
        string = Loofah.scrub_fragment(ENTITY_FRAGMENT, :prune).to_s

        assert_match %r/this is </, string
      end
    end

    context "Node" do
      context "#scrub!" do
        it "only scrub subtree" do
          # NOTE(review): the heredoc body is empty, so at_css below has no
          # "div.scrub" element to find; the fixture markup was presumably
          # lost -- confirm and restore.
          xml = Loofah.fragment <<-EOHTML
          EOHTML

          node = xml.at_css "div.scrub"
          node.scrub!(:prune)

          assert_match %r(I should remain), xml.to_s
          refute_match %r(I should be removed), xml.to_s
        end
      end
    end

    context "NodeSet" do
      context "#scrub!" do
        it "only scrub subtrees" do
          # NOTE(review): empty heredoc body; the length assertion below
          # expects two "div.scrub" elements -- confirm and restore fixture.
          xml = Loofah.fragment <<-EOHTML
          EOHTML

          node_set = xml.css "div.scrub"
          assert_equal 2, node_set.length

          node_set.scrub!(:prune)

          assert_match %r/I should remain/, xml.to_s
          refute_match %r/I should be removed/, xml.to_s
          refute_match %r/I should also be removed/, xml.to_s
        end
      end
    end
  end
end