require "helper"
class IntegrationTestScrubbers < Loofah::TestCase
# Shared fixtures: input snippets and the text each scrubber example compares against.
#
# NOTE(review): every hyperlink FRAGMENT/RESULT pair below holds the same plain
# text, so a scrubber's effect cannot be observed through them. Presumably the
# surrounding markup is missing from these literals -- confirm against the
# intended fixtures before relying on these examples.

# Input containing an unknown element, and the expected result per scrubber.
INVALID_FRAGMENT = "foobar\nbazz\nquux\n"
INVALID_ESCAPED = "<invalid>foo<p>bar</p>bazz</invalid>quux\n"
INVALID_PRUNED = "quux\n"
INVALID_STRIPPED = "foobar\nbazzquux\n"

WHITEWASH_FRAGMENT = "nofoo\nbar"
WHITEWASH_RESULT = "foo\n"

NOFOLLOW_FRAGMENT = 'Click here'
NOFOLLOW_RESULT = 'Click here'
NOFOLLOW_WITH_REL_FRAGMENT = 'Click here'
NOFOLLOW_WITH_REL_RESULT = 'Click here'

NOOPENER_FRAGMENT = 'Click here'
NOOPENER_RESULT = 'Click here'
NOOPENER_WITH_REL_FRAGMENT = 'Click here'
NOOPENER_WITH_REL_RESULT = 'Click here'

# U+2028 / U+2029 are the line and paragraph separators the :unprintable examples strip.
UNPRINTABLE_FRAGMENT = "Lo\u2029ofah ro\u2028cks!"
UNPRINTABLE_RESULT = "Loofah rocks!"

# %Q() keeps the embedded double quotes literal. Written as a plain "..." string
# this line parsed as `"..." & "..."` and raised NoMethodError while the class
# body was being evaluated.
ENTITY_FRAGMENT = %Q(this is < that "&" the other > boo'ya\nw00t\n)
ENTITY_TEXT = %Q(this is < that "&" the other > boo\'yaw00t)

ENTITY_HACK_ATTACK = "Hack attack!\n<script>alert('evil')</script>\n"
ENTITY_HACK_ATTACK_TEXT_SCRUB = "Hack attack!<script>alert('evil')</script>"
ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC = "Hack attack!"
context "Document" do
context "#scrub!" do
  # Each example parses a whole document, scrubs it in place, compares the
  # <body> contents with the expected constant, and confirms that #scrub!
  # handed back the document it was called on.
  context ":escape" do
    it "escape bad tags" do
      document = Loofah::HTML::Document.parse("#{INVALID_FRAGMENT}")
      returned = document.scrub!(:escape)

      assert_equal(INVALID_ESCAPED, document.xpath("/html/body").inner_html)
      assert_equal(document, returned)
    end
  end

  context ":prune" do
    it "prune bad tags" do
      document = Loofah::HTML::Document.parse("#{INVALID_FRAGMENT}")
      returned = document.scrub!(:prune)

      assert_equal(INVALID_PRUNED, document.xpath("/html/body").inner_html)
      assert_equal(document, returned)
    end
  end

  context ":strip" do
    it "strip bad tags" do
      document = Loofah::HTML::Document.parse("#{INVALID_FRAGMENT}")
      returned = document.scrub!(:strip)

      assert_equal(INVALID_STRIPPED, document.xpath("/html/body").inner_html)
      assert_equal(document, returned)
    end
  end

  context ":whitewash" do
    it "whitewash the markup" do
      document = Loofah::HTML::Document.parse("#{WHITEWASH_FRAGMENT}")
      returned = document.scrub!(:whitewash)

      assert_equal(WHITEWASH_RESULT, document.xpath("/html/body").inner_html)
      assert_equal(document, returned)
    end
  end

  context ":nofollow" do
    it "add a 'nofollow' attribute to hyperlinks" do
      document = Loofah::HTML::Document.parse("#{NOFOLLOW_FRAGMENT}")
      returned = document.scrub!(:nofollow)

      assert_equal(NOFOLLOW_RESULT, document.xpath("/html/body").inner_html)
      assert_equal(document, returned)
    end
  end

  context ":unprintable" do
    it "removes unprintable unicode characters" do
      document = Loofah::HTML::Document.parse("#{UNPRINTABLE_FRAGMENT}")
      returned = document.scrub!(:unprintable)

      assert_equal(UNPRINTABLE_RESULT, document.xpath("/html/body").inner_html)
      assert_equal(document, returned)
    end
  end
end
context "#scrub_document" do
  it "be a shortcut for parse-and-scrub" do
    # Expect Loofah.document to be asked for a document, and that stand-in
    # document to then receive #scrub! with the scrubber argument.
    parsed_double = Object.new
    mock(Loofah).document(:string_or_io) { parsed_double }
    mock(parsed_double).scrub!(:method)

    Loofah.scrub_document(:string_or_io, :method)
  end
end
context "#text" do
  # Covers #text with no options and with :encode_special_chars set both ways.
  it "leave behind only inner text with html entities still escaped" do
    document = Loofah::HTML::Document.parse("#{ENTITY_HACK_ATTACK}")
    extracted = document.text

    assert_equal(ENTITY_HACK_ATTACK_TEXT_SCRUB, extracted)
  end

  context "with encode_special_chars => false" do
    it "leave behind only inner text with html entities unescaped" do
      document = Loofah::HTML::Document.parse("#{ENTITY_HACK_ATTACK}")
      extracted = document.text(encode_special_chars: false)

      assert_equal(ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC, extracted)
    end
  end

  context "with encode_special_chars => true" do
    it "leave behind only inner text with html entities still escaped" do
      document = Loofah::HTML::Document.parse("#{ENTITY_HACK_ATTACK}")
      extracted = document.text(encode_special_chars: true)

      assert_equal(ENTITY_HACK_ATTACK_TEXT_SCRUB, extracted)
    end
  end
end
context "#to_s" do
  it "generate HTML" do
    # The fixture spells out <head> and <body> so that each xpath lookup below
    # has a node to find; bare text gives the parser no <head> to build.
    doc = Loofah.scrub_document "<html><head><title>quux</title></head><body><div>foo</div></body></html>", :prune
    refute_nil doc.xpath("/html").first
    refute_nil doc.xpath("/html/head").first
    refute_nil doc.xpath("/html/body").first

    # An empty pattern matches any string, so these look for concrete pieces
    # of the serialized document instead.
    string = doc.to_s
    assert_match %r/<!DOCTYPE/, string
    assert_match %r/<html>/, string
    assert_match %r/<body>/, string
  end
end
context "#serialize" do
  it "generate HTML" do
    # The fixture spells out <head> and <body> so that each xpath lookup below
    # has a node to find; bare text gives the parser no <head> to build.
    doc = Loofah.scrub_document "<html><head><title>quux</title></head><body><div>foo</div></body></html>", :prune
    refute_nil doc.xpath("/html").first
    refute_nil doc.xpath("/html/head").first
    refute_nil doc.xpath("/html/body").first

    # An empty pattern matches any string, so these look for concrete pieces
    # of the serialized document instead.
    string = doc.serialize
    assert_match %r/<!DOCTYPE/, string
    assert_match %r/<html>/, string
    assert_match %r/<body>/, string
  end
end
context "Node" do
  context "#scrub!" do
    it "only scrub subtree" do
      # Two sibling divs each wrap a <script>. Only the div matched by
      # "div.scrub" is scrubbed, so only its script text should vanish.
      # With an empty fixture, at_css finds nothing and node is nil.
      xml = Loofah.document <<-EOHTML
        <html><body>
          <div class='scrub'>
            <script>I should be removed</script>
          </div>
          <div class='keep'>
            <script>I should remain</script>
          </div>
        </body></html>
      EOHTML
      node = xml.at_css "div.scrub"
      node.scrub!(:prune)
      assert_match %r/I should remain/, xml.to_s
      refute_match %r/I should be removed/, xml.to_s
    end
  end
end
context "NodeSet" do
  context "#scrub!" do
    it "only scrub subtrees" do
      # Three sibling divs each wrap a <script>; exactly two carry the "scrub"
      # class, which is what the length assertion below relies on. The
      # unscrubbed div in the middle must keep its script text.
      xml = Loofah.document <<-EOHTML
        <html><body>
          <div class='scrub'>
            <script>I should be removed</script>
          </div>
          <div class='keep'>
            <script>I should remain</script>
          </div>
          <div class='scrub'>
            <script>I should also be removed</script>
          </div>
        </body></html>
      EOHTML
      node_set = xml.css "div.scrub"
      assert_equal 2, node_set.length
      node_set.scrub!(:prune)
      assert_match %r/I should remain/, xml.to_s
      refute_match %r/I should be removed/, xml.to_s
      refute_match %r/I should also be removed/, xml.to_s
    end
  end
end
end
context "DocumentFragment" do
context "#scrub!" do
  # Each example parses a fragment built from a fixture constant plus a
  # trailing newline, scrubs it in place, compares the contents selected by
  # "./div" with the expected constant, and confirms that #scrub! handed back
  # the fragment it was called on.
  context ":escape" do
    it "escape bad tags" do
      fragment = Loofah::HTML::DocumentFragment.parse("#{INVALID_FRAGMENT}\n")
      returned = fragment.scrub!(:escape)

      assert_equal(INVALID_ESCAPED, fragment.xpath("./div").inner_html)
      assert_equal(fragment, returned)
    end
  end

  context ":prune" do
    it "prune bad tags" do
      fragment = Loofah::HTML::DocumentFragment.parse("#{INVALID_FRAGMENT}\n")
      returned = fragment.scrub!(:prune)

      assert_equal(INVALID_PRUNED, fragment.xpath("./div").inner_html)
      assert_equal(fragment, returned)
    end
  end

  context ":strip" do
    it "strip bad tags" do
      fragment = Loofah::HTML::DocumentFragment.parse("#{INVALID_FRAGMENT}\n")
      returned = fragment.scrub!(:strip)

      assert_equal(INVALID_STRIPPED, fragment.xpath("./div").inner_html)
      assert_equal(fragment, returned)
    end
  end

  context ":whitewash" do
    it "whitewash the markup" do
      fragment = Loofah::HTML::DocumentFragment.parse("#{WHITEWASH_FRAGMENT}\n")
      returned = fragment.scrub!(:whitewash)

      assert_equal(WHITEWASH_RESULT, fragment.xpath("./div").inner_html)
      assert_equal(fragment, returned)
    end
  end

  context ":nofollow" do
    context "for a hyperlink that does not have a rel attribute" do
      it "add a 'nofollow' attribute to hyperlinks" do
        fragment = Loofah::HTML::DocumentFragment.parse("#{NOFOLLOW_FRAGMENT}\n")
        returned = fragment.scrub!(:nofollow)

        assert_equal(NOFOLLOW_RESULT, fragment.xpath("./div").inner_html)
        assert_equal(fragment, returned)
      end
    end

    context "for a hyperlink that does have a rel attribute" do
      it "appends nofollow to rel attribute" do
        fragment = Loofah::HTML::DocumentFragment.parse("#{NOFOLLOW_WITH_REL_FRAGMENT}\n")
        returned = fragment.scrub!(:nofollow)

        assert_equal(NOFOLLOW_WITH_REL_RESULT, fragment.xpath("./div").inner_html)
        assert_equal(fragment, returned)
      end
    end
  end

  context ":noopener" do
    context "for a hyperlink without a 'rel' attribute" do
      it "add a 'noopener' attribute to hyperlinks" do
        fragment = Loofah::HTML::DocumentFragment.parse("#{NOOPENER_FRAGMENT}\n")
        returned = fragment.scrub!(:noopener)

        assert_equal(NOOPENER_RESULT, fragment.xpath("./div").inner_html)
        assert_equal(fragment, returned)
      end
    end

    context "for a hyperlink that does have a rel attribute" do
      it "appends 'noopener' to 'rel' attribute" do
        fragment = Loofah::HTML::DocumentFragment.parse("#{NOOPENER_WITH_REL_FRAGMENT}\n")
        returned = fragment.scrub!(:noopener)

        assert_equal(NOOPENER_WITH_REL_RESULT, fragment.xpath("./div").inner_html)
        assert_equal(fragment, returned)
      end
    end
  end

  context ":unprintable" do
    it "removes unprintable unicode characters" do
      fragment = Loofah::HTML::DocumentFragment.parse("#{UNPRINTABLE_FRAGMENT}\n")
      returned = fragment.scrub!(:unprintable)

      assert_equal(UNPRINTABLE_RESULT, fragment.xpath("./div").inner_html)
      assert_equal(fragment, returned)
    end
  end
end
context "#scrub_fragment" do
  it "be a shortcut for parse-and-scrub" do
    # Expect Loofah.fragment to be asked for a fragment, and that stand-in
    # fragment to then receive #scrub! with the scrubber argument.
    parsed_double = Object.new
    mock(Loofah).fragment(:string_or_io) { parsed_double }
    mock(parsed_double).scrub!(:method)

    Loofah.scrub_fragment(:string_or_io, :method)
  end
end
context "#text" do
  # Covers #text with no options and with :encode_special_chars set both ways.
  it "leave behind only inner text with html entities still escaped" do
    fragment = Loofah::HTML::DocumentFragment.parse("#{ENTITY_HACK_ATTACK}\n")
    extracted = fragment.text

    assert_equal(ENTITY_HACK_ATTACK_TEXT_SCRUB, extracted)
  end

  context "with encode_special_chars => false" do
    it "leave behind only inner text with html entities unescaped" do
      fragment = Loofah::HTML::DocumentFragment.parse("#{ENTITY_HACK_ATTACK}\n")
      extracted = fragment.text(encode_special_chars: false)

      assert_equal(ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC, extracted)
    end
  end

  context "with encode_special_chars => true" do
    it "leave behind only inner text with html entities still escaped" do
      fragment = Loofah::HTML::DocumentFragment.parse("#{ENTITY_HACK_ATTACK}\n")
      extracted = fragment.text(encode_special_chars: true)

      assert_equal(ENTITY_HACK_ATTACK_TEXT_SCRUB, extracted)
    end
  end
end
context "#to_s" do
  it "not remove entities" do
    string = Loofah.scrub_fragment(ENTITY_FRAGMENT, :prune).to_s
    # The example is about entities surviving serialization, so look for the
    # escaped form of "<" rather than a raw angle bracket.
    assert_match %r/this is &lt;/, string
  end
end
context "Node" do
  context "#scrub!" do
    it "only scrub subtree" do
      # Two sibling divs each wrap a <script>. Only the div matched by
      # "div.scrub" is scrubbed, so only its script text should vanish.
      # With an empty fixture, at_css finds nothing and node is nil.
      xml = Loofah.fragment <<-EOHTML
        <div class='scrub'>
          <script>I should be removed</script>
        </div>
        <div class='keep'>
          <script>I should remain</script>
        </div>
      EOHTML
      node = xml.at_css "div.scrub"
      node.scrub!(:prune)
      assert_match %r(I should remain), xml.to_s
      refute_match %r(I should be removed), xml.to_s
    end
  end
end
context "NodeSet" do
  context "#scrub!" do
    it "only scrub subtrees" do
      # Three sibling divs each wrap a <script>; exactly two carry the "scrub"
      # class, which is what the length assertion below relies on. The
      # unscrubbed div in the middle must keep its script text.
      xml = Loofah.fragment <<-EOHTML
        <div class='scrub'>
          <script>I should be removed</script>
        </div>
        <div class='keep'>
          <script>I should remain</script>
        </div>
        <div class='scrub'>
          <script>I should also be removed</script>
        </div>
      EOHTML
      node_set = xml.css "div.scrub"
      assert_equal 2, node_set.length
      node_set.scrub!(:prune)
      assert_match %r/I should remain/, xml.to_s
      refute_match %r/I should be removed/, xml.to_s
      refute_match %r/I should also be removed/, xml.to_s
    end
  end
end
end
end