require File.expand_path(File.join(File.dirname(__FILE__), '..', 'helper'))
# Tests for Loofah's scrubbing entry points, exercised once against whole
# HTML documents and once against document fragments.
#
# NOTE(review): the HTML in these fixtures was rebuilt from what the assertions
# require (xpaths, CSS selectors, expected strings). The markup had been lost,
# leaving empty heredocs, empty regexes and an ENTITY_FRAGMENT expression of the
# form "..." & "..." that raised NoMethodError while this class body was being
# evaluated. Confirm the exact markup against the upstream suite.
class TestScrubbers < Test::Unit::TestCase
  # Input containing an unknown element, followed by the markup each scrub
  # method is expected to leave inside the wrapping element.
  INVALID_FRAGMENT = "<invalid>foo<p>bar</p>bazz</invalid><div>quux</div>"
  INVALID_ESCAPED  = "&lt;invalid&gt;foo&lt;p&gt;bar&lt;/p&gt;bazz&lt;/invalid&gt;<div>quux</div>"
  INVALID_PRUNED   = "<div>quux</div>"
  INVALID_STRIPPED = "foo<p>bar</p>bazz<div>quux</div>"

  # Input for :whitewash and the markup expected to survive it.
  # NOTE(review): namespaced element / id attribute / unknown element are an
  # assumption about what :whitewash removes -- confirm.
  WHITEWASH_FRAGMENT = "<o:div>no</o:div><div id='no'>foo</div><invalid>bar</invalid>"
  WHITEWASH_RESULT   = "<div>foo</div>"

  # A hyperlink before and after :nofollow.
  NOFOLLOW_FRAGMENT = '<a href="http://www.example.com/">Click here</a>'
  NOFOLLOW_RESULT   = '<a href="http://www.example.com/" rel="nofollow">Click here</a>'

  # Entity-encoded markup and its decoded text form.
  ENTITY_FRAGMENT = "<p>this is &lt; that &quot;&amp;&quot; the other &gt; boo&apos;ya</p><div>w00t</div>"
  ENTITY_TEXT     = %Q(this is < that "&" the other > boo\'yaw00t)

  # A script tag smuggled in as entities. #text must hand it back still
  # escaped unless the caller passes :encode_special_chars => false.
  ENTITY_HACK_ATTACK                  = "<div><div>Hack attack!</div><div>&lt;script&gt;alert('evil')&lt;/script&gt;</div></div>"
  ENTITY_HACK_ATTACK_TEXT_SCRUB       = "Hack attack!&lt;script&gt;alert('evil')&lt;/script&gt;"
  ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC = "Hack attack!<script>alert('evil')</script>"

  context "Document" do
    context "#scrub!" do
      context ":escape" do
        should "escape bad tags" do
          doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
          result = doc.scrub! :escape

          assert_equal INVALID_ESCAPED, doc.xpath('/html/body').inner_html
          # scrub! is expected to hand back the document it was called on.
          assert_equal doc, result
        end
      end

      context ":prune" do
        should "prune bad tags" do
          doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
          result = doc.scrub! :prune

          assert_equal INVALID_PRUNED, doc.xpath('/html/body').inner_html
          assert_equal doc, result
        end
      end

      context ":strip" do
        should "strip bad tags" do
          doc = Loofah::HTML::Document.parse "<html><body>#{INVALID_FRAGMENT}</body></html>"
          result = doc.scrub! :strip

          assert_equal INVALID_STRIPPED, doc.xpath('/html/body').inner_html
          assert_equal doc, result
        end
      end

      context ":whitewash" do
        should "whitewash the markup" do
          doc = Loofah::HTML::Document.parse "<html><body>#{WHITEWASH_FRAGMENT}</body></html>"
          result = doc.scrub! :whitewash

          assert_equal WHITEWASH_RESULT, doc.xpath('/html/body').inner_html
          assert_equal doc, result
        end
      end

      context ":nofollow" do
        should "add a 'nofollow' attribute to hyperlinks" do
          doc = Loofah::HTML::Document.parse "<html><body>#{NOFOLLOW_FRAGMENT}</body></html>"
          result = doc.scrub! :nofollow

          assert_equal NOFOLLOW_RESULT, doc.xpath('/html/body').inner_html
          assert_equal doc, result
        end
      end
    end

    context "#scrub_document" do
      should "be a shortcut for parse-and-scrub" do
        # Symbols stand in for the real arguments; only the delegation to
        # Loofah.document followed by scrub! is being checked.
        mock_doc = mock
        Loofah.expects(:document).with(:string_or_io).returns(mock_doc)
        mock_doc.expects(:scrub!).with(:method)

        Loofah.scrub_document(:string_or_io, :method)
      end
    end

    context "#text" do
      should "leave behind only inner text with html entities still escaped" do
        doc = Loofah::HTML::Document.parse "<html><body>#{ENTITY_HACK_ATTACK}</body></html>"
        result = doc.text

        assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
      end

      context "with encode_special_chars => false" do
        should "leave behind only inner text with html entities unescaped" do
          doc = Loofah::HTML::Document.parse "<html><body>#{ENTITY_HACK_ATTACK}</body></html>"
          result = doc.text(:encode_special_chars => false)

          assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC, result
        end
      end

      context "with encode_special_chars => true" do
        should "leave behind only inner text with html entities still escaped" do
          doc = Loofah::HTML::Document.parse "<html><body>#{ENTITY_HACK_ATTACK}</body></html>"
          result = doc.text(:encode_special_chars => true)

          assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
        end
      end
    end

    context "#to_s" do
      should "generate HTML" do
        doc = Loofah.scrub_document "<html><head><title>quux</title></head><body><div>foo</div></body></html>", :prune
        assert_not_nil doc.xpath("/html").first
        assert_not_nil doc.xpath("/html/head").first
        assert_not_nil doc.xpath("/html/body").first

        # The serialized string must carry the same structural tags that the
        # xpath checks above found in the tree.
        string = doc.to_s
        assert_contains string, /<html>/
        assert_contains string, /<head>/
        assert_contains string, /<body>/
      end
    end

    context "#serialize" do
      should "generate HTML" do
        doc = Loofah.scrub_document "<html><head><title>quux</title></head><body><div>foo</div></body></html>", :prune
        assert_not_nil doc.xpath("/html").first
        assert_not_nil doc.xpath("/html/head").first
        assert_not_nil doc.xpath("/html/body").first

        string = doc.serialize
        assert_contains string, /<html>/
        assert_contains string, /<head>/
        assert_contains string, /<body>/
      end
    end

    context "Node" do
      context "#scrub!" do
        should "only scrub subtree" do
          # Both divs hold content that :prune is assumed to remove (a script
          # element -- confirm); only the one selected below may be touched.
          xml = Loofah.document <<-EOHTML
            <html><body>
              <div class='scrub'>
                <script>I should be removed</script>
              </div>
              <div class='noscrub'>
                <script>I should remain</script>
              </div>
            </body></html>
          EOHTML
          node = xml.at_css "div.scrub"
          node.scrub!(:prune)

          assert_contains xml.to_s, /I should remain/
          assert_does_not_contain xml.to_s, /I should be removed/
        end
      end
    end

    context "NodeSet" do
      context "#scrub!" do
        should "only scrub subtrees" do
          # Two matching divs surround one that must be left alone.
          xml = Loofah.document <<-EOHTML
            <html><body>
              <div class='scrub'>
                <script>I should be removed</script>
              </div>
              <div class='noscrub'>
                <script>I should remain</script>
              </div>
              <div class='scrub'>
                <script>I should also be removed</script>
              </div>
            </body></html>
          EOHTML
          node_set = xml.css "div.scrub"
          assert_equal 2, node_set.length
          node_set.scrub!(:prune)

          assert_contains xml.to_s, /I should remain/
          assert_does_not_contain xml.to_s, /I should be removed/
          assert_does_not_contain xml.to_s, /I should also be removed/
        end
      end
    end
  end

  context "DocumentFragment" do
    context "#scrub!" do
      context ":escape" do
        should "escape bad tags" do
          # The fixture is wrapped in a div so the result can be read back
          # through the "./div" xpath.
          doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
          result = doc.scrub! :escape

          assert_equal INVALID_ESCAPED, doc.xpath("./div").inner_html
          assert_equal doc, result
        end
      end

      context ":prune" do
        should "prune bad tags" do
          doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
          result = doc.scrub! :prune

          assert_equal INVALID_PRUNED, doc.xpath("./div").inner_html
          assert_equal doc, result
        end
      end

      context ":strip" do
        should "strip bad tags" do
          doc = Loofah::HTML::DocumentFragment.parse "<div>#{INVALID_FRAGMENT}</div>"
          result = doc.scrub! :strip

          assert_equal INVALID_STRIPPED, doc.xpath("./div").inner_html
          assert_equal doc, result
        end
      end

      context ":whitewash" do
        should "whitewash the markup" do
          doc = Loofah::HTML::DocumentFragment.parse "<div>#{WHITEWASH_FRAGMENT}</div>"
          result = doc.scrub! :whitewash

          assert_equal WHITEWASH_RESULT, doc.xpath("./div").inner_html
          assert_equal doc, result
        end
      end

      context ":nofollow" do
        should "add a 'nofollow' attribute to hyperlinks" do
          doc = Loofah::HTML::DocumentFragment.parse "<div>#{NOFOLLOW_FRAGMENT}</div>"
          result = doc.scrub! :nofollow

          assert_equal NOFOLLOW_RESULT, doc.xpath("./div").inner_html
          assert_equal doc, result
        end
      end
    end

    context "#scrub_fragment" do
      should "be a shortcut for parse-and-scrub" do
        # Symbols stand in for the real arguments; only the delegation to
        # Loofah.fragment followed by scrub! is being checked.
        mock_doc = mock
        Loofah.expects(:fragment).with(:string_or_io).returns(mock_doc)
        mock_doc.expects(:scrub!).with(:method)

        Loofah.scrub_fragment(:string_or_io, :method)
      end
    end

    context "#text" do
      should "leave behind only inner text with html entities still escaped" do
        doc = Loofah::HTML::DocumentFragment.parse "<div>#{ENTITY_HACK_ATTACK}</div>"
        result = doc.text

        assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
      end

      context "with encode_special_chars => false" do
        should "leave behind only inner text with html entities unescaped" do
          doc = Loofah::HTML::DocumentFragment.parse "<div>#{ENTITY_HACK_ATTACK}</div>"
          result = doc.text(:encode_special_chars => false)

          assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB_UNESC, result
        end
      end

      context "with encode_special_chars => true" do
        should "leave behind only inner text with html entities still escaped" do
          doc = Loofah::HTML::DocumentFragment.parse "<div>#{ENTITY_HACK_ATTACK}</div>"
          result = doc.text(:encode_special_chars => true)

          assert_equal ENTITY_HACK_ATTACK_TEXT_SCRUB, result
        end
      end
    end

    context "#to_s" do
      should "not remove entities" do
        string = Loofah.scrub_fragment(ENTITY_FRAGMENT, :prune).to_s

        # The serialized output must still contain the entity, not a raw "<".
        assert_contains string, /this is &lt;/
      end
    end

    context "Node" do
      context "#scrub!" do
        should "only scrub subtree" do
          # Both divs hold content that :prune is assumed to remove (a script
          # element -- confirm); only the one selected below may be touched.
          xml = Loofah.fragment <<-EOHTML
            <div class='scrub'>
              <script>I should be removed</script>
            </div>
            <div class='noscrub'>
              <script>I should remain</script>
            </div>
          EOHTML
          node = xml.at_css "div.scrub"
          node.scrub!(:prune)

          assert_contains xml.to_s, /I should remain/
          assert_does_not_contain xml.to_s, /I should be removed/
        end
      end
    end

    context "NodeSet" do
      context "#scrub!" do
        should "only scrub subtrees" do
          # Two matching divs surround one that must be left alone.
          xml = Loofah.fragment <<-EOHTML
            <div class='scrub'>
              <script>I should be removed</script>
            </div>
            <div class='noscrub'>
              <script>I should remain</script>
            </div>
            <div class='scrub'>
              <script>I should also be removed</script>
            </div>
          EOHTML
          node_set = xml.css "div.scrub"
          assert_equal 2, node_set.length
          node_set.scrub!(:prune)

          assert_contains xml.to_s, /I should remain/
          assert_does_not_contain xml.to_s, /I should be removed/
          assert_does_not_contain xml.to_s, /I should also be removed/
        end
      end
    end
  end
end