test/test_sanitize.rb in sanitize-2.1.1 vs test/test_sanitize.rb in sanitize-3.0.0
- old
+ new
@@ -1,721 +1,93 @@
# encoding: utf-8
-#--
-# Copyright (c) 2013 Ryan Grove <ryan@wonko.com>
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the 'Software'), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-#++
+require_relative 'common'
-require 'rubygems'
-gem 'minitest'
-
-require 'minitest/autorun'
-require 'sanitize'
-
-strings = {
- :basic => {
- :html => '<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>',
- :default => 'Lorem ipsum dolor sit amet alert("hello world");',
- :restricted => '<b>Lorem</b> ipsum <strong>dolor</strong> sit amet alert("hello world");',
- :basic => '<b>Lorem</b> <a href="pants" rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet alert("hello world");',
- :relaxed => '<b>Lorem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br>amet alert("hello world");'
- },
-
- :malformed => {
- :html => 'Lo<!-- comment -->rem</b> <a href=pants title="foo>ipsum <a href="http://foo.com/"><strong>dolor</a></strong> sit<br/>amet <script>alert("hello world");',
- :default => 'Lorem dolor sit amet alert("hello world");',
- :restricted => 'Lorem <strong>dolor</strong> sit amet alert("hello world");',
- :basic => 'Lorem <a href="pants" rel="nofollow"><strong>dolor</strong></a> sit<br>amet alert("hello world");',
- :relaxed => 'Lorem <a href="pants" title="foo>ipsum <a href="><strong>dolor</strong></a> sit<br>amet alert("hello world");',
- :document => ' Lorem dolor sit amet alert("hello world"); '
- },
-
- :unclosed => {
- :html => '<p>a</p><blockquote>b',
- :default => ' a b ',
- :restricted => ' a b ',
- :basic => '<p>a</p><blockquote>b</blockquote>',
- :relaxed => '<p>a</p><blockquote>b</blockquote>'
- },
-
- :malicious => {
- :html => '<b>Lo<!-- comment -->rem</b> <a href="javascript:pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <<foo>script>alert("hello world");</script>',
- :default => 'Lorem ipsum dolor sit amet <script>alert("hello world");',
- :restricted => '<b>Lorem</b> ipsum <strong>dolor</strong> sit amet <script>alert("hello world");',
- :basic => '<b>Lorem</b> <a rel="nofollow">ipsum</a> <a href="http://foo.com/" rel="nofollow"><strong>dolor</strong></a> sit<br>amet <script>alert("hello world");',
- :relaxed => '<b>Lorem</b> <a title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br>amet <script>alert("hello world");'
- },
-
- :raw_comment => {
- :html => '<!-- comment -->Hello',
- :default => 'Hello',
- :restricted => 'Hello',
- :basic => 'Hello',
- :relaxed => 'Hello',
- :document => ' Hello ',
- }
-}
-
-tricky = {
- 'protocol-based JS injection: simple, no spaces' => {
- :html => '<a href="javascript:alert(\'XSS\');">foo</a>',
- :default => 'foo',
- :restricted => 'foo',
- :basic => '<a rel="nofollow">foo</a>',
- :relaxed => '<a>foo</a>'
- },
-
- 'protocol-based JS injection: simple, spaces before' => {
- :html => '<a href="javascript :alert(\'XSS\');">foo</a>',
- :default => 'foo',
- :restricted => 'foo',
- :basic => '<a rel="nofollow">foo</a>',
- :relaxed => '<a>foo</a>'
- },
-
- 'protocol-based JS injection: simple, spaces after' => {
- :html => '<a href="javascript: alert(\'XSS\');">foo</a>',
- :default => 'foo',
- :restricted => 'foo',
- :basic => '<a rel="nofollow">foo</a>',
- :relaxed => '<a>foo</a>'
- },
-
- 'protocol-based JS injection: simple, spaces before and after' => {
- :html => '<a href="javascript : alert(\'XSS\');">foo</a>',
- :default => 'foo',
- :restricted => 'foo',
- :basic => '<a rel="nofollow">foo</a>',
- :relaxed => '<a>foo</a>'
- },
-
- 'protocol-based JS injection: preceding colon' => {
- :html => '<a href=":javascript:alert(\'XSS\');">foo</a>',
- :default => 'foo',
- :restricted => 'foo',
- :basic => '<a rel="nofollow">foo</a>',
- :relaxed => '<a>foo</a>'
- },
-
- 'protocol-based JS injection: UTF-8 encoding' => {
- :html => '<a href="javascript&#58;">foo</a>',
- :default => 'foo',
- :restricted => 'foo',
- :basic => '<a rel="nofollow">foo</a>',
- :relaxed => '<a>foo</a>'
- },
-
- 'protocol-based JS injection: long UTF-8 encoding' => {
- :html => '<a href="javascript&#0058;">foo</a>',
- :default => 'foo',
- :restricted => 'foo',
- :basic => '<a rel="nofollow">foo</a>',
- :relaxed => '<a>foo</a>'
- },
-
- 'protocol-based JS injection: long UTF-8 encoding without semicolons' => {
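- :html => '<a href=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>foo</a>',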
- :default => 'foo',
- :restricted => 'foo',
- :basic => '<a rel="nofollow">foo</a>',
- :relaxed => '<a>foo</a>'
- },
-
- 'protocol-based JS injection: hex encoding' => {
- :html => '<a href="javascript&#x3A;">foo</a>',
- :default => 'foo',
- :restricted => 'foo',
- :basic => '<a rel="nofollow">foo</a>',
- :relaxed => '<a>foo</a>'
- },
-
- 'protocol-based JS injection: long hex encoding' => {
- :html => '<a href="javascript&#x003A;">foo</a>',
- :default => 'foo',
- :restricted => 'foo',
- :basic => '<a rel="nofollow">foo</a>',
- :relaxed => '<a>foo</a>'
- },
-
- 'protocol-based JS injection: hex encoding without semicolons' => {
- :html => '<a href=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>foo</a>',
- :default => 'foo',
- :restricted => 'foo',
- :basic => '<a rel="nofollow">foo</a>',
- :relaxed => '<a>foo</a>'
- },
-
- 'protocol-based JS injection: null char' => {
- :html => "<img src=java\0script:alert(\"XSS\")>",
- :default => '',
- :restricted => '',
- :basic => '',
- :relaxed => '<img src="java">' # everything following the null char gets stripped, and URL is considered relative
- },
-
- 'protocol-based JS injection: invalid URL char' => {
- :html => '<img src=java\script:alert("XSS")>',
- :default => '',
- :restricted => '',
- :basic => '',
- :relaxed => '<img>'
- },
-
- 'protocol-based JS injection: spaces and entities' => {
- :html => '<img src=" &#14;  javascript:alert(\'XSS\');">',
- :default => '',
- :restricted => '',
- :basic => '',
- :relaxed => '<img src>'
- }
-}
-
-describe 'Config::DEFAULT' do
- it 'should translate valid HTML entities' do
- Sanitize.clean("Don&apos;t tas&eacute; me &amp; bro!").must_equal("Don't tasé me &amp; bro!")
- end
-
- it 'should translate valid HTML entities while encoding unencoded ampersands' do
- Sanitize.clean("cookies&sup2; & &frac14; cr&eacute;me").must_equal("cookies² &amp; ¼ créme")
- end
-
- it 'should never output &apos;' do
- Sanitize.clean("<a href='&apos;' class=\"' &#39;\">IE6 isn't a real browser</a>").wont_match(/&apos;/)
- end
-
- it 'should not choke on several instances of the same element in a row' do
- Sanitize.clean('<img src="http://www.google.com/intl/en_ALL/images/logo.gif"><img src="http://www.google.com/intl/en_ALL/images/logo.gif"><img src="http://www.google.com/intl/en_ALL/images/logo.gif"><img src="http://www.google.com/intl/en_ALL/images/logo.gif">').must_equal('')
- end
-
- it 'should surround the contents of :whitespace_elements with space characters when removing the element' do
- Sanitize.clean('foo<div>bar</div>baz').must_equal('foo bar baz')
- Sanitize.clean('foo<br>bar<br>baz').must_equal('foo bar baz')
- Sanitize.clean('foo<hr>bar<hr>baz').must_equal('foo bar baz')
- end
-
- strings.each do |name, data|
- it "should clean #{name} HTML" do
- Sanitize.clean(data[:html]).must_equal(data[:default])
+describe 'Sanitize' do
+ describe 'instance methods' do
+ before do
+ @s = Sanitize.new
end
- end
- tricky.each do |name, data|
- it "should not allow #{name}" do
- Sanitize.clean(data[:html]).must_equal(data[:default])
- end
- end
-end
+ describe '#document' do
+ before do
+ @s = Sanitize.new(:elements => ['html'])
+ end
-describe 'Config::RESTRICTED' do
- before { @s = Sanitize.new(Sanitize::Config::RESTRICTED) }
+ it 'should sanitize an HTML document' do
+ @s.document('<!doctype html><html><b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script></html>')
+ .must_equal "<html>Lorem ipsum dolor sit amet alert(\"hello world\");</html>\n"
+ end
- strings.each do |name, data|
- it "should clean #{name} HTML" do
- @s.clean(data[:html]).must_equal(data[:restricted])
+ it 'should not modify the input string' do
+ input = '<!DOCTYPE html><b>foo</b>'
+ @s.document(input)
+ input.must_equal('<!DOCTYPE html><b>foo</b>')
+ end
end
- end
- tricky.each do |name, data|
- it "should not allow #{name}" do
- @s.clean(data[:html]).must_equal(data[:restricted])
- end
- end
-end
+ describe '#fragment' do
+ it 'should sanitize an HTML fragment' do
+ @s.fragment('<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>')
+ .must_equal 'Lorem ipsum dolor sit amet alert("hello world");'
+ end
-describe 'Config::BASIC' do
- before { @s = Sanitize.new(Sanitize::Config::BASIC) }
+ it 'should not modify the input string' do
+ input = '<b>foo</b>'
+ @s.fragment(input)
+ input.must_equal '<b>foo</b>'
+ end
- it 'should not choke on valueless attributes' do
- @s.clean('foo <a href>foo</a> bar').must_equal('foo <a href rel="nofollow">foo</a> bar')
- end
-
- it 'should downcase attribute names' do
- @s.clean('<a HREF="javascript:alert(\'foo\')">bar</a>').must_equal('<a rel="nofollow">bar</a>')
- end
-
- strings.each do |name, data|
- it "should clean #{name} HTML" do
- @s.clean(data[:html]).must_equal(data[:basic])
+ it 'should not choke on fragments containing <html> or <body>' do
+ @s.fragment('<html><b>foo</b></html>').must_equal 'foo'
+ @s.fragment('<body><b>foo</b></body>').must_equal 'foo'
+ @s.fragment('<html><body><b>foo</b></body></html>').must_equal 'foo'
+ @s.fragment('<!DOCTYPE html><html><body><b>foo</b></body></html>').must_equal 'foo'
+ end
end
- end
- tricky.each do |name, data|
- it "should not allow #{name}" do
- @s.clean(data[:html]).must_equal(data[:basic])
- end
- end
-end
+ describe '#node!' do
+ it 'should sanitize a Nokogiri::XML::Node' do
+ doc = Nokogiri::HTML5.parse('<b>Lo<!-- comment -->rem</b> <a href="pants" title="foo">ipsum</a> <a href="http://foo.com/"><strong>dolor</strong></a> sit<br/>amet <script>alert("hello world");</script>')
+ frag = doc.fragment
-describe 'Config::RELAXED' do
- before { @s = Sanitize.new(Sanitize::Config::RELAXED) }
+ doc.xpath('/html/body/node()').each {|node| frag << node }
- it 'should encode special chars in attribute values' do
- input = '<a href="http://example.com" title="<b>&eacute;xamples</b> & things">foo</a>'
- output = Nokogiri::HTML.fragment('<a href="http://example.com" title="&lt;b&gt;éxamples&lt;/b&gt; &amp; things">foo</a>').to_xhtml(:encoding => 'utf-8', :indent => 0, :save_with => Nokogiri::XML::Node::SaveOptions::AS_XHTML)
- @s.clean(input).must_equal(output)
- end
+ @s.node!(frag)
+ frag.to_html.must_equal 'Lorem ipsum dolor sit amet alert("hello world");'
+ end
- strings.each do |name, data|
- it "should clean #{name} HTML" do
- @s.clean(data[:html]).must_equal(data[:relaxed])
+ describe "when the given node is a document and <html> isn't whitelisted" do
+ it 'should raise a Sanitize::Error' do
+ doc = Nokogiri::HTML5.parse('foo')
+ proc { @s.node!(doc) }.must_raise Sanitize::Error
+ end
+ end
end
end
- tricky.each do |name, data|
- it "should not allow #{name}" do
- @s.clean(data[:html]).must_equal(data[:relaxed])
+ describe 'class methods' do
+ describe '.document' do
+ it 'should call #document' do
+ Sanitize.stub_instance(:document, proc {|html| html + ' called' }) do
+ Sanitize.document('<html>foo</html>')
+ .must_equal '<html>foo</html> called'
+ end
+ end
end
- end
-end
-describe 'Full Document parser (using clean_document)' do
- before {
- @s = Sanitize.new({:elements => %w[!DOCTYPE html]})
- @default_doctype = "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">"
- }
-
- it 'should require HTML element is whitelisted to prevent parser errors' do
- assert_raises(RuntimeError, 'You must have the HTML element whitelisted') {
- Sanitize.clean_document!('', {:elements => [], :remove_contents => false})
- }
- end
-
- it 'should NOT require HTML element to be whitelisted if remove_contents is true' do
- output = '<!DOCTYPE html><html>foo</html>'
- Sanitize.clean_document!(output, {:remove_contents => true}).must_equal "<!DOCTYPE html>\n\n"
- end
-
- it 'adds a doctype tag if not included' do
- @s.clean_document('').must_equal("#{@default_doctype}\n\n")
- end
-
- it 'should apply whitelist filtering to HTML element' do
- output = "<!DOCTYPE html>\n<html anything='false'></html>\n\n"
- @s.clean_document(output).must_equal("<!DOCTYPE html>\n<html></html>\n")
- end
-
- strings.each do |name, data|
- it "should wrap #{name} with DOCTYPE and HTML tag" do
- output = data[:document] || data[:default]
- @s.clean_document(data[:html]).must_equal("#{@default_doctype}\n<html>#{output}</html>\n")
- end
- end
-
- tricky.each do |name, data|
- it "should wrap #{name} with DOCTYPE and HTML tag" do
- @s.clean_document(data[:html]).must_equal("#{@default_doctype}\n<html>#{data[:default]}</html>\n")
- end
- end
-end
-
-describe 'Custom configs' do
- it 'should allow attributes on all elements if whitelisted under :all' do
- input = '<p class="foo">bar</p>'
-
- Sanitize.clean(input).must_equal(' bar ')
- Sanitize.clean(input, {:elements => ['p'], :attributes => {:all => ['class']}}).must_equal(input)
- Sanitize.clean(input, {:elements => ['p'], :attributes => {'div' => ['class']}}).must_equal('<p>bar</p>')
- Sanitize.clean(input, {:elements => ['p'], :attributes => {'p' => ['title'], :all => ['class']}}).must_equal(input)
- end
-
- it 'should allow comments when :allow_comments == true' do
- input = 'foo <!-- bar --> baz'
- Sanitize.clean(input).must_equal('foo baz')
- Sanitize.clean(input, :allow_comments => true).must_equal(input)
- end
-
- it 'should allow relative URLs containing colons where the colon is not in the first path segment' do
- input = '<a href="/wiki/Special:Random">Random Page</a>'
- Sanitize.clean(input, { :elements => ['a'], :attributes => {'a' => ['href']}, :protocols => { 'a' => { 'href' => [:relative] }} }).must_equal(input)
- end
-
- it 'should allow relative URLs containing colons where the colon is part of an anchor' do
- input = '<a href="#fn:1">Footnote 1</a>'
- Sanitize.clean(input, { :elements => ['a'], :attributes => {'a' => ['href']}, :protocols => { 'a' => { 'href' => [:relative] }} }).must_equal(input)
- end
-
- it 'should allow relative URLs containing colons where the colon is part of an anchor' do
- input = '<a href="somepage#fn:1">Footnote 1</a>'
- Sanitize.clean(input, { :elements => ['a'], :attributes => {'a' => ['href']}, :protocols => { 'a' => { 'href' => [:relative] }} }).must_equal(input)
- end
-
- it 'should output HTML when :output == :html' do
- input = 'foo<br/>bar<br>baz'
- Sanitize.clean(input, :elements => ['br'], :output => :html).must_equal('foo<br>bar<br>baz')
- end
-
- it 'should remove the contents of filtered nodes when :remove_contents == true' do
- Sanitize.clean('foo bar <div>baz<span>quux</span></div>', :remove_contents => true).must_equal('foo bar ')
- end
-
- it 'should remove the contents of specified nodes when :remove_contents is an Array of element names as strings' do
- Sanitize.clean('foo bar <div>baz<span>quux</span><script>alert("hello!");</script></div>', :remove_contents => ['script', 'span']).must_equal('foo bar baz ')
- end
-
- it 'should remove the contents of specified nodes when :remove_contents is an Array of element names as symbols' do
- Sanitize.clean('foo bar <div>baz<span>quux</span><script>alert("hello!");</script></div>', :remove_contents => [:script, :span]).must_equal('foo bar baz ')
- end
-
- it 'should support encodings other than utf-8' do
- html = 'foo&nbsp;bar'
- Sanitize.clean(html).must_equal("foo\302\240bar")
- Sanitize.clean(html, :output_encoding => 'ASCII').must_equal("foo&#160;bar")
- end
-
- it 'should not allow arbitrary HTML5 data attributes by default' do
- config = {
- :elements => ['b']
- }
-
- Sanitize.clean('<b data-foo="bar"></b>', config)
- .must_equal('<b></b>')
-
- config[:attributes] = {'b' => ['class']}
-
- Sanitize.clean('<b class="foo" data-foo="bar"></b>', config)
- .must_equal('<b class="foo"></b>')
- end
-
- it 'should allow arbitrary HTML5 data attributes when the :attributes config includes :data' do
- config = {
- :attributes => {'b' => [:data]},
- :elements => ['b']
- }
-
- Sanitize.clean('<b data-foo="valid" data-bar="valid"></b>', config)
- .must_equal('<b data-foo="valid" data-bar="valid"></b>')
-
- Sanitize.clean('<b data-="invalid"></b>', config)
- .must_equal('<b></b>')
-
- Sanitize.clean('<b data-="invalid"></b>', config)
- .must_equal('<b></b>')
-
- Sanitize.clean('<b data-xml="invalid"></b>', config)
- .must_equal('<b></b>')
-
- Sanitize.clean('<b data-xmlfoo="invalid"></b>', config)
- .must_equal('<b></b>')
-
- Sanitize.clean('<b data-f:oo="valid"></b>', config)
- .must_equal('<b></b>')
-
- Sanitize.clean('<b data-f/oo="partial"></b>', config)
- .must_equal('<b data-f></b>') # Nokogiri quirk; not ideal, but harmless
-
- Sanitize.clean('<b data-éfoo="valid"></b>', config)
- .must_equal('<b></b>') # Another annoying Nokogiri quirk.
- end
-end
-
-describe 'Sanitize.clean' do
- it 'should not modify the input string' do
- input = '<b>foo</b>'
- Sanitize.clean(input)
- input.must_equal('<b>foo</b>')
- end
-
- it 'should return a new string' do
- input = '<b>foo</b>'
- Sanitize.clean(input).must_equal('foo')
- end
-end
-
-describe 'Sanitize.clean!' do
- it 'should modify the input string' do
- input = '<b>foo</b>'
- Sanitize.clean!(input)
- input.must_equal('foo')
- end
-
- it 'should return the string if it was modified' do
- input = '<b>foo</b>'
- Sanitize.clean!(input).must_equal('foo')
- end
-
- it 'should return nil if the string was not modified' do
- input = 'foo'
- Sanitize.clean!(input).must_equal(nil)
- end
-end
-
-describe 'Sanitize.clean_document' do
- before { @config = { :elements => ['html', 'p'] } }
-
- it 'should be idempotent' do
- input = '<!DOCTYPE html><html><p>foo</p></html>'
- first = Sanitize.clean_document(input, @config)
- second = Sanitize.clean_document(first, @config)
- second.must_equal first
- second.wont_be_nil
- end
-
- it 'should handle nil without raising' do
- Sanitize.clean_document(nil).must_equal nil
- end
-
- it 'should not modify the input string' do
- input = '<!DOCTYPE html><b>foo</b>'
- Sanitize.clean_document(input, @config)
- input.must_equal('<!DOCTYPE html><b>foo</b>')
- end
-
- it 'should return a new string' do
- input = '<!DOCTYPE html><b>foo</b>'
- Sanitize.clean_document(input, @config).must_equal("<!DOCTYPE html>\n<html>foo</html>\n")
- end
-end
-
-describe 'Sanitize.clean_document!' do
- before { @config = { :elements => ['html'] } }
-
- it 'should modify the input string' do
- input = '<!DOCTYPE html><html><body><b>foo</b></body></html>'
- Sanitize.clean_document!(input, @config)
- input.must_equal("<!DOCTYPE html>\n<html>foo</html>\n")
- end
-
- it 'should return the string if it was modified' do
- input = '<!DOCTYPE html><html><body><b>foo</b></body></html>'
- Sanitize.clean_document!(input, @config).must_equal("<!DOCTYPE html>\n<html>foo</html>\n")
- end
-
- it 'should return nil if the string was not modified' do
- input = "<!DOCTYPE html>\n<html></html>\n"
- Sanitize.clean_document!(input, @config).must_equal(nil)
- end
-end
-
-describe 'transformers' do
- # YouTube embed transformer.
- youtube = lambda do |env|
- node = env[:node]
- node_name = env[:node_name]
-
- # Don't continue if this node is already whitelisted or is not an element.
- return if env[:is_whitelisted] || !node.element?
-
- # Don't continue unless the node is an iframe.
- return unless node_name == 'iframe'
-
- # Verify that the video URL is actually a valid YouTube video URL.
- return unless node['src'] =~ /\Ahttps?:\/\/(?:www\.)?youtube(?:-nocookie)?\.com\//
-
- # We're now certain that this is a YouTube embed, but we still need to run
- # it through a special Sanitize step to ensure that no unwanted elements or
- # attributes that don't belong in a YouTube embed can sneak in.
- Sanitize.clean_node!(node, {
- :elements => %w[iframe],
-
- :attributes => {
- 'iframe' => %w[allowfullscreen frameborder height src width]
- }
- })
-
- # Now that we're sure that this is a valid YouTube embed and that there are
- # no unwanted elements or attributes hidden inside it, we can tell Sanitize
- # to whitelist the current node.
- {:node_whitelist => [node]}
- end
-
- it 'should receive a complete env Hash as input' do
- Sanitize.clean!('<SPAN>foo</SPAN>', :foo => :bar, :transformers => lambda {|env|
- return unless env[:node].element?
-
- env[:config][:foo].must_equal(:bar)
- env[:is_whitelisted].must_equal(false)
- env[:node].must_be_kind_of(Nokogiri::XML::Node)
- env[:node_name].must_equal('span')
- env[:node_whitelist].must_be_kind_of(Set)
- env[:node_whitelist].must_be_empty
- })
- end
-
- it 'should traverse all node types, including the fragment itself' do
- nodes = []
-
- Sanitize.clean!('<div>foo</div><!--bar--><script>cdata!</script>', :transformers => proc {|env|
- nodes << env[:node_name]
- })
-
- nodes.must_equal(%w[
- text div comment #cdata-section script #document-fragment
- ])
- end
-
- it 'should traverse in depth-first mode by default' do
- nodes = []
-
- Sanitize.clean!('<div><span>foo</span></div><p>bar</p>', :transformers => proc {|env|
- env[:traversal_mode].must_equal(:depth)
- nodes << env[:node_name] if env[:node].element?
- })
-
- nodes.must_equal(['span', 'div', 'p'])
- end
-
- it 'should traverse in breadth-first mode when using :transformers_breadth' do
- nodes = []
-
- Sanitize.clean!('<div><span>foo</span></div><p>bar</p>', :transformers_breadth => proc {|env|
- env[:traversal_mode].must_equal(:breadth)
- nodes << env[:node_name] if env[:node].element?
- })
-
- nodes.must_equal(['div', 'span', 'p'])
- end
-
- it 'should whitelist nodes in the node whitelist' do
- Sanitize.clean!('<div class="foo">foo</div><span>bar</span>', :transformers => [
- proc {|env|
- {:node_whitelist => [env[:node]]} if env[:node_name] == 'div'
- },
-
- proc {|env|
- env[:is_whitelisted].must_equal(false) unless env[:node_name] == 'div'
- env[:is_whitelisted].must_equal(true) if env[:node_name] == 'div'
- env[:node_whitelist].must_include(env[:node]) if env[:node_name] == 'div'
- }
- ]).must_equal('<div class="foo">foo</div>bar')
- end
-
- it 'should clear the node whitelist after each fragment' do
- called = false
-
- Sanitize.clean!('<div>foo</div>', :transformers => proc {|env|
- {:node_whitelist => [env[:node]]}
- })
-
- Sanitize.clean!('<div>foo</div>', :transformers => proc {|env|
- called = true
- env[:is_whitelisted].must_equal(false)
- env[:node_whitelist].must_be_empty
- })
-
- called.must_equal(true)
- end
-
- it 'should allow youtube video embeds via the youtube transformer' do
- input = '<iframe width="420" height="315" src="http://www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen bogus="bogus"><script>alert()</script></iframe>'
- output = Nokogiri::HTML::DocumentFragment.parse('<iframe width="420" height="315" src="http://www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen>alert()</iframe>').to_html(:encoding => 'utf-8', :indent => 0)
-
- Sanitize.clean!(input, :transformers => youtube).must_equal(output)
- end
-
- it 'should allow https youtube video embeds via the youtube transformer' do
- input = '<iframe width="420" height="315" src="https://www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen bogus="bogus"><script>alert()</script></iframe>'
- output = Nokogiri::HTML::DocumentFragment.parse('<iframe width="420" height="315" src="https://www.youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen>alert()</iframe>').to_html(:encoding => 'utf-8', :indent => 0)
-
- Sanitize.clean!(input, :transformers => youtube).must_equal(output)
- end
-
- it 'should allow privacy-enhanced youtube video embeds via the youtube transformer' do
- input = '<iframe width="420" height="315" src="http://www.youtube-nocookie.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen bogus="bogus"><script>alert()</script></iframe>'
- output = Nokogiri::HTML::DocumentFragment.parse('<iframe width="420" height="315" src="http://www.youtube-nocookie.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen>alert()</iframe>').to_html(:encoding => 'utf-8', :indent => 0)
-
- Sanitize.clean!(input, :transformers => youtube).must_equal(output)
- end
-
- it 'should not allow non-youtube video embeds via the youtube transformer' do
- input = '<iframe width="420" height="315" src="http://www.fake-youtube.com/embed/QH2-TGUlwu4" frameborder="0" allowfullscreen></iframe>'
- output = ''
-
- Sanitize.clean!(input, :transformers => youtube).must_equal(output)
- end
-end
-
-describe 'bugs' do
- it 'should not have Nokogiri 1.4.2+ unterminated script/style element bug' do
- Sanitize.clean!('foo <script>bar').must_equal('foo bar')
- Sanitize.clean!('foo <style>bar').must_equal('foo bar')
- end
-end
-
-describe 'Malicious HTML' do
- make_my_diffs_pretty!
- parallelize_me!
-
- before do
- @s = Sanitize.new(Sanitize::Config::RELAXED)
- end
-
- # libxml2 >= 2.9.2 doesn't escape comments within some attributes, in an
- # attempt to preserve server-side includes. This can result in XSS since an
- # unescaped double quote can allow an attacker to inject a non-whitelisted
- # attribute. Sanitize works around this by implementing its own escaping for
- # affected attributes.
- #
- # The relevant libxml2 code is here:
- # <https://github.com/GNOME/libxml2/commit/960f0e275616cadc29671a218d7fb9b69eb35588>
- describe 'unsafe libxml2 server-side includes in attributes' do
- tag_configs = [
- {
- tag_name: 'a',
- escaped_attrs: %w[ action href src name ],
- unescaped_attrs: []
- },
-
- {
- tag_name: 'div',
- escaped_attrs: %w[ action href src ],
- unescaped_attrs: %w[ name ]
- }
- ]
-
- before do
- @s = Sanitize.new({
- elements: %w[ a div ],
-
- attributes: {
- all: %w[ action href src name ]
- }
- })
- end
-
- tag_configs.each do |tag_config|
- tag_name = tag_config[:tag_name]
-
- tag_config[:escaped_attrs].each do |attr_name|
- input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
-
- it 'should escape unsafe characters in attributes' do
- @s.clean(input).must_equal(%[<#{tag_name} #{attr_name}="examp<!--%22%20onmouseover=alert(1)>-->le.com">foo</#{tag_name}>])
+ describe '.fragment' do
+ it 'should call #fragment' do
+ Sanitize.stub_instance(:fragment, proc {|html| html + ' called' }) do
+ Sanitize.fragment('<b>foo</b>').must_equal '<b>foo</b> called'
end
-
- it 'should round-trip to the same output' do
- output = @s.clean(input)
- @s.clean(output).must_equal(output)
- end
end
+ end
- tag_config[:unescaped_attrs].each do |attr_name|
- input = %[<#{tag_name} #{attr_name}='examp<!--" onmouseover=alert(1)>-->le.com'>foo</#{tag_name}>]
-
- it 'should not escape characters unnecessarily' do
- @s.clean(input).must_equal(input)
+ describe '.node!' do
+ it 'should call #node!' do
+ Sanitize.stub_instance(:node!, proc {|input| input + ' called' }) do
+ Sanitize.node!('not really a node').must_equal 'not really a node called'
end
-
- it 'should round-trip to the same output' do
- output = @s.clean(input)
- @s.clean(output).must_equal(output)
- end
end
end
end
end
-