test/test_sanitize.rb in sanitize-2.0.6 vs test/test_sanitize.rb in sanitize-2.1.0

- old
+ new

@@ -342,10 +342,20 @@ it 'should allow relative URLs containing colons where the colon is not in the first path segment' do input = '<a href="/wiki/Special:Random">Random Page</a>' Sanitize.clean(input, { :elements => ['a'], :attributes => {'a' => ['href']}, :protocols => { 'a' => { 'href' => [:relative] }} }).must_equal(input) end + it 'should allow relative URLs containing colons where the colon is part of an anchor' do + input = '<a href="#fn:1">Footnote 1</a>' + Sanitize.clean(input, { :elements => ['a'], :attributes => {'a' => ['href']}, :protocols => { 'a' => { 'href' => [:relative] }} }).must_equal(input) + end + + it 'should allow relative URLs containing colons where the colon is part of an anchor' do + input = '<a href="somepage#fn:1">Footnote 1</a>' + Sanitize.clean(input, { :elements => ['a'], :attributes => {'a' => ['href']}, :protocols => { 'a' => { 'href' => [:relative] }} }).must_equal(input) + end + it 'should output HTML when :output == :html' do input = 'foo<br/>bar<br>baz' Sanitize.clean(input, :elements => ['br'], :output => :html).must_equal('foo<br>bar<br>baz') end @@ -363,9 +373,54 @@ it 'should support encodings other than utf-8' do html = 'foo&nbsp;bar' Sanitize.clean(html).must_equal("foo\302\240bar") Sanitize.clean(html, :output_encoding => 'ASCII').must_equal("foo&#160;bar") + end + + it 'should not allow arbitrary HTML5 data attributes by default' do + config = { + :elements => ['b'] + } + + Sanitize.clean('<b data-foo="bar"></b>', config) + .must_equal('<b></b>') + + config[:attributes] = {'b' => ['class']} + + Sanitize.clean('<b class="foo" data-foo="bar"></b>', config) + .must_equal('<b class="foo"></b>') + end + + it 'should allow arbitrary HTML5 data attributes when the :attributes config includes :data' do + config = { + :attributes => {'b' => [:data]}, + :elements => ['b'] + } + + Sanitize.clean('<b data-foo="valid" data-bar="valid"></b>', config) + .must_equal('<b data-foo="valid" data-bar="valid"></b>') + + Sanitize.clean('<b data-="invalid"></b>', config) + .must_equal('<b></b>') + + Sanitize.clean('<b data-="invalid"></b>', config) + .must_equal('<b></b>') + + Sanitize.clean('<b data-xml="invalid"></b>', config) + .must_equal('<b></b>') + + Sanitize.clean('<b data-xmlfoo="invalid"></b>', config) + .must_equal('<b></b>') + + Sanitize.clean('<b data-f:oo="valid"></b>', config) + .must_equal('<b></b>') + + Sanitize.clean('<b data-f/oo="partial"></b>', config) + .must_equal('<b data-f></b>') # Nokogiri quirk; not ideal, but harmless + + Sanitize.clean('<b data-éfoo="valid"></b>', config) + .must_equal('<b></b>') # Another annoying Nokogiri quirk. end end describe 'Sanitize.clean' do it 'should not modify the input string' do