# encoding: utf-8
require_relative 'common'

# Specs for the CleanElement transformer, exercised through
# Sanitize.fragment / Sanitize#fragment under the default, RESTRICTED, BASIC,
# RELAXED and several ad-hoc configurations.
#
# NOTE(review): many fixtures below are empty strings, or have an input that
# is identical to the expected output. Presumably the HTML markup of these
# fixtures was lost before this file reached its current form -- confirm
# against the upstream copy and restore the markup before trusting these
# assertions.
describe 'Sanitize::Transformers::CleanElement' do
  make_my_diffs_pretty!
  parallelize_me!

  # Shared fixtures. Each entry maps a fixture name to its input (:html) and
  # to the output expected under each config (:default, :restricted, :basic,
  # :relaxed).
  strings = {
    :basic => {
      :html       => "Lorem ipsum dolor sit\namet ",
      :default    => 'Lorem ipsum dolor sit amet .foo { color: #fff; } alert("hello world");',
      :restricted => 'Lorem ipsum dolor sit amet .foo { color: #fff; } alert("hello world");',
      :basic      => "Lorem ipsum dolor sit\namet .foo { color: #fff; } alert(\"hello world\");",
      :relaxed    => "Lorem ipsum dolor sit\namet alert(\"hello world\");"
    },

    :malformed => {
      :html       => "Lorem dolor sit\namet ",
      :default    => 'Lorem ipsum dolor sit amet <script>alert("hello world");',
      :restricted => 'Lorem ipsum dolor sit amet <script>alert("hello world");',
      :basic      => "Lorem ipsum dolor sit\namet <script>alert(\"hello world\");",
      :relaxed    => "Lorem ipsum dolor sit\namet <script>alert(\"hello world\");"
    }
  }

  # Protocol-injection fixtures, same shape as +strings+: one input plus the
  # expected output per config.
  protocols = {
    'protocol-based JS injection: simple, no spaces' => {
      :html => 'foo', :default => 'foo', :restricted => 'foo', :basic => 'foo', :relaxed => 'foo'
    },

    'protocol-based JS injection: simple, spaces before' => {
      :html => 'foo', :default => 'foo', :restricted => 'foo', :basic => 'foo', :relaxed => 'foo'
    },

    'protocol-based JS injection: simple, spaces after' => {
      :html => 'foo', :default => 'foo', :restricted => 'foo', :basic => 'foo', :relaxed => 'foo'
    },

    'protocol-based JS injection: simple, spaces before and after' => {
      :html => 'foo', :default => 'foo', :restricted => 'foo', :basic => 'foo', :relaxed => 'foo'
    },

    'protocol-based JS injection: preceding colon' => {
      :html => 'foo', :default => 'foo', :restricted => 'foo', :basic => 'foo', :relaxed => 'foo'
    },

    'protocol-based JS injection: UTF-8 encoding' => {
      :html => 'foo', :default => 'foo', :restricted => 'foo', :basic => 'foo', :relaxed => 'foo'
    },

    'protocol-based JS injection: long UTF-8 encoding' => {
      :html => 'foo', :default => 'foo', :restricted => 'foo', :basic => 'foo', :relaxed => 'foo'
    },

    'protocol-based JS injection: long UTF-8 encoding without semicolons' => {
      :html => 'foo', :default => 'foo', :restricted => 'foo', :basic => 'foo', :relaxed => 'foo'
    },

    'protocol-based JS injection: hex encoding' => {
      :html => 'foo', :default => 'foo', :restricted => 'foo', :basic => 'foo', :relaxed => 'foo'
    },

    'protocol-based JS injection: long hex encoding' => {
      :html => 'foo', :default => 'foo', :restricted => 'foo', :basic => 'foo', :relaxed => 'foo'
    },

    'protocol-based JS injection: hex encoding without semicolons' => {
      :html => 'foo', :default => 'foo', :restricted => 'foo', :basic => 'foo', :relaxed => 'foo'
    },

    'protocol-based JS injection: null char' => {
      :html => "", :default => '', :restricted => '', :basic => '', :relaxed => ''
    },

    'protocol-based JS injection: invalid URL char' => {
      :html => '', :default => '', :restricted => '', :basic => '', :relaxed => ''
    },

    'protocol-based JS injection: spaces and entities' => {
      :html => '', :default => '', :restricted => '', :basic => '', :relaxed => ''
    },

    'protocol whitespace' => {
      :html => '', :default => '', :restricted => '', :basic => '', :relaxed => ''
    }
  }

  describe 'Default config' do
    it 'should remove non-whitelisted elements, leaving safe contents behind' do
      Sanitize.fragment('foo bar baz quux')
        .must_equal 'foo bar baz quux'

      Sanitize.fragment('')
        .must_equal 'alert("<xss>");'

      Sanitize.fragment('<')
        .must_equal '< script <>> alert("");'
    end

    it 'should surround the contents of :whitespace_elements with space characters when removing the element' do
      Sanitize.fragment("foo\nbar\nbaz")
        .must_equal 'foo bar baz'

      Sanitize.fragment("foo\nbar\nbaz")
        .must_equal 'foo bar baz'

      Sanitize.fragment("foo\nbar\nbaz")
        .must_equal 'foo bar baz'
    end

    it 'should not choke on several instances of the same element in a row' do
      Sanitize.fragment('')
        .must_equal ''
    end

    strings.each do |name, data|
      it "should clean #{name} HTML" do
        Sanitize.fragment(data[:html]).must_equal(data[:default])
      end
    end

    protocols.each do |name, data|
      it "should not allow #{name}" do
        Sanitize.fragment(data[:html]).must_equal(data[:default])
      end
    end
  end

  describe 'Restricted config' do
    before do
      @s = Sanitize.new(Sanitize::Config::RESTRICTED)
    end

    strings.each do |name, data|
      it "should clean #{name} HTML" do
        @s.fragment(data[:html]).must_equal(data[:restricted])
      end
    end

    protocols.each do |name, data|
      it "should not allow #{name}" do
        @s.fragment(data[:html]).must_equal(data[:restricted])
      end
    end
  end

  describe 'Basic config' do
    before do
      @s = Sanitize.new(Sanitize::Config::BASIC)
    end

    it 'should not choke on valueless attributes' do
      @s.fragment('foo foo bar')
        .must_equal 'foo foo bar'
    end

    it 'should downcase attribute names' do
      @s.fragment('bar')
        .must_equal 'bar'
    end

    strings.each do |name, data|
      it "should clean #{name} HTML" do
        @s.fragment(data[:html]).must_equal(data[:basic])
      end
    end

    protocols.each do |name, data|
      it "should not allow #{name}" do
        @s.fragment(data[:html]).must_equal(data[:basic])
      end
    end
  end

  describe 'Relaxed config' do
    before do
      @s = Sanitize.new(Sanitize::Config::RELAXED)
    end

    it 'should encode special chars in attribute values' do
      @s.fragment('foo')
        .must_equal 'foo'
    end

    strings.each do |name, data|
      it "should clean #{name} HTML" do
        @s.fragment(data[:html]).must_equal(data[:relaxed])
      end
    end

    protocols.each do |name, data|
      it "should not allow #{name}" do
        @s.fragment(data[:html]).must_equal(data[:relaxed])
      end
    end
  end

  describe 'Custom configs' do
    it 'should allow attributes on all elements if whitelisted under :all' do
      input = "\n\nbar\n\n"

      Sanitize.fragment(input).must_equal ' bar '

      Sanitize.fragment(input, {
        :elements   => ['p'],
        :attributes => {:all => ['class']}
      }).must_equal input

      Sanitize.fragment(input, {
        :elements   => ['p'],
        :attributes => {'div' => ['class']}
      }).must_equal "\n\nbar\n\n"

      Sanitize.fragment(input, {
        :elements   => ['p'],
        :attributes => {'p' => ['title'], :all => ['class']}
      }).must_equal input
    end

    it "should not allow relative URLs when relative URLs aren't whitelisted" do
      input = 'Link'

      Sanitize.fragment(input,
        :elements   => ['a'],
        :attributes => {'a' => ['href']},
        :protocols  => {'a' => {'href' => ['http']}}
      ).must_equal 'Link'
    end

    it 'should allow relative URLs containing colons when the colon is not in the first path segment' do
      input = 'Random Page'

      Sanitize.fragment(input, {
        :elements   => ['a'],
        :attributes => {'a' => ['href']},
        :protocols  => {'a' => {'href' => [:relative]}}
      }).must_equal input
    end

    it 'should allow relative URLs containing colons when the colon is part of an anchor' do
      input = 'Footnote 1'

      Sanitize.fragment(input, {
        :elements   => ['a'],
        :attributes => {'a' => ['href']},
        :protocols  => {'a' => {'href' => [:relative]}}
      }).must_equal input

      input = 'Footnote 1'

      Sanitize.fragment(input, {
        :elements   => ['a'],
        :attributes => {'a' => ['href']},
        :protocols  => {'a' => {'href' => [:relative]}}
      }).must_equal input
    end

    it 'should remove the contents of filtered nodes when :remove_contents is true' do
      Sanitize.fragment("foo bar\nbazquux\n",
        :remove_contents => true
      ).must_equal 'foo bar '
    end

    it 'should remove the contents of specified nodes when :remove_contents is an Array of element names as strings' do
      Sanitize.fragment("foo bar\nbazquux\n",
        :remove_contents => ['script', 'span']
      ).must_equal 'foo bar baz '
    end

    it 'should remove the contents of specified nodes when :remove_contents is an Array of element names as symbols' do
      Sanitize.fragment("foo bar\nbazquux\n",
        :remove_contents => [:script, :span]
      ).must_equal 'foo bar baz '
    end

    it 'should not allow arbitrary HTML5 data attributes by default' do
      Sanitize.fragment('',
        :elements => ['b']
      ).must_equal ''

      Sanitize.fragment('',
        :attributes => {'b' => ['class']},
        :elements   => ['b']
      ).must_equal ''
    end

    it 'should allow arbitrary HTML5 data attributes when the :attributes config includes :data' do
      s = Sanitize.new(
        :attributes => {'b' => [:data]},
        :elements   => ['b']
      )

      s.fragment('')
        .must_equal ''

      s.fragment('')
        .must_equal ''

      s.fragment('')
        .must_equal ''

      s.fragment('')
        .must_equal ''

      s.fragment('')
        .must_equal ''

      s.fragment('')
        .must_equal ''

      s.fragment('')
        .must_equal '' # Nokogiri quirk; not ideal, but harmless

      s.fragment('')
        .must_equal '' # Another annoying Nokogiri quirk.
    end

    it 'should replace whitespace_elements with configured :before and :after values' do
      s = Sanitize.new(
        :whitespace_elements => {
          'p'   => { :before => "\n", :after => "\n" },
          'div' => { :before => "\n", :after => "\n" },
          'br'  => { :before => "\n", :after => "\n" },
        }
      )

      s.fragment("\n\nfoo\n\n").must_equal "\nfoo\n"
      s.fragment("\n\nfoo\n\nbar\n\n").must_equal "\nfoo\n\nbar\n"
      s.fragment("foo\nbar\nbaz").must_equal "foo\nbar\nbaz"
      s.fragment("foo\nbar\nbaz").must_equal "foo\nbar\nbaz"
    end

    it 'handles protocols correctly regardless of case' do
      input = 'Text'

      Sanitize.fragment(input, {
        :elements   => ['a'],
        :attributes => {'a' => ['href']},
        :protocols  => {'a' => {'href' => ['https']}}
      }).must_equal input

      input = 'Text'

      Sanitize.fragment(input, {
        :elements   => ['a'],
        :attributes => {'a' => ['href']},
        :protocols  => {'a' => {'href' => ['https']}}
      }).must_equal "Text"
    end
  end
end