test/test_basic.rb in mdalessio-dryopteris-0.1.1 vs test/test_basic.rb in mdalessio-dryopteris-0.1.2

- old
+ new

@@ -1,9 +1,81 @@ require File.expand_path(File.join(File.dirname(__FILE__), 'helper')) +if defined? Nokogiri::VERSION_INFO + puts "=> running with Nokogiri #{Nokogiri::VERSION_INFO.inspect}" +else + puts "=> running with Nokogiri #{Nokogiri::VERSION} / libxml #{Nokogiri::LIBXML_PARSER_VERSION}" +end + class TestBasic < Test::Unit::TestCase + MSWORD_HTML = <<-EOHTML +<meta http-equiv="Content-Type" content="text/html; charset=utf-8"><meta name="ProgId" content="Word.Document"><meta name="Generator" content="Microsoft Word 11"><meta name="Originator" content="Microsoft Word 11"><link rel="File-List" href="file:///C:%5CDOCUME%7E1%5CNICOLE%7E1%5CLOCALS%7E1%5CTemp%5Cmsohtml1%5C01%5Cclip_filelist.xml"><!--[if gte mso 9]><xml> +<w:WordDocument> + <w:View>Normal</w:View> + <w:Zoom>0</w:Zoom> + <w:PunctuationKerning/> + <w:ValidateAgainstSchemas/> + <w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid> + <w:IgnoreMixedContent>false</w:IgnoreMixedContent> + <w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText> + <w:Compatibility> + <w:BreakWrappedTables/> + <w:SnapToGridInCell/> + <w:WrapTextWithPunct/> + <w:UseAsianBreakRules/> + <w:DontGrowAutofit/> + </w:Compatibility> + <w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel> +</w:WordDocument> +</xml><![endif]--><!--[if gte mso 9]><xml> +<w:LatentStyles DefLockedState="false" LatentStyleCount="156"> +</w:LatentStyles> +</xml><![endif]--><style> +<!-- +/* Style Definitions */ +p.MsoNormal, li.MsoNormal, div.MsoNormal +{mso-style-parent:""; +margin:0in; +margin-bottom:.0001pt; +mso-pagination:widow-orphan; +font-size:12.0pt; +font-family:"Times New Roman"; +mso-fareast-font-family:"Times New Roman";} +@page Section1 +{size:8.5in 11.0in; +margin:1.0in 1.25in 1.0in 1.25in; +mso-header-margin:.5in; +mso-footer-margin:.5in; +mso-paper-source:0;} +div.Section1 +{page:Section1;} +--> +</style><!--[if gte mso 10]> +<style> +/* Style Definitions */ +table.MsoNormalTable +{mso-style-name:"Table Normal"; +mso-tstyle-rowband-size:0; +mso-tstyle-colband-size:0; +mso-style-noshow:yes; +mso-style-parent:""; +mso-padding-alt:0in 5.4pt 0in 5.4pt; +mso-para-margin:0in; +mso-para-margin-bottom:.0001pt; +mso-pagination:widow-orphan; +font-size:10.0pt; +font-family:"Times New Roman"; +mso-ansi-language:#0400; +mso-fareast-language:#0400; +mso-bidi-language:#0400;} +</style> +<![endif]--> + +<p class="MsoNormal">Foo <b style="">BOLD<o:p></o:p></b></p> + EOHTML + def test_nil assert_nil Dryopteris.sanitize(nil) end def test_empty_string @@ -65,82 +137,22 @@ def test_fragment_with_text_nodes_leading_and_trailing assert_equal "text<p>fragment</p>text", Dryopteris.sanitize("text<p>fragment</p>text") end - def test_fragment_with_body_tags - # ignore second open body tag, use first close body tag, ignore everything after that - assert_equal "textfragment", Dryopteris.sanitize("text<body>fragment</body>text") + def test_whitewash_on_fragment + html = "safe<frameset rows=\"*\"><frame src=\"http://example.com\"></frameset> <b>description</b>" + whitewashed = Dryopteris.whitewash_document(html) + assert_equal "<p>safe</p><b>description</b>", whitewashed end - def test_whitewash_on_microsofty_markup - html = <<-EOHTML -<meta http-equiv="Content-Type" content="text/html; charset=utf-8"><meta name="ProgId" content="Word.Document"><meta name="Generator" content="Microsoft Word 11"><meta name="Originator" content="Microsoft Word 11"><link rel="File-List" href="file:///C:%5CDOCUME%7E1%5CNICOLE%7E1%5CLOCALS%7E1%5CTemp%5Cmsohtml1%5C01%5Cclip_filelist.xml"><!--[if gte mso 9]><xml> -<w:WordDocument> - <w:View>Normal</w:View> - <w:Zoom>0</w:Zoom> - <w:PunctuationKerning/> - <w:ValidateAgainstSchemas/> - <w:SaveIfXMLInvalid>false</w:SaveIfXMLInvalid> - <w:IgnoreMixedContent>false</w:IgnoreMixedContent> - <w:AlwaysShowPlaceholderText>false</w:AlwaysShowPlaceholderText> - <w:Compatibility> - <w:BreakWrappedTables/> - <w:SnapToGridInCell/> - <w:WrapTextWithPunct/> - <w:UseAsianBreakRules/> - <w:DontGrowAutofit/> - </w:Compatibility> - <w:BrowserLevel>MicrosoftInternetExplorer4</w:BrowserLevel> -</w:WordDocument> -</xml><![endif]--><!--[if gte mso 9]><xml> -<w:LatentStyles DefLockedState="false" LatentStyleCount="156"> -</w:LatentStyles> -</xml><![endif]--><style> -<!-- -/* Style Definitions */ -p.MsoNormal, li.MsoNormal, div.MsoNormal -{mso-style-parent:""; -margin:0in; -margin-bottom:.0001pt; -mso-pagination:widow-orphan; -font-size:12.0pt; -font-family:"Times New Roman"; -mso-fareast-font-family:"Times New Roman";} -@page Section1 -{size:8.5in 11.0in; -margin:1.0in 1.25in 1.0in 1.25in; -mso-header-margin:.5in; -mso-footer-margin:.5in; -mso-paper-source:0;} -div.Section1 -{page:Section1;} ---> -</style><!--[if gte mso 10]> -<style> -/* Style Definitions */ -table.MsoNormalTable -{mso-style-name:"Table Normal"; -mso-tstyle-rowband-size:0; -mso-tstyle-colband-size:0; -mso-style-noshow:yes; -mso-style-parent:""; -mso-padding-alt:0in 5.4pt 0in 5.4pt; -mso-para-margin:0in; -mso-para-margin-bottom:.0001pt; -mso-pagination:widow-orphan; -font-size:10.0pt; -font-family:"Times New Roman"; -mso-ansi-language:#0400; -mso-fareast-language:#0400; -mso-bidi-language:#0400;} -</style> -<![endif]--> + def test_whitewash_fragment_on_microsofty_markup + whitewashed = Dryopteris.whitewash(MSWORD_HTML.chomp) + assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed + end -<p class="MsoNormal">Foo <b style="">BOLD<o:p></o:p></b></p> - EOHTML - - whitewashed = Dryopteris.whitewash(html) + def test_whitewash_on_microsofty_markup + whitewashed = Dryopteris.whitewash_document(MSWORD_HTML) assert_equal "<p>Foo <b>BOLD</b></p>", whitewashed end end