test/unit/text_cleaning_test.rb in jakal-0.2.0 vs test/unit/text_cleaning_test.rb in jakal-0.2.1

- old
+ new

@@ -11,21 +11,22 @@ input = <<-HTML the cat sat on the mat a short line HTML result = Jkl::Text::remove_short_lines input - assert result == "the cat sat on the mat" + assert result == "the cat sat on the mat\n" end should "Remove shorter lines" do input = <<-HTML the cat sat on the mat the cat sat on the slightly fluffy, yet worn and homely mat a short line HTML result = Jkl::Text::remove_short_lines(input, 8) - assert result == "the cat sat on the slightly fluffy, yet worn and homely mat" + expected = "the cat sat on the slightly fluffy, yet worn and homely mat\n" + assert_equal expected, result end should "Remove script tags" do input = <<-HTML the cat sat on the mat @@ -33,39 +34,39 @@ function nofunction(){var bob;} </script> a short line HTML result = Jkl::Text::remove_short_lines input - assert result == "the cat sat on the mat" + assert result == "the cat sat on the mat\n" end should "Remove html comments" do input = <<-HTML the cat sat on the mat <!-- a comment--> a short line HTML result = Jkl::Text::remove_short_lines input - assert result == "the cat sat on the mat" + assert result == "the cat sat on the mat\n" end should "Remove blank lines" do input = <<-HTML the cat sat on the mat a short line HTML result = Jkl::Text::remove_short_lines input - assert result == "the cat sat on the mat" + assert result == "the cat sat on the mat\n" end should "Strip all tags" do input = <<-HTML <p>the cat sat on the mat</p> HTML result = Jkl::Text::strip_all_tags input - assert result == "the cat sat on the mat\n" #TODO fix carriage return + assert result == "the cat sat on the mat\n" end should "Clean text" do input = <<-HTML the cat sat on the mat @@ -74,9 +75,21 @@ </script> <p> some para stuff here </p> some end stuff here HTML result = Jkl::Text::plain_text(input) - assert result == "the cat sat on the mat" + assert result == "the cat sat on the mat\n" + end + + should "Remove HTML escaped characters" do + input = <<HTML +Testing, testing, one two three. +<p><strong>The cat didn&#39;t sit on the mat</strong></p> +HTML + expected = <<EXPECTED +Testing, testing, one two three. +The cat didn't sit on the mat +EXPECTED + assert_equal expected, Jkl::Text::plain_text(input, 2) end end end \ No newline at end of file