Sha256: fb4b36bad0b45488b03e77eb687e4a76a428211fa459520db0b1973e17cfb76b

Contents?: true

Size: 1.64 KB

Versions: 5

Compression:

Stored size: 1.64 KB

Contents

require "test/unit"
require "shoulda"
require "webmock/test_unit"
require "yaml"
require "lib/jkl"

# Unit tests for the text-cleaning helpers exposed on Jkl::Text
# (remove_short_lines, strip_all_tags and clean).
#
# Each test feeds a small heredoc fixture to one helper and compares the
# returned string with the expected cleaned text. Comparisons use
# assert_equal so a failure reports both the expected and the actual string.
class TextCleaningTest < Test::Unit::TestCase
  context "Cleaning Text" do

    should "Remove short lines" do
      input = <<-HTML
the cat sat on the mat
a short line
HTML
      result = Jkl::Text.remove_short_lines(input)
      assert_equal "the cat sat on the mat", result
    end

    # NOTE(review): despite its name this test calls remove_short_lines, not a
    # script-specific helper -- confirm that is the intended method under test.
    should "Remove script tags" do
      input = <<-HTML
the cat sat on the mat
<script type="text/javascript" charset="utf-8">
function nofunction(){var bob;}
</script>
a short line
HTML
      result = Jkl::Text.remove_short_lines(input)
      assert_equal "the cat sat on the mat", result
    end

    # NOTE(review): despite its name this test calls remove_short_lines, not a
    # comment-specific helper -- confirm that is the intended method under test.
    should "Remove html comments" do
      input = <<-HTML
the cat sat on the mat
<!-- a comment-->
a short line
HTML
      result = Jkl::Text.remove_short_lines(input)
      assert_equal "the cat sat on the mat", result
    end

    # Exercised through remove_short_lines; presumably an empty line is treated
    # as a short line -- confirm against the implementation.
    should "Remove blank lines" do
      input = <<-HTML
the cat sat on the mat

a short line
HTML
      result = Jkl::Text.remove_short_lines(input)
      assert_equal "the cat sat on the mat", result
    end

    should "Strip all tags" do
      input = <<-HTML
<p>the cat sat on the mat</p>
HTML
      result = Jkl::Text.strip_all_tags(input)
      # The expected value currently keeps the heredoc's trailing "\n".
      assert_equal "the cat sat on the mat\n", result # TODO: fix carriage return
    end

    should "Clean text" do
      input = <<-HTML
the cat sat on the mat
<script type="text/javascript" charset="utf-8">
 function nofunction(){var bob;}
</script>
<p> some para stuff here </p>
some end stuff here
HTML
      result = Jkl::Text.clean(input)
      assert_equal "the cat sat on the mat", result
    end
  end
end

Version data entries

5 entries across 5 versions & 1 rubygem

Version Path
jakal-0.1.92 test/unit/text_cleaning_test.rb
jakal-0.1.91 test/unit/text_cleaning_test.rb
jakal-0.1.9 test/unit/text_cleaning_test.rb
jakal-0.1.8 test/unit/text_cleaning_test.rb
jakal-0.1.7 test/unit/text_cleaning_test.rb