Sha256: 7ea6e264687d73a16d5ef68f287714008259a903d8348d5c91a3099a7a1c0807

Contents?: true

Size: 1.97 KB

Versions: 1

Compression:

Stored size: 1.97 KB

Contents

require "test/unit"
require "shoulda"
require "webmock/test_unit"
require "yaml"
require_relative "../../lib/jkl"

# Unit tests for the text-cleaning helpers called on Jkl::Text:
# remove_short_lines, strip_all_tags and plain_text.
#
# Fixtures are heredocs whose bodies start at column 0 on purpose: `<<-HTML`
# only allows the terminator to be indented and keeps any leading whitespace
# in the body, so indenting the fixture lines would change the input under test.
#
# Assertions use assert_equal(expected, actual) so that a failure reports both
# strings instead of a bare "false is not true".
class TextCleaningTest < Test::Unit::TestCase
  context "Cleaning Text" do
    # Called without an explicit threshold: the three-word line is expected to
    # be dropped while the six-word line survives.
    should "Remove short lines" do
      input = <<-HTML
the cat sat on the mat
a short line
HTML
      result = Jkl::Text.remove_short_lines(input)
      assert_equal "the cat sat on the mat", result
    end

    # Called with a second argument of 8: only the longest line is expected to
    # survive. Presumably the argument is a minimum word count -- confirm
    # against the implementation in lib/jkl.
    should "Remove shorter lines" do
      input = <<-HTML
the cat sat on the mat
the cat sat on the slightly fluffy, yet worn and homely mat
a short line
HTML
      result = Jkl::Text.remove_short_lines(input, 8)
      assert_equal "the cat sat on the slightly fluffy, yet worn and homely mat", result
    end

    # NOTE(review): despite its name this test goes through remove_short_lines;
    # it pins that the script block does not survive that call.
    should "Remove script tags" do
      input = <<-HTML
the cat sat on the mat
<script type="text/javascript" charset="utf-8">
function nofunction(){var bob;}
</script>
a short line
HTML
      result = Jkl::Text.remove_short_lines(input)
      assert_equal "the cat sat on the mat", result
    end

    # NOTE(review): also exercised through remove_short_lines; it pins that the
    # HTML comment line does not survive that call.
    should "Remove html comments" do
      input = <<-HTML
the cat sat on the mat
<!-- a comment-->
a short line
HTML
      result = Jkl::Text.remove_short_lines(input)
      assert_equal "the cat sat on the mat", result
    end

    # An empty line in the input must not appear in the output.
    should "Remove blank lines" do
      input = <<-HTML
the cat sat on the mat

a short line
HTML
      result = Jkl::Text.remove_short_lines(input)
      assert_equal "the cat sat on the mat", result
    end

    should "Strip all tags" do
      input = <<-HTML
<p>the cat sat on the mat</p>
HTML
      result = Jkl::Text.strip_all_tags(input)
      # TODO: fix carriage return (the expected value currently keeps the
      # trailing "\n" that comes from the heredoc).
      assert_equal "the cat sat on the mat\n", result
    end

    # plain_text is expected to reduce the whole fixture -- script block,
    # paragraph markup and the trailing line -- to the first sentence only.
    should "Clean text" do
      input = <<-HTML
the cat sat on the mat
<script type="text/javascript" charset="utf-8">
 function nofunction(){var bob;}
</script>
<p> some para stuff here </p>
some end stuff here
HTML
      result = Jkl::Text.plain_text(input)
      assert_equal "the cat sat on the mat", result
    end
  end
end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
jakal-0.2.0 test/unit/text_cleaning_test.rb