spec/lib/internals/tokenizers/base_spec.rb in picky-1.5.2 vs spec/lib/internals/tokenizers/base_spec.rb in picky-1.5.3
- old
+ new
@@ -10,15 +10,42 @@
tokenizer.reject([:'', :a, :ab, :abc]).should == [:ab, :abc]
end
it 'rejects tokens that are called :hello' do
tokenizer.reject([:hel, :hell, :hello]).should == [:hel, :hell]
end
+ describe 'to_s' do
+ it 'lists each configuration option and reports where the rejection block is defined' do # compares the whole multi-line to_s output verbatim
+ tokenizer.to_s.should == <<-EXPECTED
+Removes characters: -
+Stopwords: -
+Splits text on: /\\s/
+Removes chars after split: -
+Normalizes words: -
+Rejects tokens? Yes, see line 8 in app/application.rb
+Substitutes chars? -
+EXPECTED
+ end
+ end
end
context 'with normal instance' do
let(:tokenizer) { described_class.new }
-
+
+ describe 'to_s' do
+ it 'lists every configuration option as unset, apart from the split pattern' do # compares the whole multi-line to_s output verbatim
+ tokenizer.to_s.should == <<-EXPECTED
+Removes characters: -
+Stopwords: -
+Splits text on: /\\s/
+Removes chars after split: -
+Normalizes words: -
+Rejects tokens? -
+Substitutes chars? -
+EXPECTED
+ end
+ end
+
describe 'reject_token_if' do
it 'rejects empty tokens by default' do
tokenizer.reject(['a', nil, '', 'b']).should == ['a', 'b']
end
it 'rejects tokens based on the given rejection criteria if set' do
@@ -30,10 +57,13 @@
describe "substitute(s)_characters*" do
it "doesn't substitute if there is no substituter" do
tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzäöü'
end
+ it 'raises if nothing with #substitute is given' do
+ expect { tokenizer.substitutes_characters_with Object.new }.to raise_error("The substitutes_characters_with option needs a character substituter, which responds to #substitute.")
+ end
it "uses the substituter to replace characters" do
tokenizer.substitutes_characters_with CharacterSubstituters::WestEuropean.new
tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzaeoeue'
end
@@ -43,10 +73,13 @@
tokenizer.substitute_characters('abcdefghijklmnopqrstuvwxyzäöü').should == 'abcdefghijklmnopqrstuvwxyzaeoeue'
end
end
describe "removes_characters_after_splitting" do
+ it 'handles broken arguments' do
+ expect { tokenizer.removes_characters_after_splitting("gnorf") }.to raise_error(ArgumentError)
+ end
context "without removes_characters_after_splitting called" do
it "has remove_after_normalizing_illegals" do
expect { tokenizer.remove_after_normalizing_illegals('any') }.to_not raise_error
end
it 'should define a remove_after_normalizing_illegals normalize_with_patterns does nothing' do
@@ -67,10 +100,13 @@
end
end
end
describe "normalizes_words" do
+ it 'handles broken arguments' do
+ expect { tokenizer.normalizes_words(:not_an_array) }.to raise_error(ArgumentError)
+ end
context "without normalizes_words called" do
it "has normalize_with_patterns" do
expect { tokenizer.normalize_with_patterns('any') }.to_not raise_error
end
it 'should define a method normalize_with_patterns does nothing' do
@@ -94,10 +130,16 @@
end
end
end
describe "splits_text_on" do
+ it 'handles nonbroken arguments' do
+ expect { tokenizer.splits_text_on("hello") }.to_not raise_error # bare matcher: any exception fails the example, not only ArgumentError
+ end
+ it 'handles broken arguments' do
+ expect { tokenizer.splits_text_on(:gnorf) }.to raise_error(ArgumentError)
+ end
context "without splits_text_on called" do
it "has split" do
lambda { tokenizer.split('any') }.should_not raise_error
end
it 'should define a method split that splits by default on \s' do
@@ -119,10 +161,13 @@
end
end
end
describe "removes_characters" do
+ it 'handles broken arguments' do
+ expect { tokenizer.removes_characters("hello") }.to raise_error(ArgumentError)
+ end
context "without removes_characters called" do
it "has remove_illegals" do
expect { tokenizer.remove_illegals('any') }.to_not raise_error
end
it 'should define a method remove_illegals that does nothing' do
@@ -143,9 +188,12 @@
end
end
end
describe 'stopwords' do
+ it 'handles broken arguments' do
+ expect { tokenizer.stopwords("hello") }.to raise_error(ArgumentError)
+ end
context 'without stopwords given' do
it 'should define a method remove_stopwords' do
lambda { tokenizer.remove_stopwords('from this text') }.should_not raise_error
end
it 'should define a method remove_stopwords that does nothing' do
\ No newline at end of file