Sha256: 430042be6dc76ac0ac6a94c153650ec82f821d574aae52e7610b3063d7a7efba
Contents?: true
Size: 1020 Bytes
Versions: 1
Compression:
Stored size: 1020 Bytes
Contents
require "spec_helper"

# Specs for OpenNlp::Tokenizer: building a tokenizer from a model and
# tokenizing strings with it.
describe OpenNlp::Tokenizer do
  # The subject is the class itself (not an instance); each example
  # instantiates it explicitly via subject.new.
  subject { OpenNlp::Tokenizer }

  # Tokenizer model built from the "en-token.bin" file under FIXTURES_DIR.
  let(:model) { OpenNlp::Model::Tokenizer.new(File.join(FIXTURES_DIR, "en-token.bin")) }

  describe "initialization" do
    it "should initialize a new tokenizer" do
      tokenizer = subject.new(model)
      tokenizer.should be_a(subject)
    end

    it "should raise an argument error when no model is supplied" do
      lambda { subject.new(nil) }.should raise_error(ArgumentError)
    end
  end

  describe "tokenize a string" do
    # Fresh tokenizer for each example, backed by the fixture model above.
    let(:tokenizer) { subject.new(model) }

    it "should tokenize an empty string" do
      tokens = tokenizer.tokenize("")
      tokens.should == []
    end

    it "should tokenize a sentence" do
      tokens = tokenizer.tokenize("The red fox sleeps soundly.")
      # The trailing period is expected as a separate token.
      tokens.should == ["The", "red", "fox", "sleeps", "soundly", "."]
    end

    it "should raise an error when not passed a string" do
      lambda { tokenizer.tokenize(nil) }.should raise_error(ArgumentError)
    end
  end
end
Version data entries
1 entry across 1 version & 1 rubygem
Version | Path |
---|---|
open_nlp-0.0.7-java | spec/tokenizer_spec.rb |