spec/english_spec.rb in open-nlp-0.0.1 vs spec/english_spec.rb in open-nlp-0.1.0
- old
+ new
@@ -1,11 +1,10 @@
# encoding: utf-8
require_relative 'spec_helper'
describe OpenNLP do
- # Failing spec #1
context "the maximum entropy chunker is run after tokenization and POS tagging" do
it "should find the accurate chunks" do
chunker = OpenNLP::ChunkerME.new
tokenizer = OpenNLP::TokenizerME.new
@@ -13,31 +12,41 @@
sent = "The death of the poet was kept from his poems."
tokens = tokenizer.tokenize(sent)
tags = tagger.tag(tokens)
- chunks = chunker.chunk(tokens.to_java(:String), pos_tags.to_java(:String))
- # cannot convert instance of class org.jruby.java.proxies.ArrayJavaProxy to class java.lang.String
-
+ chunks = chunker.chunk(tokens, tags)
+
+ chunks.to_a.should eql %w[B-NP I-NP B-PP B-NP I-NP B-VP I-VP B-PP B-NP I-NP O]
tokens.to_a.should eql %w[The death of the poet was kept from his poems .]
- tags.should eql ['put tags here']
+ tags.to_a.should eql %w[DT NN IN DT NN VBD VBN IN PRP$ NNS .]
end
end
- # Failing spec #2
context "the maximum entropy parser is run after tokenization" do
it "parses the text accurately" do
+
sent = "The death of the poet was kept from his poems."
- tokenizer = OpenNLP::TokenizerME.new
- p_model = OpenNLP.load_model(:parser)
- parser = OpenNLP::ParserFactory.create(p_model)
- tokens = tokenizer.tokenize(sent)
- result = parser.parse(tokens.to_java(:String))
- # cannot convert instance of class org.jruby.java.proxies.ArrayJavaProxy to class java.lang.String
- # org/jruby/java/addons/KernelJavaAddons.java:70:in `to_java'
- # /ruby/gems/open-nlp/spec/english_spec.rb in `(root)'
- puts result.to_a.inspect
+ parser = OpenNLP::Parser.new
+ parse = parser.parse(sent)
+
+ parse.get_text.should eql sent
+
+ parse.get_span.get_start.should eql 0
+ parse.get_span.get_end.should eql 46
+ parse.get_span.get_type.should eql nil # ?
+ parse.get_child_count.should eql 1
+
+ child = parse.get_children[0]
+
+ child.text.should eql "The death of the poet was kept from his poems."
+ child.get_child_count.should eql 3
+ child.get_head_index.should eql 5
+
+ child.get_head.get_child_count.should eql 1
+ child.get_type.should eql "S"
+
end
end
context "the SimpleTokenizer is run" do
it "tokenizes the text accurately" do