spec/english_spec.rb in open-nlp-0.0.1 vs spec/english_spec.rb in open-nlp-0.1.0

- old
+ new

@@ -1,11 +1,10 @@ # encoding: utf-8 require_relative 'spec_helper' describe OpenNLP do - # Failing spec #1 context "the maximum entropy chunker is run after tokenization and POS tagging" do it "should find the accurate chunks" do chunker = OpenNLP::ChunkerME.new tokenizer = OpenNLP::TokenizerME.new @@ -13,31 +12,41 @@ sent = "The death of the poet was kept from his poems." tokens = tokenizer.tokenize(sent) tags = tagger.tag(tokens) - chunks = chunker.chunk(tokens.to_java(:String), pos_tags.to_java(:String)) - # cannot convert instance of class org.jruby.java.proxies.ArrayJavaProxy to class java.lang.String - + chunks = chunker.chunk(tokens, tags) + + chunks.to_a.should eql %w[B-NP I-NP B-PP B-NP I-NP B-VP I-VP B-PP B-NP I-NP O] tokens.to_a.should eql %w[The death of the poet was kept from his poems .] - tags.should eql ['put tags here'] + tags.to_a.should eql %w[DT NN IN DT NN VBD VBN IN PRP$ NNS .] end end - # Failing spec #2 context "the maximum entropy parser is run after tokenization" do it "parses the text accurately" do + sent = "The death of the poet was kept from his poems." - tokenizer = OpenNLP::TokenizerME.new - p_model = OpenNLP.load_model(:parser) - parser = OpenNLP::ParserFactory.create(p_model) - tokens = tokenizer.tokenize(sent) - result = parser.parse(tokens.to_java(:String)) - # cannot convert instance of class org.jruby.java.proxies.ArrayJavaProxy to class java.lang.String - # org/jruby/java/addons/KernelJavaAddons.java:70:in `to_java' - # /ruby/gems/open-nlp/spec/english_spec.rb in `(root)' - puts result.to_a.inspect + parser = OpenNLP::Parser.new + parse = parser.parse(sent) + + parse.get_text.should eql sent + + parse.get_span.get_start.should eql 0 + parse.get_span.get_end.should eql 46 + parse.get_span.get_type.should eql nil # ? + parse.get_child_count.should eql 1 + + child = parse.get_children[0] + + child.text.should eql "The death of the poet was kept from his poems." + child.get_child_count.should eql 3 + child.get_head_index.should eql 5 + + child.get_head.get_child_count.should eql 1 + child.get_type.should eql "S" + end end context "the SimpleTokenizer is run" do it "tokenizes the text accurately" do