lib/nabatheon.rb in nabatheon-0.0.2 vs lib/nabatheon.rb in nabatheon-0.0.3
- old
+ new
@@ -1,26 +1,34 @@
require 'nabatheon/version'
+require 'nabatheon/entity'
+require 'nabatheon/rule'
+require 'nabatheon/search'
+
require 'stanford-core-nlp'
# Namespace that configures the StanfordCoreNLP bindings and exposes helpers
# for annotating text.
#
# This listing is a diff between nabatheon 0.0.2 and 0.0.3: lines prefixed
# with "-" belong to the old version, lines prefixed with "+" to the new one,
# and unprefixed lines are shared by both.
module Nabatheon
# Root directory for the NLP dependencies, located under the current user's
# home directory.
BASE_DEP_PATH = "#{File.expand_path('~')}/.stanford-nlp"
StanfordCoreNLP.jar_path = "#{BASE_DEP_PATH}/nlp/jars/stanford-core-nlp-full/"
StanfordCoreNLP.model_path = "#{BASE_DEP_PATH}/nlp/models/"
# NOTE(review): relative path - presumably resolved against the process
# working directory; confirm that a tmp/ directory exists wherever this runs.
StanfordCoreNLP.log_file = 'tmp/log.txt'
StanfordCoreNLP.use :english
# The pipeline is assigned to a constant in the module body, so it is built
# once, when this file is loaded, with the annotators listed here.
Pipeline = StanfordCoreNLP.load(:tokenize, :ssplit, :pos, :lemma, :parse, :ner)
# annotate: wraps the given text in a StanfordCoreNLP::Annotation, runs
# Pipeline over it, then walks every token of every sentence and collects the
# token's named-entity tag and lemma (both converted with to_s).
#
# Old version: returns an array of [entity_tag, lemma] pairs and skips tokens
# whose entity tag is 'O'.
# New version: returns an array of hashes with :named_entity and :type keys
# and no longer skips tokens whose entity tag is 'O'.
# NOTE(review): in the new version the :type key holds the lemma - confirm
# that this key name is what the consumers of the result expect.
- def self.annotate(raw_text)
- text = StanfordCoreNLP::Annotation.new(raw_text)
- Pipeline.annotate(text)
+ def self.annotate(text)
+ annotated_text = StanfordCoreNLP::Annotation.new(text)
+ Pipeline.annotate(annotated_text)
- named_lemma = []
- text.get(:sentences).each do |sentence|
+ tagged = []
+ annotated_text.get(:sentences).each do |sentence|
sentence.get(:tokens).each do |token|
entity_tag = token.get(:named_entity_tag).to_s
lemma = token.get(:lemma).to_s
- named_lemma << [entity_tag, lemma] unless entity_tag == 'O'
+ tagged << { named_entity: entity_tag, type: lemma }
end
end
- named_lemma
+ tagged
+ end
+
# relevant_searches_for (new version only): passes the given annotations to
# Rule.apply_on and returns whatever that call returns.
# Rule is presumably provided by the 'nabatheon/rule' require added in the
# new version - confirm.
+ def self.relevant_searches_for(annotations)
+ Rule.apply_on(annotations)
end
end