lib/nabatheon.rb in nabatheon-0.0.2 vs lib/nabatheon.rb in nabatheon-0.0.3

- old
+ new

@@ -1,26 +1,34 @@ require 'nabatheon/version' +require 'nabatheon/entity' +require 'nabatheon/rule' +require 'nabatheon/search' + require 'stanford-core-nlp' module Nabatheon BASE_DEP_PATH = "#{File.expand_path('~')}/.stanford-nlp" StanfordCoreNLP.jar_path = "#{BASE_DEP_PATH}/nlp/jars/stanford-core-nlp-full/" StanfordCoreNLP.model_path = "#{BASE_DEP_PATH}/nlp/models/" StanfordCoreNLP.log_file = 'tmp/log.txt' StanfordCoreNLP.use :english Pipeline = StanfordCoreNLP.load(:tokenize, :ssplit, :pos, :lemma, :parse, :ner) - def self.annotate(raw_text) - text = StanfordCoreNLP::Annotation.new(raw_text) - Pipeline.annotate(text) + def self.annotate(text) + annotated_text = StanfordCoreNLP::Annotation.new(text) + Pipeline.annotate(annotated_text) - named_lemma = [] - text.get(:sentences).each do |sentence| + tagged = [] + annotated_text.get(:sentences).each do |sentence| sentence.get(:tokens).each do |token| entity_tag = token.get(:named_entity_tag).to_s lemma = token.get(:lemma).to_s - named_lemma << [entity_tag, lemma] unless entity_tag == 'O' + tagged << { named_entity: entity_tag, type: lemma } end end - named_lemma + tagged + end + + def self.relevant_searches_for(annotations) + Rule.apply_on(annotations) end end