Sha256: 38bafc9068a5a83615dcb87decb722ba567f11b727784c12cb4794676bbf8cfa

Contents?: true

Size: 882 Bytes

Versions: 1

Compression:

Stored size: 882 Bytes

Contents

require 'nabatheon/version'
require 'stanford-core-nlp'

# Thin wrapper around the stanford-core-nlp gem that extracts named-entity
# tags together with the lemma of each tagged token.
#
# Loading this file configures StanfordCoreNLP (jar/model locations, log
# file, language) and builds the shared Pipeline constant as a side effect.
module Nabatheon
  # Root directory under the current user's home where the Stanford NLP
  # jars and models are looked up.
  BASE_DEP_PATH              = "#{File.expand_path('~')}/.stanford-nlp"
  StanfordCoreNLP.jar_path   = "#{BASE_DEP_PATH}/nlp/jars/stanford-core-nlp-full/"
  StanfordCoreNLP.model_path = "#{BASE_DEP_PATH}/nlp/models/"
  # NOTE(review): relative path -- presumably resolved against the process
  # working directory; confirm a tmp/ directory is expected to exist there.
  StanfordCoreNLP.log_file   = 'tmp/log.txt'
  StanfordCoreNLP.use :english

  # Annotation pipeline built once at load time and reused by every call
  # to .annotate.
  Pipeline = StanfordCoreNLP.load(:tokenize, :ssplit, :pos, :lemma, :parse, :ner)

  # Runs the pipeline over +raw_text+ and collects the tagged tokens.
  #
  # @param raw_text the text handed to StanfordCoreNLP::Annotation.new
  # @return [Array<Array(String, String)>] one [entity_tag, lemma] pair per
  #   token whose named-entity tag is not 'O', in document order
  def self.annotate(raw_text)
    annotation = StanfordCoreNLP::Annotation.new(raw_text)
    Pipeline.annotate(annotation)

    tagged_pairs = []
    # Only #each is relied upon here: the sentence/token collections come
    # from the Java bridge and may not offer the full Enumerable API.
    annotation.get(:sentences).each do |sentence|
      sentence.get(:tokens).each do |token|
        tag        = token.get(:named_entity_tag).to_s
        base_form  = token.get(:lemma).to_s
        # 'O' presumably marks a token outside any named entity -- skip it.
        next if tag == 'O'

        tagged_pairs.push([tag, base_form])
      end
    end
    tagged_pairs
  end
end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
nabatheon-0.0.2 lib/nabatheon.rb