Sha256: 3954046c098bd6376ec2bb9701138b6513f4506725ff922b4544695bbfb63457

Contents?: true

Size: 1.54 KB

Versions: 2

Compression:

Stored size: 1.54 KB

Contents

require 'rbbt'
require 'rjb'
require 'libxml'
require 'rbbt/text/segment'
require 'rbbt/ner/NER'
require 'rbbt/util/log'

# Named-entity recognizer backed by the OSCAR4 Java library, accessed through
# Rjb. The JVM and the Java classes are loaded lazily on first use and the
# tagger instance is shared across calls.
class OSCAR4 < NER
  Rbbt.claim Rbbt.software.opt.OSCAR4, :install, Rbbt.share.install.software.OSCAR4.find

  # Prepares the Java side: filters the CLASSPATH, starts the JVM if it is not
  # already running and imports the OSCAR4 classes. Safe to call repeatedly.
  def self.init
    # There is an incompatibility between the OpenNLP version in OSCAR4 and the
    # one used for other matters in Rbbt, which is the most recent. We remove
    # the standalone jars from the CLASSPATH.
    # CLASSPATH may be unset, in which case there is nothing to filter.
    classpath = ENV["CLASSPATH"]
    ENV["CLASSPATH"] = classpath.split(":").reject { |path| path =~ /opennlp/ } * ":" if classpath

    Rjb::load(nil, ['-Xms1G', '-Xmx2G']) unless Rjb.loaded?

    @@OSCAR      ||= Rjb::import('uk.ac.cam.ch.wwmm.oscar.Oscar')
    @@FormatType ||= Rjb::import('uk.ac.cam.ch.wwmm.oscar.chemnamedict.entities.FormatType')
  end

  # Returns the shared Oscar instance, creating it on first use.
  def self.tagger
    # Make sure the Java classes are imported even when tagger is called
    # directly, before any call to match.
    init unless defined?(@@OSCAR)
    @@tagger ||= @@OSCAR.new
  end

  # Finds the entities OSCAR4 reports in +text+.
  #
  # text:: the String to analyze; nil or blank text yields an empty Array.
  # type:: optional entity type name (String or Symbol); when given, only
  #        entities whose type's toString equals it are kept.
  #
  # Returns an Array with the surface form of each entity, each one passed
  # through NamedEntity.setup along with the entity start, type and confidence.
  def self.match(text, type = nil)
    init

    return [] if text.nil? || text.strip.empty?

    # findNamedEntities is used instead of findAndResolveNamedEntities, so no
    # chemical structure (InChI) is resolved for the mentions.
    entities = tagger.findNamedEntities(text)
    it = entities.iterator

    result = []

    while it.hasNext
      entity = it.next

      next unless type.nil? || entity.getType.toString == type.to_s

      mention = entity.getSurface
      inchi = nil # structure resolution is disabled, see above

      NamedEntity.setup mention, entity.getStart, entity.getType, inchi, entity.getConfidence

      result << mention
    end

    result
  end

  # Instance-level convenience that delegates to OSCAR4.match.
  def match(*args)
    OSCAR4.match(*args)
  end
end



Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
rbbt-text-1.2.0 lib/rbbt/ner/oscar4.rb
rbbt-text-1.1.9 lib/rbbt/ner/oscar4.rb