Sha256: 3954046c098bd6376ec2bb9701138b6513f4506725ff922b4544695bbfb63457
Contents?: true
Size: 1.54 KB
Versions: 2
Compression:
Stored size: 1.54 KB
Contents
require 'rbbt'
require 'rjb'
require 'libxml'
require 'rbbt/text/segment'
require 'rbbt/ner/NER'
require 'rbbt/util/log'

# Named-entity recognizer backed by the OSCAR4 Java library
# (uk.ac.cam.ch.wwmm.oscar.Oscar), which is driven through the Rjb bridge.
#
# The Java side is initialised lazily: nothing is loaded until OSCAR4.init,
# OSCAR4.tagger or OSCAR4.match is called for the first time.
class OSCAR4 < NER
  Rbbt.claim Rbbt.software.opt.OSCAR4, :install, Rbbt.share.install.software.OSCAR4.find

  # Prepares the JVM (if Rjb has not loaded one yet) and imports the OSCAR4
  # Java classes used by this wrapper. Safe to call repeatedly: the JVM load
  # is guarded by Rjb.loaded? and the imports are memoized.
  def self.init
    # There is an incompatibility between the OpenNLP version in OSCAR4 and the
    # one used for other matters in Rbbt, which is the most recent. We remove
    # the standalone jars from the CLASSPATH
    #
    # CLASSPATH may be unset, in which case there is nothing to filter;
    # calling split on nil would raise NoMethodError.
    classpath = ENV["CLASSPATH"]
    ENV["CLASSPATH"] = classpath.split(":").reject { |path| path =~ /opennlp/ }.join(":") if classpath

    Rjb::load(nil, ['-Xms1G', '-Xmx2G']) unless Rjb.loaded?

    @@OSCAR ||= Rjb::import('uk.ac.cam.ch.wwmm.oscar.Oscar')
    @@FormatType ||= Rjb::import('uk.ac.cam.ch.wwmm.oscar.chemnamedict.entities.FormatType')
  end

  # Returns the shared Oscar instance, creating it on first use.
  #
  # Runs init on demand so that calling tagger directly, before match, does
  # not fail on an uninitialized @@OSCAR.
  def self.tagger
    init unless class_variable_defined?(:@@OSCAR)
    @@tagger ||= @@OSCAR.new
  end

  # Finds the named entities OSCAR4 recognizes in +text+.
  #
  # text - String to scan; nil or whitespace-only text yields [].
  # type - optional entity type name; when given, only entities whose
  #        getType.toString equals it are kept.
  #
  # Returns an Array with the surface text of every kept entity, each one
  # set up through NamedEntity.setup with the entity's start, type and
  # confidence as reported by OSCAR4.
  def self.match(text, type = nil)
    init

    return [] if text.nil? || text.strip.empty?

    # findAndResolveNamedEntities is the resolving alternative, currently
    # disabled in favour of plain recognition:
    #entities = oscar.findAndResolveNamedEntities(text);
    entities = tagger.findNamedEntities(text)
    iterator = entities.iterator

    result = []

    while iterator.hasNext
      entity = iterator.next

      # Skip entities of other types when a type filter was requested.
      next if !type.nil? && entity.getType.toString != type

      mention = entity.getSurface

      # Structure resolution is disabled, so no InChI code is attached:
      #inchi = entity.getFirstChemicalStructure(@@FormatType.INCHI)
      #inchi = inchi.getValue() unless inchi.nil?
      inchi = nil

      NamedEntity.setup mention, entity.getStart, entity.getType, inchi, entity.getConfidence

      result << mention
    end

    result
  end

  # Instance-level entry point; forwards all arguments to OSCAR4.match.
  def match(*args)
    OSCAR4.match(*args)
  end
end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
rbbt-text-1.2.0 | lib/rbbt/ner/oscar4.rb |
rbbt-text-1.1.9 | lib/rbbt/ner/oscar4.rb |