Sha256: 67ac8018015231a4e210853fcce3d101381d26e761ac188bb0b34e0af1febdf7
Contents?: true
Size: 1.5 KB
Versions: 1
Compression:
Stored size: 1.5 KB
Contents
require 'rbbt'
require 'rjb'
require 'rbbt/ner/annotations'
require 'rbbt/ner/NER'

# Offers a Ruby interface to the Abner Named Entity Recognition Package
# in Java Abner[http://www.cs.wisc.edu/~bsettles/abner/].
class Abner < NER

  # Declares the ABNER software resource from the install script found under
  # Rbbt's share directory.
  # NOTE(review): presumably this lets Rbbt install ABNER on demand -- confirm
  # against the Rbbt resource documentation.
  Rbbt.software.opt.ABNER.define_as_install Rbbt.share.install.software.ABNER.find

  # Java classes imported through Rjb. They are resolved once, when this class
  # body is evaluated.
  @@JFile   = Rjb::import('java.io.File')
  @@Tagger  = Rjb::import('abner.Tagger')
  @@Trainer = Rjb::import('abner.Trainer')

  # If modelfile is present a custom trained model can be used,
  # otherwise, the default BioCreative model is used.
  #
  # modelfile:: path to a trained model file, or nil for the default model.
  def initialize(modelfile = nil)
    @tagger = if modelfile.nil?
                @@Tagger.new(@@Tagger.BIOCREATIVE)
              else
                @@Tagger.new(@@JFile.new(modelfile))
              end
  end

  # Given a chunk of text, it finds all the mentions appearing in it. It
  # returns all the mentions found, regardless of type, to be coherent
  # with the rest of NER packages in Rbbt.
  #
  # text:: the String to scan; nil or empty yields an empty Array.
  #
  # Returns an Array with one mention String per entity reported by the
  # tagger. Each mention is passed through NamedEntity.annotate together with
  # its offset in +text+ (nil when the mention cannot be located verbatim) and
  # its type.
  def match(text)
    return [] if text.nil? || text.empty?

    entities = @tagger.getEntities(text)
    strings  = entities[0]
    types    = entities[1]

    # Mentions are searched for left to right: after each hit the search
    # window is moved past it, so a repeated mention resolves to its next
    # occurrence instead of the first one again.
    remaining = text
    consumed  = 0 # characters of +text+ already skipped over

    strings.zip(types).map do |mention, type|
      mention = mention.to_s
      offset  = remaining.index(mention)

      if offset.nil?
        # Mention not found verbatim in the remaining text: annotate it
        # without an offset and leave the search window untouched.
        NamedEntity.annotate(mention, nil, type.to_s)
      else
        NamedEntity.annotate(mention, consumed + offset, type.to_s)
        advance   = offset + mention.length
        remaining = remaining[advance..-1]
        consumed += advance
      end

      mention
    end
  end

end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
rbbt-text-0.5.0 | lib/rbbt/ner/abner.rb |