RubygemsResearch

Sha256: a32edab3ecbe9375ce00cd4665d7462ceeb9f29935938def2914fa5480db5bcd

Contents?: true

Size: 1.82 KB

Versions: 11

Compression:

Stored size: 1.82 KB

require 'rbbt'
require 'rjb'
require 'rbbt/resource'
require 'rbbt/segment'
require 'rbbt/ner/NER'

# Offers a Ruby interface to ABNER, a Named Entity Recognition package
# written in Java: Abner[http://www.cs.wisc.edu/~bsettles/abner/].
class Abner < NER

  Rbbt.claim Rbbt.software.opt.ABNER, :install, Rbbt.share.install.software.ABNER.find

  # Asks Rbbt to produce the ABNER software resource and imports, through
  # Rjb, the Java classes used by this wrapper. The imported classes are
  # cached in class variables, so repeated calls do not import them again.
  def self.init
    Rbbt.software.opt.ABNER.produce
    @@JFile   ||= Rjb::import('java.io.File')
    @@Tagger  ||= Rjb::import('abner.Tagger')
    @@Trainer ||= Rjb::import('abner.Trainer')
  end

  # If modelfile is present a custom trained model can be used,
  # otherwise, the default BioCreative model is used.
  def initialize(modelfile = nil)
    Abner.init

    @tagger = if modelfile.nil?
                @@Tagger.new(@@Tagger.BIOCREATIVE)
              else
                @@Tagger.new(@@JFile.new(modelfile))
              end
  end

  # Given a chunk of text, it finds all the mentions appearing in it. It
  # returns all the mentions found, regardless of type, to be coherent
  # with the rest of NER packages in Rbbt.
  #
  # When fix_encode is true the text is first transcoded from 'binary' to
  # 'utf-8', dropping every byte that cannot be converted.
  #
  # Returns an Array with one entry per mention reported by the tagger, each
  # one passed through NamedEntity.setup with the document id, the entity
  # type (always as a String) and, when the mention could be located in the
  # text, its offset. Returns an empty Array for nil or empty text.
  def match(text, fix_encode = true)
    return [] if text.nil? || text.empty?

    text = text.encode('utf-8', 'binary', :invalid => :replace, :undef => :replace, :replace => '') if fix_encode

    # getEntities answers two parallel lists: the mention strings first and
    # their types second.
    res = @tagger.getEntities(text)
    strings = res[0]
    types = res[1]

    docid = Misc.digest(text)

    # Mentions are searched for only in the part of the text that follows the
    # previously located mention; consumed keeps how many characters of the
    # text have been left behind so offsets stay relative to the whole text.
    remaining = text
    consumed = 0

    strings.zip(types).map do |mention, type|
      mention = mention.to_s
      entity_type = type.to_s
      offset = remaining.index(mention)

      if offset.nil?
        # The mention could not be found verbatim in the remaining text, so
        # it is reported without an offset.
        NamedEntity.setup(mention, :docid => docid, :entity_type => entity_type)
      else
        NamedEntity.setup(mention, :offset => offset + consumed, :docid => docid, :entity_type => entity_type)
        mention_end = offset + mention.length
        remaining = remaining[mention_end..-1]
        consumed += mention_end
      end

      mention
    end
  end

end

Version data entries

11 entries across 11 versions & 1 rubygems

Version	Path
rbbt-text-1.5.2	lib/rbbt/ner/abner.rb
rbbt-text-1.5.1	lib/rbbt/ner/abner.rb
rbbt-text-1.5.0	lib/rbbt/ner/abner.rb
rbbt-text-1.4.0	lib/rbbt/ner/abner.rb
rbbt-text-1.3.11	lib/rbbt/ner/abner.rb
rbbt-text-1.3.10	lib/rbbt/ner/abner.rb
rbbt-text-1.3.9	lib/rbbt/ner/abner.rb
rbbt-text-1.3.8	lib/rbbt/ner/abner.rb
rbbt-text-1.3.7	lib/rbbt/ner/abner.rb
rbbt-text-1.3.6	lib/rbbt/ner/abner.rb
rbbt-text-1.3.5	lib/rbbt/ner/abner.rb