Sha256: 96c035f39ea3b7621727b104f3e5df41609c8165714b5ef3270c32156e91a76d

Contents?: true

Size: 1.34 KB

Versions: 11

Compression:

Stored size: 1.34 KB

Contents

require 'rjb'
require 'rbbt'
require 'rbbt/segment/named_entity'

# Thin wrapper around the Java Linnaeus entity tagger, driven through the Rjb
# Ruby/Java bridge. The only entry point most callers need is Linnaeus.match.
module Linnaeus

  # Declare the Linnaeus software directory as an Rbbt resource backed by the
  # install script found under share/install/software.
  # NOTE(review): presumably this lets Rbbt install Linnaeus on demand -- confirm
  # against the Rbbt resource documentation.
  Rbbt.claim Rbbt.software.opt.Linnaeus, :install, Rbbt.share.install.software.Linnaeus.find

  # Arguments handed to martin.common.ArgParser in Linnaeus.init; they point
  # Linnaeus at the species-proxy properties file inside the software directory.
  ARGS = ["--properties", Rbbt.software.opt.Linnaeus.produce["species-proxy/properties.conf"].find]

  # Start the JVM with a 2G initial and maximum heap, unless some other code has
  # already loaded the bridge (in which case these options are not applied here).
  # The options are passed positionally as the second argument.
  Rjb::load(nil, ['-Xms2G', '-Xmx2G']) unless Rjb.loaded?

  # Imports the Java classes and builds the matcher used by Linnaeus.match.
  #
  # Results are stored in module class variables (@@ArgParser, @@Args,
  # @@Loggers, @@Logger, @@EntityTagger, @@Matcher).
  #
  # Any StandardError raised while setting up is re-raised unchanged; when its
  # message mentions "heap space" a warning is logged first.
  def self.init
    @@ArgParser    = Rjb::import('martin.common.ArgParser')
    @@Args         = @@ArgParser.new(ARGS)
    @@Loggers      = Rjb::import('martin.common.Loggers')
    @@Logger       = @@Loggers.getDefaultLogger(@@Args)
    @@EntityTagger = Rjb::import('uk.ac.man.entitytagger.EntityTagger')
    @@Matcher      = @@EntityTagger.getMatcher(@@Args, @@Logger)
  rescue => e
    if e.message =~ /heap space/i
      Log.warn "Heap Space seems too low. Make sure Linnaeus is loaded before other Java wrappers so that it has the chance to init the Java Bridge with sufficient heap space"
    end
    # Bare raise re-raises the exception currently being handled.
    raise
  end

  # Runs the Linnaeus matcher over +text+ and returns one entry per mention.
  #
  # Each entry is the result of NamedEntity.setup called on the mention text
  # with its start offset, the entity type "Organism", and the id/probability
  # pair that sorts last by probability as :code and :score.
  #
  # The matcher is initialized on first use.
  def self.match(text)
    init unless defined? @@Matcher

    @@Matcher.match(text).toArray().map do |mention|
      # Pair every candidate id with its probability; to_f makes a missing (nil)
      # probability sort as 0.0. sort_by + last is kept on purpose so that the
      # choice among equally-scored ids stays as it was.
      candidates = mention.ids().zip(mention.probabilities())
      best_id, best_prob = candidates.sort_by { |_id, prob| prob.to_f }.last

      NamedEntity.setup(mention.text(), :offset => mention.start(), :entity_type => "Organism", :code => best_id, :score => best_prob)
    end
  end
end

Version data entries

11 entries across 11 versions & 1 rubygems

Version Path
rbbt-text-1.5.2 lib/rbbt/ner/linnaeus.rb
rbbt-text-1.5.1 lib/rbbt/ner/linnaeus.rb
rbbt-text-1.5.0 lib/rbbt/ner/linnaeus.rb
rbbt-text-1.4.0 lib/rbbt/ner/linnaeus.rb
rbbt-text-1.3.11 lib/rbbt/ner/linnaeus.rb
rbbt-text-1.3.10 lib/rbbt/ner/linnaeus.rb
rbbt-text-1.3.9 lib/rbbt/ner/linnaeus.rb
rbbt-text-1.3.8 lib/rbbt/ner/linnaeus.rb
rbbt-text-1.3.7 lib/rbbt/ner/linnaeus.rb
rbbt-text-1.3.6 lib/rbbt/ner/linnaeus.rb
rbbt-text-1.3.5 lib/rbbt/ner/linnaeus.rb