Sha256: 80e612d7bbcf182d844ab1eb7580bcf18322e60b46f6ceed1a3421f38ddeecb7

Contents?: true

Size: 1.8 KB

Versions: 13

Compression:

Stored size: 1.8 KB

Contents

#!/usr/bin/env ruby

require 'rbbt-util'
require 'rbbt/annotations/corpus'
require 'rbbt/annotations/corpus/pubmed'
require 'rbbt/annotations/relationships/ppi'
require 'rbbt/sources/pubmed'
require 'rbbt/ner/annotations'
require 'rbbt/ner/token_trieNER'
require 'rbbt/ner/annotations/transformed'
require 'rbbt/ner/chemical_tagger'

# Lazily-built ChemicalTagger shared by every invocation of the "Compounds"
# recognizer below. It is held in a local captured by the block's closure
# rather than in a class variable: class-variable access from top-level scope
# raises RuntimeError on Ruby >= 3.0 (and warns on earlier versions).
chemical_tagger = nil

# Registers the "Compounds" entity recognizer with Corpus. The second argument
# (false) is passed to Corpus.define_entity_ner unchanged; its meaning is
# defined there and is not visible in this script.
#
# The block receives a document and returns whatever ChemicalTagger#entities
# yields for the document's text.
Corpus.define_entity_ner "Compounds", false do |doc|
  chemical_tagger ||= ChemicalTagger.new
  chemical_tagger.entities(doc.text)
end

# Lazily-built TokenTrieNER shared by every invocation of the "Diseases"
# recognizer below. It is held in a local captured by the block's closure
# rather than in a class variable: class-variable access from top-level scope
# raises RuntimeError on Ruby >= 3.0 (and warns on earlier versions).
disease_tokenizer = nil

# Registers the "Diseases" entity recognizer with Corpus. The second argument
# (false) is passed to Corpus.define_entity_ner unchanged; its meaning is
# defined there and is not visible in this script.
#
# On first use the block builds a longest-match TokenTrieNER and merges three
# term sources into it, tagged :COSTART, :CTCAE and :disease respectively.
# The tokenizer is only cached once all merges have succeeded, so a failure
# while loading a source is retried on the next call instead of leaving a
# half-populated tokenizer in place.
Corpus.define_entity_ner "Diseases", false do |doc|
  disease_tokenizer ||= begin
    tokenizer = TokenTrieNER.new [], :longest_match => true
    # NOTE(review): COSTART uses :extra => 0 while CTCAE uses :extra => 1;
    # presumably this reflects the column layout of each file -- confirm.
    tokenizer.merge TSV.new(Rbbt.share.databases.COSTART.COSTART, :native => 0, :extra => 0, :flatten => true), :COSTART
    tokenizer.merge TSV.new(Rbbt.share.databases.CTCAE.CTCAE, :native => 0, :extra => 1, :flatten => true), :CTCAE
    tokenizer.merge Rbbt.share.databases.Polysearch.disease, :disease
    tokenizer
  end
  disease_tokenizer.entities(doc.text)
end

# Build the corpus from the path that Rbbt resolves for tmp/corpus "PPIS2".
corpus_path = Rbbt.tmp.corpus["PPIS2"].find
corpus = Corpus.new(corpus_path)

# Add the results of the PubMed query "Cancer" to the corpus and keep the
# returned document ids.
# NOTE(review): 5000 is presumably a result limit and :abstract the part of
# each article to store -- confirm against Corpus#add_pubmed_query.
docids = corpus.add_pubmed_query("Cancer", 5000, :abstract)

# Run the disease recognizer over the documents at indices 0..100 (inclusive)
# under Misc.profile.
Misc.profile do
  docids[0..100].each do |article_id|
    puts "ARTICLE: #{article_id}"

    # The result is only consumed by the disabled reporting code below.
    diseases = corpus.docid(article_id).produce_diseases

    # Disabled: per-document and per-sentence reporting of the diseases found.
    #
    # puts "Diseases: #{diseases.collect{|g| [g,g.id,g.offset] * ":"} * ", "}"
    # sentences = doc.sentences
    # diseases_index = Segment.index(diseases)
    # sentences.each do |sentence|
    #   diseases_in_sentence = diseases_index[sentence.range]
    #   next if diseases_in_sentence.empty?
    #   Transformed.transform(sentence, sentence.make_relative(diseases_in_sentence.dup)) do |entity|
    #     entity.html
    #   end
    #   puts "---#{[sentence.id, sentence.offset] * ":"}"
    #   puts sentence
    #   puts "Diseases: #{diseases_in_sentence.collect{|g| [g,g.id,g.offset] * ":"} * ", "}"
    #   sentence.restore
    # end
  end
end

Version data entries

13 entries across 13 versions & 1 rubygems

Version Path
rbbt-text-1.1.7 bin/get_ppis.rb
rbbt-text-1.1.6 bin/get_ppis.rb
rbbt-text-1.1.5 bin/get_ppis.rb
rbbt-text-1.1.4 bin/get_ppis.rb
rbbt-text-1.1.3 bin/get_ppis.rb
rbbt-text-1.1.2 bin/get_ppis.rb
rbbt-text-1.1.1 bin/get_ppis.rb
rbbt-text-1.1.0 bin/get_ppis.rb
rbbt-text-1.0.0 bin/get_ppis.rb
rbbt-text-0.6.3 bin/get_ppis.rb
rbbt-text-0.6.2 bin/get_ppis.rb
rbbt-text-0.6.0 bin/get_ppis.rb
rbbt-text-0.5.0 bin/get_ppis.rb