Sha256: aab280ab8da6cf18a6daf2ed6d7508cb0ef9d83b70d49d490a9837760a3287d8

Contents?: true

Size: 1.79 KB

Versions: 2

Compression:

Stored size: 1.79 KB

Contents

module BioInterchange::TextMining

require 'rubygems'
require 'json'

# Reader for the *PubAnnotation* JSON format. Turns a JSON payload into a
# Document that holds one Content entry for the whole text plus one Content
# entry per annotation listed under the 'catanns' key.
class PubannosJsonReader < BioInterchange::TextMining::TMReader

  # Deserializes PubAnnotation JSON into a Document.
  #
  # +inputstream+:: an IO (read in full) or a String holding the JSON text
  #
  # Returns the Document built by +pubannos+. Any other kind of input is
  # passed on to the superclass implementation (presumably that raises an
  # exception -- confirm against TMReader).
  def deserialize(inputstream)
    case inputstream
    when IO
      pubannos(inputstream.read)
    when String
      pubannos(inputstream)
    else
      super(inputstream)
    end
  end

  private

  # Specific method for parsing of *Pubannotations* json format
  #
  # +data+:: String containing the JSON document
  #
  # Returns a Document identified by the payload's 'docurl' value.
  # Raises BioInterchange::Exceptions::InputFormatError when the payload has
  # a top-level 'Error' key; the value of that key is included in the message.
  def pubannos(data)
    result = JSON.parse(data)

    if result.key?('Error')
      # Double quotes are required here so that the reported error text is
      # actually interpolated into the message.
      raise BioInterchange::Exceptions::InputFormatError,
            "Error parsing the JSON input file: #{result['Error']}"
    end

    text = result['text']
    # An alternative URI could be built from result['pmid']; the payload's
    # own 'docurl' is used instead.
    doc_uri = result['docurl']

    doc = Document.new(doc_uri)
    # One Content entry spanning the complete text (offset 0, full length).
    # NOTE(review): @process is not assigned in this class -- presumably it is
    # provided by TMReader; confirm.
    doc_content = Content.new(0, text.length, Content::DOCUMENT, @process)
    doc_content.setContext(doc)
    doc.add(doc_content)

    # Open question kept from the original author: the document requires
    # content of type document or abstract -- should it hold the content string?

    annotations = result['catanns']
    if annotations
      annotations.each do |annot|
        start_offset = annot['begin']
        # Content takes an offset and a length, so the end position is
        # converted into a length relative to the start.
        length = annot['end'] - start_offset

        # phrase = type for NE
        phrase = Content.new(start_offset, length, Content::PHRASE, @process)
        phrase.setContext(doc)
        doc.add(phrase)

        # Open question kept from the original author:
        # set process.date = annot['updated_at']?
      end
    end

    doc
  end

end

end

Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
biointerchange-0.1.2 lib/biointerchange/textmining/pubannos_json_reader.rb
biointerchange-0.1.0 lib/biointerchange/textmining/pubannos_json_reader.rb