Sha256: aab280ab8da6cf18a6daf2ed6d7508cb0ef9d83b70d49d490a9837760a3287d8
Contents?: true
Size: 1.79 KB
Versions: 2
Compression:
Stored size: 1.79 KB
Contents
module BioInterchange::TextMining

  require 'rubygems'
  require 'json'

  # Reader that turns PubAnnotation-style JSON into a Document populated with
  # Content instances (one for the whole text, one per entry in 'catanns').
  class PubannosJsonReader < BioInterchange::TextMining::TMReader

    # Deserializes PubAnnotation JSON into a Document.
    #
    # +inputstream+:: either an IO instance (its complete contents are read) or
    #                 a String holding the JSON text; any other type is handed
    #                 to the superclass implementation
    #
    # Returns the Document built by #pubannos.
    def deserialize(inputstream)
      case inputstream
      when IO
        pubannos(inputstream.read)
      when String
        pubannos(inputstream)
      else
        # Unsupported input type: defer to the superclass, which is presumably
        # where the exception for bad input is raised (TODO confirm in TMReader).
        super(inputstream)
      end
    end

  private

    # Specific method for parsing of *Pubannotations* json format.
    #
    # +data+:: String containing the JSON document
    #
    # Returns a Document whose URI is taken from the 'docurl' key.
    # Raises BioInterchange::Exceptions::InputFormatError when the parsed JSON
    # carries an 'Error' key.
    def pubannos(data)
      result = JSON.parse(data)

      if result.has_key? 'Error'
        # Double-quoted so that the reported error text is actually interpolated
        # into the message.
        raise BioInterchange::Exceptions::InputFormatError,
              "Error parsing the JSON input file: #{result['Error']}"
      end

      text = result['text']
      #doc_uri = "http://pubannotation.dbcls.jp/pmdocs/" + result['pmid'].to_s
      doc_uri = result['docurl']

      doc = Document.new(doc_uri)

      # Content spanning the complete text, typed as DOCUMENT.
      # NOTE(review): @process is not assigned in this class; presumably it is
      # set up by TMReader -- confirm.
      doc_content = Content.new(0, text.length, Content::DOCUMENT, @process)
      doc_content.setContext(doc)
      doc.add(doc_content)

      # so our document requires content of type document or abstract
      # should it hold the content string?

      if result['catanns']
        result['catanns'].each do |annot|
          start_offset = annot['begin']
          end_offset = annot['end']
          length = end_offset - start_offset

          # Further keys mentioned for each annotation but currently unused:
          # 'created_at', 'updated_at', 'category', 'annset_id', 'doc_id', 'id'.

          # phrase = type for NE
          con = Content.new(start_offset, length, Content::PHRASE, @process)
          con.setContext(doc)
          doc.add(con)

          # set process.date = updated_time?
        end
      end

      doc
    end

  end

end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
biointerchange-0.1.2 | lib/biointerchange/textmining/pubannos_json_reader.rb |
biointerchange-0.1.0 | lib/biointerchange/textmining/pubannos_json_reader.rb |