require 'rubygems'
require 'rspec'

# Silence verbose warnings while the library files are loaded: the class definitions
# may be loaded multiple times, which makes Ruby report that constants have already
# been initialized. That is true, but they are only "re-initialized" with the very
# same values.
v, $VERBOSE = $VERBOSE, nil
load 'lib/biointerchange/core.rb'
load 'lib/biointerchange/reader.rb'
load 'lib/biointerchange/model.rb'
load 'lib/biointerchange/textmining/text_mining_reader.rb'
load 'lib/biointerchange/textmining/pubannos_json_reader.rb'
load 'lib/biointerchange/textmining/document.rb'
load 'lib/biointerchange/textmining/content.rb'
load 'lib/biointerchange/textmining/content_connection.rb'
load 'lib/biointerchange/textmining/process.rb'
$VERBOSE = v

# Specification of the PubAnnotation JSON reader: deserializing from strings and
# files, and the shape of the resulting document model for both the old and the
# current JSON layout.
#
# Each expectation is written as its own statement. Chaining them with `and`/`or`
# only works by accident of `should` returning a truthy value, and `or` silently
# skips the right-hand expectation whenever the left-hand one passes.
describe BioInterchange::TextMining::PubAnnosJSONReader do
  describe 'deserialization of pubannos json text-mining documents' do
    # The reader accepts both a JSON string and an IO object.
    describe 'IO check' do
      before :all do
        @reader = BioInterchange::TextMining::PubAnnosJSONReader.new(
          "Test", "http://test.com", "00-00-0000",
          BioInterchange::TextMining::Process::UNSPECIFIED, "0.0"
        )
      end

      it 'reader is not postponed upon instantiation' do
        @reader.postponed?.should eql false
      end

      it 'read json from string' do
        model = @reader.deserialize('{"docurl":"http://example.org/test","text":""}')
        model.should be_an_instance_of BioInterchange::TextMining::Document
      end

      # Block-form File.open closes the handle once deserialization returns.
      # NOTE(review): assumes deserialize consumes the stream before returning;
      # the returned model is inspected immediately, which suggests it does -- confirm.
      it 'read json from file' do
        model = File.open('examples/pubannotation.10096561.json') { |io| @reader.deserialize(io) }
        model.should be_an_instance_of BioInterchange::TextMining::Document
      end

      it 'read old json from file' do
        model = File.open('examples/pubannotation.10096561.json.old') { |io| @reader.deserialize(io) }
        model.should be_an_instance_of BioInterchange::TextMining::Document
      end
    end

    # Old layout: annotations carry "begin"/"end" directly on each "catanns" entry.
    describe 'old json generated model checks' do
      before :all do
        reader = BioInterchange::TextMining::PubAnnosJSONReader.new(
          "TestOld", "http://test.com", "00-00-0000",
          BioInterchange::TextMining::Process::UNSPECIFIED, "0.0"
        )
        # Adjacent literals are concatenated into one JSON string; "\n" stays a
        # JSON escape because the literals are single-quoted.
        @model = reader.deserialize(
          '{ "name": "Peter Smith", "name_id": "", "date": "2012-08-12", "version": "3", ' \
          '"docurl":"http://example.org/example_json", ' \
          '"text":"Some document text. With two annotations of type protein.\n", ' \
          '"catanns":[{"annset_id":1,"begin":0,"category":"Protein","doc_id":9,"end":10,"id":139},' \
          '{"annset_id":1,"begin":20,"category":"Protein","doc_id":9,"end":42,"id":138}]}'
        )
      end

      it 'model is of type document' do
        @model.should be_an_instance_of BioInterchange::TextMining::Document
      end

      it 'document uri (job id read)' do
        @model.uri.should eql "http://example.org/example_json"
      end

      it 'document has content' do
        @model.contents.size.should eql 3
      end

      it 'document document' do
        @model.contents[0].type.should eql BioInterchange::TextMining::Content::DOCUMENT
        @model.contents[0].offset.should eql 0
        @model.contents[0].length.should eql 58
      end

      it 'document phrase' do
        @model.contents[1].type.should eql BioInterchange::TextMining::Content::PHRASE
        @model.contents[1].offset.should eql 0
        @model.contents[1].length.should eql 10

        @model.contents[2].type.should eql BioInterchange::TextMining::Content::PHRASE
        @model.contents[2].offset.should eql 20
        @model.contents[2].length.should eql 22
      end
    end

    # Current layout: annotation offsets are nested in a "span" object.
    describe 'basic generated model checks' do
      before :all do
        reader = BioInterchange::TextMining::PubAnnosJSONReader.new(
          "Test", "http://test.com", "00-00-0000",
          BioInterchange::TextMining::Process::UNSPECIFIED, "0.0"
        )
        @model = reader.deserialize(
          '{ "name": "Peter Smith", "name_id": "", "date": "2012-12-08", "version": "3", ' \
          '"docurl":"http://example.org/example_json", ' \
          '"text":"Some document text. With two annotations of type protein.\n", ' \
          '"catanns":[{"id":"T1","span":{"begin":0,"end":10},"category":"NP"},' \
          '{"id":"T2","span":{"begin":20,"end":42},"category":"NP"}]}'
        )
      end

      it 'model is of type document' do
        @model.should be_an_instance_of BioInterchange::TextMining::Document
      end

      it 'document uri (job id read)' do
        @model.uri.should eql "http://example.org/example_json"
      end

      it 'document has content' do
        @model.contents.size.should eql 3
      end

      it 'document document' do
        @model.contents[0].type.should eql BioInterchange::TextMining::Content::DOCUMENT
        @model.contents[0].offset.should eql 0
        @model.contents[0].length.should eql 58
      end

      it 'document phrase' do
        @model.contents[1].type.should eql BioInterchange::TextMining::Content::PHRASE
        @model.contents[1].offset.should eql 0
        @model.contents[1].length.should eql 10

        @model.contents[2].type.should eql BioInterchange::TextMining::Content::PHRASE
        @model.contents[2].offset.should eql 20
        @model.contents[2].length.should eql 22
      end
    end

    # Model built from an example file; checks content entries and the
    # connections between them.
    describe 'advanced generated model checks' do
      before :all do
        reader = BioInterchange::TextMining::PubAnnosJSONReader.new(
          "Test", "http://test.com", "00-00-0000",
          BioInterchange::TextMining::Process::UNSPECIFIED, "0.0"
        )
        # NOTE(review): assumes deserialize consumes the stream before returning -- confirm.
        @model = File.open('examples/pubannotation.2626671.json') { |io| reader.deserialize(io) }
      end

      it 'model is of type document' do
        @model.should be_an_instance_of BioInterchange::TextMining::Document
      end

      it 'document uri (job id read)' do
        @model.uri.should eql "http://www.ncbi.nlm.nih.gov/pubmed/2626671"
      end

      it 'document has content' do
        @model.contents.size.should eql 91
      end

      it 'document document' do
        @model.contents[0].type.should eql BioInterchange::TextMining::Content::DOCUMENT
        @model.contents[0].offset.should eql 0

        # Range, as the exact length seems to depend on the encoding used.
        # Both bounds are separate expectations so that each one is actually checked.
        length = @model.contents[0].length
        length.should be > 2350
        length.should be < 2360
      end

      it 'document content types and interconnections' do
        # Indices into @model.contents of the entries under inspection.
        doc = 1
        sub = 39
        eq = 62
        th = 64
        spec = 87
        neg = 89

        @model.contents[doc].type.should eql BioInterchange::TextMining::Content::PHRASE
        @model.contents[doc].offset.should eql 9
        @model.contents[doc].length.should eql 10

        @model.contents[sub].type.should eql BioInterchange::TextMining::ContentConnection::SUBCLASS
        @model.contents[sub].content1.should eql nil
        @model.contents[sub].content2.offset.should eql 9

        @model.contents[eq].type.should eql BioInterchange::TextMining::ContentConnection::EQUIVALENCE
        @model.contents[eq].content1.offset.should eql 396
        @model.contents[eq].content2.offset.should eql 386

        @model.contents[th].type.should eql BioInterchange::TextMining::ContentConnection::THEME
        @model.contents[th].content1.offset.should eql 32
        @model.contents[th].content2.content2.offset.should eql 9

        @model.contents[spec].type.should eql BioInterchange::TextMining::ContentConnection::SPECULATION
        @model.contents[spec].content1.content2.offset.should eql 9
        @model.contents[spec].content2.should eql nil

        @model.contents[neg].type.should eql BioInterchange::TextMining::ContentConnection::NEGATION
        @model.contents[neg].content1.content2.offset.should eql 426
        @model.contents[neg].content2.should eql nil
      end
    end
  end
end