require 'rubygems'
require 'rspec'
# Turn off verbose reporting while loading, since the class definitions may be
# loaded multiple times. Ruby then reports that constants have already been
# initialized, which is true, but they are only "re-initialized" with the very
# same values.
previous_verbose = $VERBOSE
$VERBOSE = nil
begin
  load 'lib/biointerchange/core.rb'
  load 'lib/biointerchange/reader.rb'
  load 'lib/biointerchange/textmining/text_mining_reader.rb'
  load 'lib/biointerchange/textmining/pdfx_xml_reader.rb'
  load 'lib/biointerchange/textmining/document.rb'
  load 'lib/biointerchange/textmining/content.rb'
  load 'lib/biointerchange/textmining/process.rb'
ensure
  # Restore the previous warning level even if one of the loads raises, so a
  # load failure does not leave warnings silenced for the rest of the process.
  $VERBOSE = previous_verbose
end
# Specs for BioInterchange::TextMining::PdfxXmlReader. The first group checks
# that the reader accepts both string and file input; the second checks the URI
# and the content entries (type, offset, length) of a model deserialized from a
# small inline input.
describe BioInterchange::TextMining::PdfxXmlReader do
  describe 'deserialization of pdfx text-mining documents' do
    describe 'IO check' do
      before :all do
        @reader = BioInterchange::TextMining::PdfxXmlReader.new(
          "Test",
          "http://test.com",
          "00-00-0000",
          BioInterchange::TextMining::Process::UNSPECIFIED,
          "0.0"
        )
      end

      it 'reader is not postponed upon instantiation' do
        @reader.postponed?.should eql false
      end

      it 'read pdfx from string' do
        model = @reader.deserialize("text")
        model.should be_an_instance_of BioInterchange::TextMining::Document
      end

      it 'read pdfx from file' do
        # Block form guarantees the file handle is closed when the example
        # ends, including when deserialization or the expectation fails. The
        # expectation stays inside the block so the file is open for as long
        # as the reader could possibly need it.
        File.open('examples/gb-2007-8-3-R40.xml') do |file|
          model = @reader.deserialize(file)
          model.should be_an_instance_of BioInterchange::TextMining::Document
        end
      end
    end

    describe 'generated model check' do
      before :all do
        reader = BioInterchange::TextMining::PdfxXmlReader.new(
          "Test",
          "http://test.com",
          "00-00-0000",
          BioInterchange::TextMining::Process::UNSPECIFIED,
          "0.0"
        )
        # NOTE(review): this literal is shorter than the document length of 90
        # expected below -- confirm the fixture text is intact.
        @model = reader.deserialize("rspec_testTITLEABSTRACTBODY TEXTSECTION LEVEL 1END SECTION LEVEL 1")
        # Debugging aid: uncomment to dump the generated model.
        # puts "Document Model: #{@model.uri}"
        # @model.contents.each do |c|
        #   puts "\tContent: #{c.type}, #{c.offset}, #{c.length}"
        # end
      end

      # Each expectation below is its own statement (rather than being chained
      # with `and`), so every one of them is evaluated regardless of what the
      # previous `should` call returned.

      it 'model is of type document' do
        @model.should be_an_instance_of BioInterchange::TextMining::Document
      end

      it 'document uri (job id read)' do
        @model.uri.should eql "http://pdfx.cs.man.ac.uk/rspec_test"
      end

      it 'document has content' do
        @model.contents.size.should eql 7
      end

      it 'document document' do
        # The whole-document entry is expected last among the seven contents.
        @model.contents[6].type.should eql BioInterchange::TextMining::Content::DOCUMENT
        @model.contents[6].offset.should eql 0
        @model.contents[6].length.should eql 90
      end

      it 'document title' do
        @model.contents[0].type.should eql BioInterchange::TextMining::Content::TITLE
        @model.contents[0].offset.should eql 0
        @model.contents[0].length.should eql 5
      end

      it 'document abstract' do
        @model.contents[1].type.should eql BioInterchange::TextMining::Content::ABSTRACT
        @model.contents[1].offset.should eql 5
        @model.contents[1].length.should eql 8
      end

      it 'document body' do
        # The body is expected to be reported with the SECTION content type.
        @model.contents[5].type.should eql BioInterchange::TextMining::Content::SECTION
        @model.contents[5].offset.should eql 13
        @model.contents[5].length.should eql 77
      end

      it 'document sections' do
        @model.contents[2].type.should eql BioInterchange::TextMining::Content::SECTION
        @model.contents[2].offset.should eql 37
        @model.contents[2].length.should eql 17

        @model.contents[3].type.should eql BioInterchange::TextMining::Content::SECTION
        @model.contents[3].offset.should eql 54
        @model.contents[3].length.should eql 17

        @model.contents[4].type.should eql BioInterchange::TextMining::Content::SECTION
        @model.contents[4].offset.should eql 22
        @model.contents[4].length.should eql 68
      end
    end
  end
end