require 'spec_helper'
module GutenbergRdf
describe Rdf do
# Exercises the scalar metadata readers on Rdf: id, published, publisher,
# language and rights.
# NOTE(review): the fixture below holds only text values (no element markup,
# and nothing that reads "98765"), so the XML tags were presumably lost
# before this file reached review -- confirm against the real fixture.
describe "basic metadata" do
  # Built lazily from whichever `xml` is in scope for the running example.
  let(:rdf) do
    document = REXML::Document.new(xml)
    Rdf.new(document)
  end

  # The literal's content and closing quote are kept in column 0 so that the
  # surrounding indentation adds no whitespace to the string.
  let(:xml) do
    '
2006-09-28
en
Project Gutenberg
Public domain in the USA.
'
  end

  it "expects an id" do
    expect(rdf.id).to eql("98765")
  end

  it "expects a published date" do
    expect(rdf.published).to eql("2006-09-28")
  end

  it "expects a publisher" do
    expect(rdf.publisher).to eql("Project Gutenberg")
  end

  it "expects a language" do
    expect(rdf.language).to eql("en")
  end

  it "expects the rights" do
    expect(rdf.rights).to eql("Public domain in the USA.")
  end
end
# Checks that Rdf#type returns the type text carried by the fixture.
# NOTE(review): the fixture is bare text with no XML markup; presumably the
# tags were lost before review -- confirm against the real fixture.
describe "#type" do
  let(:rdf) do
    document = REXML::Document.new(xml)
    Rdf.new(document)
  end

  # Content and closing quote stay in column 0 to leave the literal untouched.
  let(:xml) do
    '
Text
'
  end

  it "expect the type of entity" do
    expect(rdf.type).to eql('Text')
  end
end
# Title and subtitle extraction, covering the ways one title string is split
# into the two parts: a newline, ", or,", a colon, or a semi-colon.
# NOTE(review): the fixtures are bare text with no XML element markup;
# presumably the tags were lost before review -- confirm against the real file.
describe "Titles" do
  # Fixture content and the closing quote are kept in column 0 throughout this
  # group so that indentation adds no whitespace to the string literals.
  let(:xml) do
    '
A Great Title
'
  end

  # Declared once for the whole group. `let` is lazy and inherited by nested
  # groups, so each context below only overrides `xml`; `rdf` is then built
  # from that override when an example first calls it.
  let(:rdf) { Rdf.new(REXML::Document.new(xml)) }

  it "expects a title" do
    expect(rdf.title).to eql 'A Great Title'
  end

  it "expects subtitle to be empty" do
    expect(rdf.subtitle).to eql ''
  end

  context "with a title and subtitle, on separate lines" do
    let(:xml) do
      '
A Great Multi-Title
Or, a Subtitle
'
    end

    it "expects the title to be the first line" do
      expect(rdf.title).to eql 'A Great Multi-Title'
    end

    it "expects the subtitle to be the second line" do
      expect(rdf.subtitle).to eql 'Or, a Subtitle'
    end
  end

  context "with; title, or, subtitle (we need to split on the 'or')" do
    let(:xml) do
      '
A Great Multi-Title, or, a Subtitle
'
    end

    it "expects the title to be the first line" do
      expect(rdf.title).to eql 'A Great Multi-Title'
    end

    it "expects the subtitle to be the second line" do
      expect(rdf.subtitle).to eql 'a Subtitle'
    end
  end

  context "when title:subtitle are separated by a colon" do
    let(:xml) do
      '
A Great Multi-Title: And a Subtitle
'
    end

    it "expects a title" do
      expect(rdf.title).to eql 'A Great Multi-Title'
    end

    it "expects a subtitle" do
      expect(rdf.subtitle).to eql 'And a Subtitle'
    end
  end

  context "when title; and subtitle are separated by a semi-colon" do
    let(:xml) do
      '
A Great Multi-Title; Or, a Subtitle
'
    end

    it "expects a title" do
      expect(rdf.title).to eql 'A Great Multi-Title'
    end

    it "expects a subtitle" do
      expect(rdf.subtitle).to eql 'Or, a Subtitle'
    end

    # When a second line already supplies the subtitle, the semi-colon in the
    # first line is expected to stay part of the title.
    context "...except when subtitles already exists" do
      let(:xml) do
        '
A Great Multi-Title; and some other text
Then a Subtitle on a newline
'
      end

      it "expects a title" do
        expect(rdf.title).to eql 'A Great Multi-Title; and some other text'
      end

      it "expects a subtitle" do
        expect(rdf.subtitle).to eql 'Then a Subtitle on a newline'
      end
    end
  end
end
# Checks the collection returned by Rdf#authors: its size, the element class,
# and the role / full name of individual entries.
# NOTE(review): the fixture is bare text with no XML markup; presumably the
# tags were lost before review -- confirm against the real fixture.
describe "#authors" do
  let(:rdf) do
    document = REXML::Document.new(xml)
    Rdf.new(document)
  end

  # Content and closing quote stay in column 0 to leave the literal untouched.
  let(:xml) do
    '
Verschillende
Various
1830
1905
Dodge, Mary Mapes
Dodge, Mary
'
  end

  it "returns the correct number of authors" do
    expect(rdf.authors.count).to be(2)
  end

  it "expects an Agent object" do
    expect(rdf.authors[0]).to be_an_instance_of(Rdf::Agent)
  end

  it "expects the author to have an aut role" do
    expect(rdf.authors[0].role).to eq('aut')
  end

  # The name is expected in "first last" order although the fixture text
  # lists it surname-first.
  it "has the correct author names" do
    expect(rdf.authors[1].fullname).to eq('Mary Mapes Dodge')
  end

  it "expects other agents to have the correct role" do
    expect(rdf.authors[1].role).to eq('ctb')
  end
end
# Checks that Rdf#subjects returns two entries with the expected text; the
# fixture has a third value ("PZ") that is not expected among them.
# NOTE(review): the fixture is bare text with no XML markup; presumably the
# tags were lost before review -- confirm against the real fixture.
describe "#subjects" do
  let(:rdf) do
    document = REXML::Document.new(xml)
    Rdf.new(document)
  end

  # %q{} is used because the text contains apostrophes. Content and the
  # closing brace stay in column 0 to leave the literal untouched.
  let(:xml) do
    %q{
PZ
Children's literature -- Periodicals
Children's periodicals, American
}
  end

  it "expects correct number to be returned" do
    expect(rdf.subjects.count).to be(2)
  end

  it "expects the correct data" do
    subjects = rdf.subjects
    expect(subjects.first).to eql("Children's literature -- Periodicals")
    expect(subjects.last).to eql("Children's periodicals, American")
  end
end
# Checks the list returned by Rdf#covers: which URLs it contains, their
# order, de-duplication, and conversion of file:// URIs to gutenberg.org URLs.
# NOTE(review): the fixtures are bare text with no XML element markup;
# presumably the tags were lost before review -- confirm against the real file.
describe "#covers" do
  # Declared once for both groups below. `let` is lazy and inherited by nested
  # groups, so each group only supplies its own `xml`.
  let(:rdf) { Rdf.new(REXML::Document.new(xml)) }

  describe "official PG covers" do
    # Content and closing quote stay in column 0 to leave the literal untouched.
    let(:xml) do
      '
2699
image/jpeg
2013-03-25T20:57:55.668737
http://www.gutenberg.org/files/98765/98765-h/images/cover.jpg
file:///public/vhost/g/gutenberg/html/files/98765/98765-h/images/cover.jpg
file:///public/vhost/g/gutenberg/html/files/98765/98765-rst/images/cover.jpg
2013-03-25T20:57:55.889736
10856
image/jpeg
'
    end

    it "expects the correct number of entries returned" do
      expect(rdf.covers.count).to be 4
    end

    it "expect medium cover url to be first in the list" do
      expect(rdf.covers[0]).to eql 'http://www.gutenberg.org/cache/epub/98765/pg98765.cover.medium.jpg'
    end

    it "expect the small cover url after the medium" do
      expect(rdf.covers[1]).to eql 'http://www.gutenberg.org/cache/epub/98765/pg98765.cover.small.jpg'
    end

    it "expects any other images to be listed after the official ones" do
      expect(rdf.covers[2]).to eql 'http://www.gutenberg.org/files/98765/98765-h/images/cover.jpg'
    end
  end

  describe "HTML ebook cover image" do
    # The -h image appears both as a file:// URI and as an http:// URL; the
    # examples below expect the result to hold two unique http URLs.
    let(:xml) do
      '
file:///public/vhost/g/gutenberg/html/files/98765/98765-rst/images/cover.jpg
file:///public/vhost/g/gutenberg/html/files/98765/98765-h/images/cover.jpg
http://www.gutenberg.org/files/98765/98765-h/images/cover.jpg
'
    end

    it "expects only unique entries" do
      expect(rdf.covers.count).to be 2
    end

    it "should convert File URIs to the Gutenberg URL" do
      expect(rdf.covers.first).to match 'http://www.gutenberg.org'
    end

    it "expects the HTML cover to be listed first" do
      expect(rdf.covers[0]).to eql 'http://www.gutenberg.org/files/98765/98765-h/images/cover.jpg'
    end

    it "expects the RST cover to be listed after the HTML" do
      expect(rdf.covers[1]).to eql 'http://www.gutenberg.org/files/98765/98765-rst/images/cover.jpg'
    end
  end
end
# Checks the collection returned by Rdf#ebooks: its size, the element class,
# and that image media are left out. The group is labelled "#ebooks" to match
# the method every example calls.
# NOTE(review): the fixtures are bare text with no XML element markup;
# presumably the tags were lost before review -- confirm against the real file.
describe "#ebooks" do
  # Content and closing quote stay in column 0 to leave the literal untouched.
  let(:xml) do
    '
293684
text/plain
2010-02-16T08:29:52.373092
application/zip
2006-09-28T12:37:26
text/plain; charset=us-ascii
116685
'
  end

  # Declared once for the whole group. `let` is lazy and inherited by nested
  # groups, so the context below only overrides `xml`.
  let(:rdf) { Rdf.new(REXML::Document.new(xml)) }

  it "expects the correct number of entries" do
    expect(rdf.ebooks.count).to be 2
  end

  it "expects an entry to be a Media class" do
    expect(rdf.ebooks.first).to be_an_instance_of Rdf::Media
  end

  context "only collect ebook media files" do
    # Mixes a text/plain entry with an image/jpeg entry; only the former is
    # expected to be reported as an ebook.
    let(:xml) do
      '
293684
text/plain
2010-02-16T08:29:52.373092
2699
image/jpeg
2013-03-25T20:57:55.668737
'
    end

    it "only extracts one media file" do
      expect(rdf.ebooks.count).to be 1
    end

    it "expects the media type to be for an ebook" do
      expect(rdf.ebooks[0].media_type).to eq 'text/plain'
    end
  end
end
end
end