require 'spec_helper'

# Specs for GutenbergRdf::Rdf. Each group supplies a fixture string through
# `let(:xml)`, wraps it in a REXML::Document, and checks what the Rdf readers
# return for it: id/published/publisher/language/rights, type, title and
# subtitle, authors, subjects, covers and ebooks.
#
# NOTE(review): the fixture strings below hold only whitespace-separated text
# values -- no XML element markup and no line breaks between values, even where
# an example is described as using separate lines. Presumably the markup was
# lost before this file reached review; confirm against the original fixtures
# before trusting the expectations. The strings are kept exactly as found.
module GutenbergRdf
  describe Rdf do
    # Single shared definition. `let` is lazy and calls `xml` on the running
    # example, so whichever `let(:xml)` is closest to that example is the one
    # that gets parsed; nested groups only need to override `xml`.
    let(:rdf) { Rdf.new(REXML::Document.new(xml)) }

    describe "basic metadata" do
      let(:xml) do
        ' 2006-09-28 en Project Gutenberg Public domain in the USA. '
      end

      it "expects an id" do
        expect(rdf.id).to eql "98765"
      end

      it "expects a published date" do
        expect(rdf.published).to eql "2006-09-28"
      end

      it "expects a publisher" do
        expect(rdf.publisher).to eql "Project Gutenberg"
      end

      it "expects a language" do
        expect(rdf.language).to eql "en"
      end

      it "expects the rights" do
        expect(rdf.rights).to eql "Public domain in the USA."
      end
    end

    describe "#type" do
      let(:xml) do
        ' Text '
      end

      it "expect the type of entity" do
        expect(rdf.type).to eql 'Text'
      end
    end

    # Title/subtitle examples: a lone title, then title and subtitle pairs
    # separated by a line break, by ", or,", by a colon and by a semi-colon.
    describe "Titles" do
      let(:xml) do
        ' A Great Title '
      end

      it "expects a title" do
        expect(rdf.title).to eql 'A Great Title'
      end

      it "expects subtitle to be empty" do
        expect(rdf.subtitle).to eql ''
      end

      context "with a title and subtitle, on separate lines" do
        let(:xml) do
          ' A Great Multi-Title Or, a Subtitle '
        end

        it "expects the title to be the first line" do
          expect(rdf.title).to eql 'A Great Multi-Title'
        end

        it "expects the subtitle to be the second line" do
          expect(rdf.subtitle).to eql 'Or, a Subtitle'
        end
      end

      context "with; title, or, subtitle (we need to split on the 'or')" do
        let(:xml) do
          ' A Great Multi-Title, or, a Subtitle '
        end

        it "expects the title to be the first line" do
          expect(rdf.title).to eql 'A Great Multi-Title'
        end

        it "expects the subtitle to be the second line" do
          expect(rdf.subtitle).to eql 'a Subtitle'
        end
      end

      context "when title:subtitle are separated by a colon" do
        let(:xml) do
          ' A Great Multi-Title: And a Subtitle '
        end

        it "expects a title" do
          expect(rdf.title).to eql 'A Great Multi-Title'
        end

        it "expects a subtitle" do
          expect(rdf.subtitle).to eql 'And a Subtitle'
        end
      end

      context "when title; and subtitle are separated by a semi-colon" do
        let(:xml) do
          ' A Great Multi-Title; Or, a Subtitle '
        end

        it "expects a title" do
          expect(rdf.title).to eql 'A Great Multi-Title'
        end

        it "expects a subtitle" do
          expect(rdf.subtitle).to eql 'Or, a Subtitle'
        end

        # Here the semi-colon is expected to stay inside the title because a
        # separate subtitle is already present.
        context "...except when subtitles already exists" do
          let(:xml) do
            ' A Great Multi-Title; and some other text Then a Subtitle on a newline '
          end

          it "expects a title" do
            expect(rdf.title).to eql 'A Great Multi-Title; and some other text'
          end

          it "expects a subtitle" do
            expect(rdf.subtitle).to eql 'Then a Subtitle on a newline'
          end
        end
      end
    end

    describe "#authors" do
      let(:xml) do
        ' Verschillende Various 1830 1905 Dodge, Mary Mapes Dodge, Mary '
      end

      it "returns the correct number of authors" do
        expect(rdf.authors.count).to be 2
      end

      it "expects an Agent object" do
        expect(rdf.authors[0]).to be_an_instance_of Rdf::Agent
      end

      it "expects the author to have an aut role" do
        expect(rdf.authors[0].role).to eq 'aut'
      end

      it "has the correct author names" do
        expect(rdf.authors[1].fullname).to eq 'Mary Mapes Dodge'
      end

      it "expects other agents to have the correct role" do
        expect(rdf.authors[1].role).to eq 'ctb'
      end
    end

    describe "#subjects" do
      # %q{} is used because the fixture text itself contains apostrophes.
      let(:xml) do
        %q{ PZ Children's literature -- Periodicals Children's periodicals, American }
      end

      it "expects correct number to be returned" do
        expect(rdf.subjects.count).to be 2
      end

      it "expects the correct data" do
        expect(rdf.subjects.first).to eql "Children's literature -- Periodicals"
        expect(rdf.subjects.last).to eql "Children's periodicals, American"
      end
    end

    describe "#covers" do
      # Expected ordering: the medium and small cache covers first, then any
      # other cover images.
      describe "official PG covers" do
        # The line break inside this literal is part of the fixture as found,
        # so the continuation line has to stay flush-left.
        let(:xml) do
          ' 2699 image/jpeg 2013-03-25T20:57:55.668737 http://www.gutenberg.org/files/98765/98765-h/images/cover.jpg 
file:///public/vhost/g/gutenberg/html/files/98765/98765-h/images/cover.jpg file:///public/vhost/g/gutenberg/html/files/98765/98765-rst/images/cover.jpg 2013-03-25T20:57:55.889736 10856 image/jpeg '
        end

        it "expects the correct number of entries returned" do
          expect(rdf.covers.count).to be 4
        end

        it "expect medium cover url to be first in the list" do
          expect(rdf.covers[0]).to eql 'http://www.gutenberg.org/cache/epub/98765/pg98765.cover.medium.jpg'
        end

        it "expect the small cover url after the medium" do
          expect(rdf.covers[1]).to eql 'http://www.gutenberg.org/cache/epub/98765/pg98765.cover.small.jpg'
        end

        it "expects any other images to be listed after the official ones" do
          expect(rdf.covers[2]).to eql 'http://www.gutenberg.org/files/98765/98765-h/images/cover.jpg'
        end
      end

      # The fixture lists the same HTML cover as both a file:// URI and an
      # http:// URL; the examples expect it to appear once, ahead of the RST
      # cover.
      describe "HTML ebook cover image" do
        let(:xml) do
          ' file:///public/vhost/g/gutenberg/html/files/98765/98765-rst/images/cover.jpg file:///public/vhost/g/gutenberg/html/files/98765/98765-h/images/cover.jpg http://www.gutenberg.org/files/98765/98765-h/images/cover.jpg '
        end

        it "expects only unique entries" do
          expect(rdf.covers.count).to be 2
        end

        it "should convert File URIs to the Gutenberg URL" do
          expect(rdf.covers.first).to match 'http://www.gutenberg.org'
        end

        it "expects the HTML cover to be listed first" do
          expect(rdf.covers[0]).to eql 'http://www.gutenberg.org/files/98765/98765-h/images/cover.jpg'
        end

        it "expects the RST cover to be listed after the HTML" do
          expect(rdf.covers[1]).to eql 'http://www.gutenberg.org/files/98765/98765-rst/images/cover.jpg'
        end
      end
    end

    # Group label names the reader these examples actually call (`ebooks`).
    describe "#ebooks" do
      let(:xml) do
        ' 293684 text/plain 2010-02-16T08:29:52.373092 application/zip 2006-09-28T12:37:26 text/plain; charset=us-ascii 116685 '
      end

      it "expects the correct number of entries" do
        expect(rdf.ebooks.count).to be 2
      end

      it "expects an entry to be a Media class" do
        expect(rdf.ebooks.first).to be_an_instance_of Rdf::Media
      end

      # Fixture mixes a text/plain entry with an image/jpeg entry; only the
      # former is expected to come back from `ebooks`.
      context "only collect ebook media files" do
        let(:xml) do
          ' 293684 text/plain 2010-02-16T08:29:52.373092 2699 image/jpeg 2013-03-25T20:57:55.668737 '
        end

        it "only extracts one media file" do
          expect(rdf.ebooks.count).to be 1
        end

        it "expects the media type to be for an ebook" do
          expect(rdf.ebooks[0].media_type).to eq 'text/plain'
        end
      end
    end
  end
end