# -*- coding: utf-8 -*-
require 'spec_helper'

# Specs for Krikri::Harvesters::OAIHarvester.
#
# HTTP traffic to the example endpoint is stubbed with WebMock; the response
# bodies are assembled by the fixture helpers defined at the top of the
# 'with connection' context.
describe Krikri::Harvesters::OAIHarvester do
  let(:args) { { uri: 'http://example.org/endpoint' } }
  subject { described_class.new(args) }

  it 'has a client' do
    expect(subject.client).to be_a OAI::Client
  end

  context 'with connection' do
    # ---- fixture helpers ---------------------------------------------------
    #
    # NOTE(review): every data value below (identifiers, datestamps, titles,
    # set specs, resumption token) is taken from the original fixtures, but the
    # surrounding markup is written from the OAI-PMH 2.0 response layout.
    # The Dublin Core element names other than dc:title and dc:identifier, and
    # the omission of attributes on <request> and <originDescription>, are
    # assumptions -- confirm against a captured provider response.

    # Request headers that every WebMock stub below matches on.
    let(:request_headers) do
      { 'Accept' => '*/*',
        'Accept-Encoding' => 'gzip;q=1.0,deflate;q=0.6,identity;q=0.3' }
    end

    # Value echoed in the <request> element of the manuscript responses.
    let(:provider_url) { 'http://oaipmh.huygens.knaw.nl/' }

    # Token handed out by the first ListRecords page.
    let(:resumption_token) { 'MToxMHwyOnwzOnw0Onw1Om9haV9kYw==' }

    # URI the client is expected to request when it follows the token.
    let(:resumed_uri) do
      "http://example.org/endpoint?resumptionToken=#{resumption_token}" \
      '&verb=ListRecords'
    end

    # <about> container attached to manuscript 12 only.
    let(:provenance_about) do
      '<about>' \
      '<provenance xmlns="http://www.openarchives.org/OAI/2.0/provenance">' \
      '<originDescription>' \
      '<baseURL>http://cdm16694.contentdm.oclc.org/oai/oai.php</baseURL>' \
      '<identifier>oai:cdm16694.contentdm.oclc.org:R6A001/1</identifier>' \
      '<datestamp>2015-01-07</datestamp>' \
      '<metadataNamespace>http://www.openarchives.org/OAI/2.0/' \
      '</metadataNamespace>' \
      '</originDescription></provenance></about>'
    end

    # Example records, keyed by manuscript number.
    let(:manuscripts) do
      {
        10 => manuscript_record(
          10, '446-E', %w(arthurianfiction:manuscript arthurianfiction)),
        11 => manuscript_record(
          11, '5018-D', %w(arthurianfiction arthurianfiction:manuscript)),
        12 => manuscript_record(
          12, '445-D', %w(arthurianfiction arthurianfiction:manuscript),
          provenance_about),
        13 => manuscript_record(
          13, '5667 E', %w(arthurianfiction:manuscript arthurianfiction))
      }
    end

    # GetRecord response: a single record.
    let(:get_record_body) do
      oai_response('2014-10-27T22:19:17Z', provider_url,
                   "<GetRecord>#{manuscripts[10]}</GetRecord>")
    end

    # First ListRecords page: four records followed by a resumption token.
    let(:list_records_body) do
      payload = ['<ListRecords>',
                 *manuscripts.values_at(10, 11, 12, 13),
                 "<resumptionToken>#{resumption_token}</resumptionToken>",
                 '</ListRecords>'].join("\n")
      oai_response('2014-10-27T23:05:33Z', provider_url, payload)
    end

    # Resumed ListRecords page: two records.
    # NOTE(review): no resumptionToken element is emitted here; if the
    # original fixture carried an empty one, restore it.
    let(:resumed_records_body) do
      payload = ['<ListRecords>',
                 *manuscripts.values_at(10, 11),
                 '</ListRecords>'].join("\n")
      oai_response('2014-10-27T23:05:33Z', provider_url, payload)
    end

    # Wraps +payload+ in an OAI-PMH response envelope.
    #
    # @param response_date [String] text of the <responseDate> element
    # @param request_url [String] text of the <request> element
    # @param payload [String] verb-specific XML (e.g. a <ListRecords> element)
    # @return [String] the complete response document
    def oai_response(response_date, request_url, payload)
      ['<?xml version="1.0" encoding="UTF-8"?>',
       '<OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/">',
       "<responseDate>#{response_date}</responseDate>",
       "<request>#{request_url}</request>",
       payload,
       '</OAI-PMH>'].join("\n")
    end

    # Builds the <record> element for one example manuscript.
    #
    # @param number [Integer] manuscript number; zero-padded to ten digits
    # @param shelfmark [String] trailing part of the title
    # @param set_specs [Array<String>] setSpec values, in document order
    # @param about [String] optional <about> container appended to the record
    # @return [String] the record XML
    def manuscript_record(number, shelfmark, set_specs, about = '')
      man_id = "MAN#{number.to_s.rjust(10, '0')}"
      specs = set_specs.map { |spec| "<setSpec>#{spec}</setSpec>" }.join

      header = '<header>' \
               '<identifier>oai:oaipmh.huygens.knaw.nl:arthurianfiction:' \
               "#{man_id}</identifier>" \
               '<datestamp>2012-07-13T14:27:31Z</datestamp>' \
               "#{specs}</header>"

      metadata = '<metadata>' \
                 '<oai_dc:dc ' \
                 'xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" ' \
                 'xmlns:dc="http://purl.org/dc/elements/1.1/">' \
                 '<dc:title>Aberystwyth, National Library of Wales, ' \
                 "#{shelfmark}</dc:title>" \
                 '<dc:creator>Bart Besamusca</dc:creator>' \
                 '<dc:identifier>https://service.arthurianfiction.org/' \
                 "manuscript/#{man_id}</dc:identifier>" \
                 '<dc:date>2012-07-13T14:27:31Z</dc:date>' \
                 '<dc:contributor>Bart Besamusca</dc:contributor>' \
                 '<dc:type>model</dc:type>' \
                 '<dc:language>eng</dc:language>' \
                 '</oai_dc:dc></metadata>'

      "<record>\n#{header}\n#{metadata}\n#{about}</record>"
    end

    # ---- stubs -------------------------------------------------------------

    before do
      # TODO: webmock ListIdentifiers, test lazy resumption
      headers = (10..110).map do |number|
        element = REXML::Element.new
        element.add_element REXML::Element.new('identifier').add_text(
          'oai:oaipmh.huygens.knaw.nl:arthurianfiction:MAN' +
          number.to_s.rjust(10, '0'))
        OAI::Header.new(element)
      end

      allow(subject.client).to receive_message_chain(:list_identifiers, :full)
        .and_return(headers)

      # GetRecord -- Single record OAI Request
      stub_request(:get, 'http://example.org/endpoint?identifier='\
                   'oai:oaipmh.huygens.knaw.nl:arthurianfiction:MAN0000000010'\
                   '&metadataPrefix=oai_dc&verb=GetRecord')
        .with(headers: request_headers)
        .to_return(status: 200, body: get_record_body, headers: {})

      # ListRecords -- Multiple record OAI Request (w/ resumption)
      stub_request(:get, 'http://example.org/endpoint?metadataPrefix=oai_dc'\
                   '&verb=ListRecords')
        .with(headers: request_headers)
        .to_return(status: 200, body: list_records_body, headers: {})

      # with set -- same body as the unfiltered request
      stub_request(:get, 'http://example.org/endpoint?metadataPrefix=mods'\
                   '&set=moomin&verb=ListRecords')
        .with(headers: request_headers)
        .to_return(status: 200, body: list_records_body, headers: {})

      # ListRecords -- Multiple record OAI Request (resumed)
      stub_request(:get, resumed_uri)
        .with(headers: request_headers)
        .to_return(status: 200, body: resumed_records_body, headers: {})
    end

    # ---- examples ----------------------------------------------------------

    it 'produces valid xml' do
      expect do
        Nokogiri::XML(subject.records.first.content) do |config|
          config.options = Nokogiri::XML::ParseOptions::STRICT
        end
      end.not_to raise_error
    end

    it 'produces has oai namespace and header' do
      document = Nokogiri::XML(subject.records.first.content)
      expect(document.xpath('//xmlns:header')).not_to be_empty
    end

    it 'retries timed out requests' do
      expect_any_instance_of(Faraday::Adapter::NetHttp)
        .to receive(:perform_request).at_least(4).times
        .and_raise(Net::ReadTimeout.new)
      expect { subject.records.first }.to raise_error Faraday::TimeoutError
    end

    it 'logs failed requests' do
      allow_any_instance_of(Faraday::Adapter::NetHttp)
        .to receive(:perform_request).and_raise(Net::ReadTimeout.new)
      expect(Rails.logger).to receive(:info).at_least(4).times
      expect { subject.records.first }.to raise_error Faraday::TimeoutError
    end

    describe 'resumption' do
      it 'follows resumption token' do
        subject.records.each { |r| r }
        expect(WebMock).to have_requested(:get, resumed_uri).once
      end

      it 'only follows resumption token as far as requested' do
        # The first page holds exactly four records, so taking four must not
        # trigger the follow-up request.
        subject.records.take(4).each { |r| r }
        expect(WebMock).not_to have_requested(:get, resumed_uri)
      end
    end

    describe '#sets' do
      before do
        set_elements = [
          ['commonwealth-oai:1r66j283x', 'History of the Academy'],
          ['commonwealth-oai:1r66j2871',
           'List of Vessels Belonging to the District of Gloucester'],
          ['commonwealth-oai:wp988k074', 'NOBLE Collection']
        ].map do |spec, name|
          "<set><setSpec>#{spec}</setSpec><setName>#{name}</setName></set>"
        end

        response_body = oai_response(
          '2015-02-21T17:07:46Z',
          'http://fedora.digitalcommonwealth.org/oaiprovider/',
          ['<ListSets>', *set_elements, '</ListSets>'].join("\n"))

        stub_request(:get, 'http://example.org/endpoint?verb=ListSets')
          .with(headers: request_headers)
          .to_return(status: 200, body: response_body, headers: {})
      end

      it 'returns sets' do
        expect(subject.sets).to contain_exactly(an_instance_of(OAI::Set),
                                                an_instance_of(OAI::Set),
                                                an_instance_of(OAI::Set))
      end

      it 'passes block to sets' do
        expect(subject.sets(&:spec))
          .to contain_exactly(an_instance_of(String),
                              an_instance_of(String),
                              an_instance_of(String))
      end
    end

    describe 'options' do
      let(:result) { double }

      let(:args) do
        { uri: 'http://example.org/endpoint', oai: { metadata_prefix: 'mods' } }
      end

      let(:request_opts) { { set: 'moomin' } }

      shared_context 'oai options' do
        before do
          allow(result).to receive(:full).and_return([])
        end
      end

      # Including groups must define:
      #   request_type   -- the OAI::Client method expected to be called
      #   harvest_method -- the harvester method under test
      # (named harvest_method rather than method so the let does not shadow
      # Object#method inside the examples)
      shared_examples 'send options' do
        it 'sends request with option' do
          expect(subject.client).to receive(request_type)
            .with(metadata_prefix: args[:oai][:metadata_prefix])
            .and_return(result)

          subject.send(harvest_method).first
        end

        it 'adds options passed into request' do
          expect(subject.client).to receive(request_type)
            .with(metadata_prefix: args[:oai][:metadata_prefix],
                  set: request_opts[:set])
            .and_return(result)

          subject.send(harvest_method, request_opts).first
        end

        context 'with multiple sets' do
          before { args[:oai][:set] = ['moomin', 'moomin'] }

          it 'skips sets when specific sets are selected' do
            opts = args[:oai].dup
            opts.delete(:set)

            expect(subject.client).to receive(request_type).with(opts)
              .and_return(result)

            subject.send(harvest_method, { skip_set: 'moomin' }).first
          end

          it 'skips sets from full list when none given' do
            args[:oai].delete(:set)
            opts = args[:oai].dup
            opts[:set] = 'valid'

            allow(subject).to receive(:sets).and_return(['valid', 'moomin'])

            expect(subject.client).to receive(request_type).with(opts)
              .and_return(result)

            subject.send(harvest_method, { skip_set: 'moomin' }).first
          end

          it 'skips sets that error' do
            args[:oai].delete(:set)
            invalid = args[:oai].dup
            invalid[:set] = 'invalid'
            valid = args[:oai].dup
            valid[:set] = 'valid'

            allow(subject).to receive(:sets)
              .and_return(['invalid', 'valid', 'moomin'])

            expect(subject.client).to receive(request_type).with(invalid)
              .and_raise(OAI::Exception, '')
            expect(subject.client).to receive(request_type).with(valid)
              .and_return(result)

            subject.send(harvest_method, { skip_set: 'moomin' }).first
          end
        end
      end

      describe '#records' do
        include_context 'oai options'
        include_examples 'send options'

        let(:request_type) { :list_records }
        let(:harvest_method) { :records }

        it 'combines sets' do
          args[:oai][:set] = ['moomin', 'moomin']
          single_set = subject.records(:set => 'moomin').to_a

          # The same set is configured twice, so its records appear twice.
          expect(subject.records.to_a).to eq(single_set + single_set)
        end
      end

      describe 'record_ids' do
        include_context 'oai options'
        include_examples 'send options'

        let(:request_type) { :list_identifiers }
        let(:harvest_method) { :record_ids }
      end

      describe '#get_record' do
        before do
          allow(result).to receive(:record).and_return(oai_record)
        end

        let(:identifier) { 'comet_moominland' }
        let(:request_type) { :get_record }
        let(:oai_record) { OAI::Record.new(REXML::Element.new) }

        it 'sends request with option' do
          expect(subject.client).to receive(request_type)
            .with(identifier: identifier,
                  metadata_prefix: args[:oai][:metadata_prefix])
            .and_return(result)

          subject.get_record(identifier)
        end

        it 'adds options passed into request' do
          expect(subject.client).to receive(request_type)
            .with(identifier: identifier,
                  metadata_prefix: args[:oai][:metadata_prefix],
                  set: request_opts[:set])
            .and_return(result)

          subject.get_record(identifier, request_opts)
        end
      end
    end

    describe '#request_with_sets' do
      let(:sets) { [double('first'), double('second')] }
      let(:opts) { { set: sets } }

      it 'is lazy' do
        expect { |b| subject.send(:request_with_sets, opts, &b) }
          .not_to yield_control
      end

      # NOTE(review): despite its name this example asserts that nothing is
      # yielded, so it passes whenever the block is never invoked. Consider
      # forcing the returned enumerator and asserting the yielded options.
      it 'sends requests' do
        expect { |b| subject.send(:request_with_sets, opts, &b) }
          .not_to yield_successive_args({ set: opts[:set][0] },
                                        { set: opts[:set][1] })
      end

      it 'sends requests lazily' do
        expect(subject).to receive(:give_results).once
          .and_return([1, 2, 3])

        enum = subject.send(:request_with_sets, opts) do |_set_opts|
          subject.give_results
        end

        # Three results come from a single request; the second set is never
        # requested.
        3.times { enum.next }
      end
    end

    describe '#enqueue' do
      let(:args) do
        { uri: 'http://example.org/endpoint', oai: { metadata_prefix: 'mods' } }
      end

      before do
        Resque.remove_queue('harvest') # Not strictly necessary. Future?
        Krikri::Activity.delete_all
      end

      it 'saves harvest options correctly when creating an activity' do
        # Ascertain that options particular to this harvester type are
        # serialized and deserialized properly.
        described_class.enqueue(args)

        activity = Krikri::Activity.first
        opts = JSON.parse(activity.opts, symbolize_names: true)
        expect(opts).to eq(args)
      end
    end

    describe 'concat_enum' do
      it 'concatenates enums' do
        expect(subject.concat_enum([(1..10), (100..110)]).to_a)
          .to eq((1..10).to_a + (100..110).to_a)
      end

      it 'works with lazy' do
        enum = double
        expect(enum).not_to receive :each

        # Ten items are available from the first range, so the second
        # enumerable must never be touched.
        subject.concat_enum([(1..10), enum]).lazy.take(10).each(&:inspect)
      end
    end

    describe 'identifiers' do
      let(:id) { 'oai:oaipmh.huygens.knaw.nl:arthurianfiction:MAN0000000010' }

      it 'uses oai header identifier by default' do
        expect(subject).to receive(:mint_id).with(id).and_return('id')
        subject.records.first
      end

      context 'with :id_path' do
        let(:args) do
          { uri: 'http://example.org/endpoint',
            oai: { id_path: '//dc:identifier' } }
        end

        it 'returns the identifier from a given xpath' do
          dcid = 'https://service.arthurianfiction.org/manuscript/MAN0000000010'
          expect(subject).to receive(:mint_id).with(dcid).and_return('id')
          subject.records.first
        end

        context 'and bad path' do
          let(:id_path) { '//dc:not_a_path' }

          let(:args) do
            { uri: 'http://example.org/endpoint', oai: { id_path: id_path } }
          end

          it 'fails and raises error' do
            expect(Rails.logger).to receive(:error).with(include(id_path, id))
            subject.records.first
          end
        end

        context 'and bad namespace' do
          let(:id_path) { '//fkns:identifier' }

          let(:args) do
            { uri: 'http://example.org/endpoint', oai: { id_path: id_path } }
          end

          it 'fails and raises error' do
            expect(Rails.logger).to receive(:error).with(include(id_path, id))
            subject.records.first
          end
        end
      end
    end

    it_behaves_like 'a harvester'
  end
end

# Checks that the OAI harvester is available through the harvester registry.
describe Krikri::Harvester::Registry do
  describe '#registered?' do
    it 'knows OAIHarvester is registered' do
      # It should have been registered by the engine initializer, engine.rb.
      expect(described_class.registered?(:oai)).to be true
    end
  end
end