# Examples for the contentMetadata-related behaviour reached through
# GDor::Indexer::SolrDocBuilder: dor_content_type, display_type and file_ids.
#
# NOTE(review): many fixture strings below are empty or whitespace-only, and a
# few descriptions/messages read as if an element name is missing (e.g.
# "on element", "( element may be ..."). It looks like XML markup may have been
# stripped from this copy of the file -- confirm against the upstream spec.
describe GDor::Indexer::PublicXmlFields do
  before(:all) do
    @fake_druid = 'oo000oo0000'
    @ns_decl = "xmlns='#{Mods::MODS_NS}'"
    @mods_xml = "public_xml_fields tests"
    @empty_pub_xml = ""
  end

  # Logger writing to an in-memory buffer so examples produce no console output.
  let(:logger) { Logger.new(StringIO.new) }

  # Builds a SolrDocBuilder around a Resource whose public_xml is the parsed
  # +xml+ string and whose mods is the shared @mods_xml fixture.
  def sdb_for_pub_xml(xml)
    resource = Harvestdor::Indexer::Resource.new(double, @fake_druid)
    {
      public_xml: Nokogiri::XML(xml),
      mods: @mods_xml
    }.each do |message, value|
      allow(resource).to receive(message).and_return(value)
    end
    GDor::Indexer::SolrDocBuilder.new(resource, logger)
  end

  # Builds a SolrDocBuilder around a Resource whose content_metadata is the
  # parsed +xml+ string; public_xml and mods are stubbed with shared fixtures.
  def sdb_for_content_md(xml)
    resource = Harvestdor::Indexer::Resource.new(double, @fake_druid)
    {
      content_metadata: Nokogiri::XML(xml),
      public_xml: @empty_pub_xml,
      mods: @mods_xml
    }.each do |message, value|
      allow(resource).to receive(message).and_return(value)
    end
    GDor::Indexer::SolrDocBuilder.new(resource, logger)
  end

  # NOTE:
  #   "Doubles, stubs, and message expectations are all cleaned out after each example."
  #   per https://www.relishapp.com/rspec/rspec-mocks/docs/scope
  context 'contentMetadata fields and methods' do
    before(:all) do
      @content_md_start = ""
      @content_md_end = ''
      @cntnt_md_type = 'image'
      @cntnt_md_xml = "#{@content_md_end}"
      @pub_xml = "#{@cntnt_md_xml}"
      @ng_pub_xml = Nokogiri::XML(@pub_xml)
    end

    context 'dor_content_type' do
      it 'is the value of the type attribute on element' do
        expected_type = 'foo'
        content_md = "#{@content_md_end}"
        builder = sdb_for_content_md(content_md)
        expect(builder.send(:dor_content_type)).to eq(expected_type)
      end

      it 'logs an error message if there is no content type' do
        content_md = "#{@content_md_start}#{@content_md_end}"
        builder = sdb_for_content_md(content_md)
        # The expectation has to be registered before the call that triggers it.
        expect(builder.logger).to receive(:error).with("#{@fake_druid} has no DOR content type ( element may be missing type attribute)")
        builder.send(:dor_content_type)
      end
    end

    context 'display_type' do
      let(:sdb) { sdb_for_pub_xml(@empty_pub_xml) }

      # stubbed dor_content_type => display_type asserted by the example
      {
        'image' => 'image',
        'manuscript' => 'image',
        'map' => 'image',
        'media' => 'file',
        'book' => 'book'
      }.each do |content_type, expected_display|
        it "'#{expected_display}' for dor_content_type '#{content_type}'" do
          allow(sdb).to receive(:dor_content_type).and_return(content_type)
          expect(sdb.display_type).to eq(expected_display)
        end
      end

      it "'file' for unrecognized dor_content_type" do
        allow(sdb).to receive(:dor_content_type).and_return('foo')
        expect(sdb.display_type).to eq('file')
      end
    end # display_type

    context '#file_ids' do
      # file_ids of a builder whose resource returns +content_md+ (parsed) as
      # its content_metadata.
      def file_ids_for(content_md)
        sdb_for_content_md(content_md).file_ids
      end

      context 'file display_type' do
        context 'contentMetadata type=file, resource type=file' do
          it 'is id attrib of file element in single resource element with type=file' do
            content_md = ' '
            expect(file_ids_for(content_md)).to match_array(%w[TR180_Shahi.pdf])
          end

          it 'is id attrib of file elements in multiple resource elements with type=file' do
            content_md = ' '
            expect(file_ids_for(content_md)).to match_array(
              %w[ATE.PDF SC0524_2013-047_b8_811.mp4 SAILDART.zip DougEngelbart041306.wav]
            )
          end
        end # contentMetadata type=file, resource type=file

        it 'contentMetadata type=geo, resource type=object' do
          content_md = ' '
          expect(file_ids_for(content_md)).to match_array(%w[data.zip preview.jpg])
        end
        # FIXME: non-file resource types
      end # file display_type

      context 'image display_type' do
        context 'contentMetadata type=image' do
          it 'resource type=image should be id attrib of file elements' do
            content_md = ' '
            expect(file_ids_for(content_md)).to match_array(
              %w[rg759wj0953_00_0003.jp2 rg759wj0953_00_00_0001.jp2]
            )
          end

          it 'resource type=object should be ignored' do
            content_md = ' '
            expect(file_ids_for(content_md)).to be_nil
          end
        end # contentMetadata type=image

        context 'contentMetadata type=map, resource type=image' do
          it 'is id attrib of file elements' do
            content_md = ' '
            expect(file_ids_for(content_md)).to match_array(
              %w[rf935xg1061_00_0001.jp2 rf935xg1061_00_0002.jp2]
            )
          end
        end # contentMetadata type=map, resource type=image

        context 'contentMetadata type=manuscript' do
          it 'resource type=image' do
            content_md = ' '
            expect(file_ids_for(content_md)).to match_array(%w[T0000001.jp2 T0000343.jp2])
          end

          it 'resource type=page should be ignored' do
            content_md = ' '
            expect(file_ids_for(content_md)).to be_nil
          end
        end # contentMetadata type=manuscript
      end # image display_type

      it 'is nil for book display_type' do
        content_md = ' '
        expect(file_ids_for(content_md)).to be_nil
      end

      it 'is id attrib of file elements for media display_type' do
        content_md = ' '
        expect(file_ids_for(content_md)).to match_array(%w[jy496kh1727_sl.mp3 jy496kh1727_img_1.jp2])
      end

      # NOTE(review): the next two examples carry the same description; they
      # presumably named different elements originally -- confirm and
      # disambiguate once the intended wording is known.
      it 'is nil if there are no elements in the contentMetadata' do
        content_md = ''
        expect(file_ids_for(content_md)).to be_nil
      end

      it 'is nil if there are no elements in the contentMetadata' do
        content_md = ' '
        expect(file_ids_for(content_md)).to be_nil
      end

      it 'is nil if there are no id elements on file elements' do
        content_md = "#{@content_md_start}#{@content_md_end}"
        expect(file_ids_for(content_md)).to be_nil
      end
      # TODO: multiple file elements in a single resource element
    end # file_ids
  end # contentMetadata fields and methods
end