require 'spec_helper'
describe GDor::Indexer::ModsFields do
# Druid handed to every stubbed resource built by sdb_for_mods.
let(:fake_druid) { 'oo000oo0000' }
# MODS namespace declaration, intended for the root <mods> element of XML fixtures.
let(:ns_decl) { "xmlns='#{Mods::MODS_NS}'" }
# Minimal MODS record for examples that do not depend on specific content.
# NOTE(review): the fixture was bare text (not parseable as a MODS document) and
# ns_decl was unused; the <mods>/<note> wrapper is a reconstruction -- confirm.
let(:mods_xml) { "<mods #{ns_decl}><note>gdor_mods_fields testing</note></mods>" }
# Builds a GDor::Indexer::SolrDocBuilder around a Harvestdor resource whose
# public_xml, mods and indexer accessors are stubbed, so no external service is
# contacted by the resource itself.
#
# @param m [String] MODS XML; parsed with Nokogiri and returned by the stubbed #mods
# @return [GDor::Indexer::SolrDocBuilder]
def sdb_for_mods(m)
  stubbed_resource = Harvestdor::Indexer::Resource.new(double, fake_druid)
  harvestdor_indexer = Harvestdor::Indexer.new
  harvestdor_indexer.logger.level = Logger::WARN
  allow(stubbed_resource).to receive_messages(
    public_xml: nil,
    mods: Nokogiri::XML(m),
    indexer: harvestdor_indexer
  )
  # Log to an in-memory sink at WARN level to keep spec output quiet.
  quiet_logger = Logger.new(StringIO.new).tap { |log| log.level = Logger::WARN }
  GDor::Indexer::SolrDocBuilder.new(stubbed_resource, quiet_logger)
end
# see https://consul.stanford.edu/display/NGDE/Required+and+Recommended+Solr+Fields+for+SearchWorks+documents
# summary_search is expected to come from top-level <abstract> elements only.
# NOTE(review): the fixtures had lost their XML markup (positive and negative
# examples used identical input); the markup below is reconstructed from the
# example descriptions -- confirm against stanford-mods behavior.
context 'summary_search solr field from <abstract>' do
  it 'is populated when the MODS has a top level <abstract> element' do
    m = "<mods #{ns_decl}><abstract>blah blah</abstract></mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.doc_hash_from_mods[:summary_search]).to match_array ['blah blah']
  end
  it 'has a value for each abstract element' do
    m = "<mods #{ns_decl}>
      <abstract>one</abstract>
      <abstract>two</abstract>
      </mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.doc_hash_from_mods[:summary_search]).to match_array %w(one two)
  end
  it 'is not present when there is no top level <abstract> element' do
    # the abstract is nested under relatedItem, so it is not a top-level element
    m = "<mods #{ns_decl}><relatedItem><abstract>blah blah</abstract></relatedItem></mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.doc_hash_from_mods[:summary_search]).to be_nil
  end
  it 'is not present if there are only empty abstract elements in the MODS' do
    m = "<mods #{ns_decl}><abstract/><note>notit</note></mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.doc_hash_from_mods[:summary_search]).to be_nil
  end
  it 'summary_display should not be populated - it is a copy field' do
    m = "<mods #{ns_decl}><abstract>blah blah</abstract></mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.doc_hash_from_mods[:summary_display]).to be_nil
  end
end # summary_search / <abstract>
# Delegation check: building the doc hash is expected to call sw_language_facet
# on the builder's stanford-mods record.
it 'language: should call sw_language_facet in stanford-mods gem to populate language field' do
  builder = sdb_for_mods(mods_xml)
  expect(builder.smods_rec).to receive(:sw_language_facet)
  builder.doc_hash_from_mods
end
# physical is expected to come from top-level physicalDescription/extent elements.
# NOTE(review): the fixtures had lost their XML markup; it is reconstructed here
# from the element paths named in the example descriptions -- confirm.
context 'physical solr field from <physicalDescription><extent>' do
  it 'is populated when the MODS has mods/physicalDescription/extent element' do
    m = "<mods #{ns_decl}><physicalDescription><extent>blah blah</extent></physicalDescription></mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.doc_hash_from_mods[:physical]).to match_array ['blah blah']
  end
  it 'has a value for each extent element' do
    # covers several extents in one physicalDescription and a second physicalDescription
    m = "<mods #{ns_decl}>
      <physicalDescription>
        <extent>one</extent>
        <extent>two</extent>
      </physicalDescription>
      <physicalDescription><extent>three</extent></physicalDescription>
      </mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.doc_hash_from_mods[:physical]).to match_array %w(one two three)
  end
  it 'is not present when there is no top level <physicalDescription> element' do
    m = "<mods #{ns_decl}><relatedItem><physicalDescription><extent>foo</extent></physicalDescription></relatedItem></mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.doc_hash_from_mods[:physical]).to be_nil
  end
  it 'is not present if there are only empty physicalDescription or extent elements in the MODS' do
    m = "<mods #{ns_decl}><physicalDescription/><physicalDescription><extent/></physicalDescription><note>notit</note></mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.doc_hash_from_mods[:physical]).to be_nil
  end
end # physical field from physicalDescription/extent
# url_suppl is expected to come from relatedItem/location/url, and not from the
# record's own location/url.
# NOTE(review): the fixtures had lost their XML markup; it is reconstructed here
# from the element paths named in the example descriptions -- confirm.
context 'url_suppl solr field from /mods/relatedItem/location/url' do
  it 'is populated when the MODS has mods/relatedItem/location/url' do
    m = "<mods #{ns_decl}><relatedItem><location><url>url.org</url></location></relatedItem></mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.doc_hash_from_mods[:url_suppl]).to match_array ['url.org']
  end
  it 'has a value for each mods/relatedItem/location/url element' do
    # covers multiple urls per location, multiple locations and multiple relatedItems
    m = "<mods #{ns_decl}>
      <relatedItem>
        <location><url>one</url></location>
        <location>
          <url>two</url>
          <url>three</url>
        </location>
      </relatedItem>
      <relatedItem><location><url>four</url></location></relatedItem>
      </mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.doc_hash_from_mods[:url_suppl]).to match_array %w(one two three four)
  end
  it 'is not populated from /mods/location/url element' do
    m = "<mods #{ns_decl}><location><url>hi</url></location></mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.doc_hash_from_mods[:url_suppl]).to be_nil
  end
  it 'is not present if there are only empty relatedItem/location/url elements in the MODS' do
    m = "<mods #{ns_decl}>
      <relatedItem><location><url/></location></relatedItem>
      <relatedItem><location/></relatedItem>
      <relatedItem/><note>notit</note></mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.doc_hash_from_mods[:url_suppl]).to be_nil
  end
end
# toc_search is expected to come from top-level <tableOfContents> elements only.
# NOTE(review): the fixtures had lost their XML markup; it is reconstructed here
# from the example descriptions -- confirm.
context 'toc_search solr field from <tableOfContents>' do
  it 'has a value for each tableOfContents element' do
    m = "<mods #{ns_decl}>
      <tableOfContents>one</tableOfContents>
      <tableOfContents>two</tableOfContents>
      </mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.doc_hash_from_mods[:toc_search]).to match_array %w(one two)
  end
  it 'is not present when there is no top level <tableOfContents> element' do
    m = "<mods #{ns_decl}><relatedItem><tableOfContents>foo</tableOfContents></relatedItem></mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.doc_hash_from_mods[:toc_search]).to be_nil
  end
  it 'is not present if there are only empty tableOfContents elements in the MODS' do
    m = "<mods #{ns_decl}><tableOfContents/><note>notit</note></mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.doc_hash_from_mods[:toc_search]).to be_nil
  end
end
# Covers the private #format_main_ssim helper: its use by doc_hash_from_mods,
# its delegation to stanford-mods' format_main, and the warning logged when no
# value is found.
# NOTE(review): the fixtures and the expected warning text had lost their XML
# element names; <note>/<typeOfResource> are reconstructed -- confirm the exact
# warning text against GDor::Indexer::ModsFields#format_main_ssim.
context '#format_main_ssim' do
  it 'doc_hash_from_mods calls #format_main_ssim' do
    m = "<mods #{ns_decl}><note>nope</note></mods>"
    sdb = sdb_for_mods(m)
    expect(sdb).to receive(:format_main_ssim)
    sdb.doc_hash_from_mods[:format_main_ssim]
  end
  it '#format_main_ssim calls stanford-mods.format_main' do
    m = "<mods #{ns_decl}><note>nope</note></mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.smods_rec).to receive(:format_main).and_return([])
    sdb.send(:format_main_ssim)
  end
  it 'has a value when MODS data provides' do
    m = "<mods #{ns_decl}><typeOfResource>still image</typeOfResource></mods>"
    sdb = sdb_for_mods(m)
    expect(sdb.send(:format_main_ssim)).to match_array ['Image']
  end
  it 'returns empty Array and logs warning if there is no value' do
    sdb = sdb_for_mods(mods_xml)
    expect(sdb.logger).to receive(:warn).with("#{fake_druid} has no SearchWorks Resource Type from MODS - check <typeOfResource> and other implicated MODS elements")
    expect(sdb.send(:format_main_ssim)).to eq([])
  end
end
# Title-related Solr fields, all derived from one shared MODS fixture.
# NOTE(review): the fixtures had lost their XML markup; the <titleInfo> structure
# (title / nonSort / subTitle, plus two additional titles) is reconstructed from
# the expected values below -- confirm against stanford-mods behavior.
context 'title fields' do
  let(:title_mods) do
    "<mods #{ns_decl}>
      <titleInfo><title>Jerk</title><nonSort>The</nonSort><subTitle>is whom?</subTitle></titleInfo>
      <titleInfo><title>Joke</title></titleInfo>
      <titleInfo type='alternative'><title>Alternative</title></titleInfo>
      </mods>"
  end
  let(:sdb) { sdb_for_mods(title_mods) }
  let(:title_doc_hash) { sdb.doc_hash_from_mods }
  it 'calls the appropriate methods in the stanford-mods gem to populate the fields' do
    smr = sdb.smods_rec
    expect(smr).to receive(:sw_short_title).at_least(:once)
    expect(smr).to receive(:sw_full_title).at_least(:once)
    expect(smr).to receive(:sw_title_display)
    expect(smr).to receive(:sw_addl_titles)
    expect(smr).to receive(:sw_sort_title)
    sdb.doc_hash_from_mods
  end
  context 'search fields' do
    it 'title_245a_search' do
      expect(title_doc_hash[:title_245a_search]).to eq('The Jerk')
    end
    it 'title_245_search' do
      expect(title_doc_hash[:title_245_search]).to eq('The Jerk : is whom?')
    end
    it 'title_variant_search' do
      expect(title_doc_hash[:title_variant_search]).to match_array %w(Joke Alternative)
    end
    it 'title_related_search should not be populated from MODS' do
      expect(title_doc_hash[:title_related_search]).to be_nil
    end
  end
  context 'display fields' do
    it 'title_display' do
      expect(title_doc_hash[:title_display]).to eq('The Jerk : is whom?')
    end
    it 'title_245a_display' do
      expect(title_doc_hash[:title_245a_display]).to eq('The Jerk')
    end
    it 'title_245c_display should not be populated from MODS' do
      expect(title_doc_hash[:title_245c_display]).to be_nil
    end
    it 'title_full_display' do
      expect(title_doc_hash[:title_full_display]).to eq('The Jerk : is whom?')
    end
    it 'removes trailing commas in title_display' do
      # local fixture: same record as title_mods, but the subTitle ends in a comma
      title_mods = "<mods #{ns_decl}>
        <titleInfo><title>Jerk</title><nonSort>The</nonSort><subTitle>is whom,</subTitle></titleInfo>
        <titleInfo><title>Joke</title></titleInfo>
        <titleInfo type='alternative'><title>Alternative</title></titleInfo>
        </mods>"
      sdb = sdb_for_mods(title_mods)
      title_doc_hash = sdb.doc_hash_from_mods
      expect(title_doc_hash[:title_display]).to eq('The Jerk : is whom')
    end
    it 'title_variant_display should not be populated - it is a copy field' do
      expect(title_doc_hash[:title_variant_display]).to be_nil
    end
  end
  it 'title_sort' do
    expect(title_doc_hash[:title_sort]).to eq('Jerk is whom')
  end
end # title fields
# Author-related Solr fields, all derived from one shared MODS fixture holding
# personal, corporate and conference names.
# NOTE(review): the fixture had lost its XML markup; the <name> structure (types,
# nameParts, roles, displayForm) is reconstructed from the expected values
# below -- confirm against stanford-mods behavior.
context 'author fields' do
  let(:name_mods) do
    "<mods #{ns_decl}>
      <name type='personal'>
        <namePart type='given'>John</namePart>
        <namePart type='family'>Huston</namePart>
        <role><roleTerm type='code' authority='marcrelator'>drt</roleTerm></role>
        <displayForm>q</displayForm>
      </name>
      <name type='personal'><namePart>Crusty The Clown</namePart></name>
      <name type='corporate'><namePart>Watchful Eye</namePart></name>
      <name type='corporate'>
        <namePart>Exciting Prints</namePart>
        <role><roleTerm type='text'>lithographer</roleTerm></role>
      </name>
      <name type='conference'><namePart>conference</namePart></name>
      </mods>"
  end
  let(:sdb) { sdb_for_mods(name_mods) }
  let(:author_doc_hash) { sdb.doc_hash_from_mods }
  it 'calls the appropriate methods in the stanford-mods gem to populate the fields' do
    smr = sdb.smods_rec
    expect(smr).to receive(:sw_main_author)
    expect(smr).to receive(:sw_addl_authors)
    expect(smr).to receive(:sw_person_authors).exactly(3).times
    expect(smr).to receive(:sw_impersonal_authors)
    expect(smr).to receive(:sw_corporate_authors)
    expect(smr).to receive(:sw_meeting_authors)
    expect(smr).to receive(:sw_sort_author)
    sdb.doc_hash_from_mods
  end
  context 'search fields' do
    it 'author_1xx_search' do
      expect(author_doc_hash[:author_1xx_search]).to eq('Crusty The Clown')
    end
    it 'author_7xx_search' do
      # skip aborts the example here; the expectation below records the open question
      skip 'Should this return all authors? or only 7xx authors?'
      expect(author_doc_hash[:author_7xx_search]).to match_array ['q', 'Watchful Eye', 'Exciting Prints', 'conference']
    end
    it 'author_8xx_search should not be populated from MODS' do
      expect(author_doc_hash[:author_8xx_search]).to be_nil
    end
  end
  context 'facet fields' do
    it 'author_person_facet' do
      expect(author_doc_hash[:author_person_facet]).to match_array ['q', 'Crusty The Clown']
    end
    it 'author_other_facet' do
      expect(author_doc_hash[:author_other_facet]).to match_array ['Watchful Eye', 'Exciting Prints', 'conference']
    end
  end
  context 'display fields' do
    it 'author_person_display' do
      expect(author_doc_hash[:author_person_display]).to match_array ['q', 'Crusty The Clown']
    end
    it 'author_person_full_display' do
      expect(author_doc_hash[:author_person_full_display]).to match_array ['q', 'Crusty The Clown']
    end
    it 'author_corp_display' do
      expect(author_doc_hash[:author_corp_display]).to match_array ['Watchful Eye', 'Exciting Prints']
    end
    it 'author_meeting_display' do
      expect(author_doc_hash[:author_meeting_display]).to match_array ['conference']
    end
  end
  it 'author_sort' do
    expect(author_doc_hash[:author_sort]).to eq('Crusty The Clown')
  end
end # author fields
# subject fields moved to mods_subject_fields_spec.rb
# publication fields moved to mods_pub_fields_spec.rb
end