# Encoding: UTF-8 require 'test_helper' require 'traject/indexer' require 'traject/macros/marc21_semantics' require 'json' require 'marc/record' # See also marc_extractor_test.rb for more detailed tests on marc extraction, # this is just a basic test to make sure our macro works passing through to there # and other options. describe "Traject::Macros::Marc21Semantics" do Marc21Semantics = Traject::Macros::Marc21Semantics # shortcut before do @indexer = Traject::Indexer.new @indexer.extend Marc21Semantics @record = MARC::Reader.new(support_file_path "manufacturing_consent.marc").to_a.first end it "oclcnum" do @indexer.instance_eval do to_field "oclcnum", oclcnum end output = @indexer.map_record(@record) assert_equal %w{47971712}, output["oclcnum"] assert_equal({}, @indexer.map_record(empty_record)) end it "#marc_series_facet" do @record = MARC::Reader.new(support_file_path "louis_armstrong.marc").to_a.first @indexer.instance_eval do to_field "series_facet", marc_series_facet end output = @indexer.map_record(@record) # trims punctuation too assert_equal ["Big bands"], output["series_facet"] assert_equal({}, @indexer.map_record(empty_record)) end describe "marc_sortable_author" do # these probably should be taking only certain subfields, but we're copying # from SolrMarc that didn't do so either and nobody noticed, so not bothering for now. before do @indexer.instance_eval do to_field "author_sort", marc_sortable_author end end it "collates author and title" do output = @indexer.map_record(@record) assert_equal ["Herman, Edward S. Manufacturing consent the political economy of the mass media Edward S. Herman and Noam Chomsky ; with a new introduction by the authors"], output["author_sort"] assert_equal [""], @indexer.map_record(empty_record)['author_sort'] end it "respects non-filing" do @record = MARC::Reader.new(support_file_path "the_business_ren.marc").to_a.first output = @indexer.map_record(@record) assert_equal ["Business renaissance quarterly [electronic resource]."], output["author_sort"] assert_equal [""], @indexer.map_record(empty_record)['author_sort'] end end describe "marc_sortable_title" do before do @indexer.instance_eval { to_field "title_sort", marc_sortable_title } end it "works" do output = @indexer.map_record(@record) assert_equal ["Manufacturing consent : the political economy of the mass media"], output["title_sort"] assert_equal({}, @indexer.map_record(empty_record)) end it "respects non-filing" do @record = MARC::Reader.new(support_file_path "the_business_ren.marc").to_a.first output = @indexer.map_record(@record) assert_equal ["Business renaissance quarterly"], output["title_sort"] end it "works with a record with no 245$ab" do @record = MARC::Reader.new(support_file_path "245_no_ab.marc").to_a.first output = @indexer.map_record(@record) assert_equal ["Papers"], output["title_sort"] end end describe "marc_languages" do before do @indexer.instance_eval {to_field "languages", marc_languages() } end it "unpacks packed 041a and translates" do @record = MARC::Reader.new(support_file_path "packed_041a_lang.marc").to_a.first output = @indexer.map_record(@record) assert_equal ["English", "French", "German", "Italian", "Spanish", "Russian"], output["languages"] assert_equal({}, @indexer.map_record(empty_record)) end end describe "marc_instrumentation_humanized" do before do @record = MARC::Reader.new(support_file_path "musical_cage.marc").to_a.first @indexer.instance_eval {to_field "instrumentation", marc_instrumentation_humanized } end it "translates, de-duping" do output = @indexer.map_record(@record) assert_equal ["Larger ensemble, Unspecified", "Piano", "Soprano voice", "Tenor voice", "Violin", "Larger ensemble, Ethnic", "Guitar", "Voices, Unspecified"], output["instrumentation"] assert_equal({}, @indexer.map_record(empty_record)) end end describe "marc_instrument_codes_normalized" do before do @record = MARC::Reader.new(support_file_path "musical_cage.marc").to_a.first @indexer.instance_eval {to_field "instrument_codes", marc_instrument_codes_normalized } end it "normalizes, de-duping" do output = @indexer.map_record(@record) assert_equal ["on", "ka01", "ka", "va01", "va", "vd01", "vd", "sa01", "sa", "oy", "tb01", "tb", "vn12", "vn"], output["instrument_codes"] end it "codes soloist 048$b" do @record = MARC::Reader.new(support_file_path "louis_armstrong.marc").to_a.first output = @indexer.map_record(@record) assert_equal ["bb01", "bb01.s", "bb", "bb.s", "oe"], output["instrument_codes"] assert_equal({}, @indexer.map_record(empty_record)) end end describe "publication_date" do # there are way too many edge cases for us to test em all, but we'll test some of em. it "works when there's no date information" do assert_equal nil, Marc21Semantics.publication_date(empty_record) end it "uses macro correctly with no date info" do @indexer.instance_eval {to_field "date", marc_publication_date } assert_equal({}, @indexer.map_record(empty_record)) end it "pulls out 008 date_type s" do @record = MARC::Reader.new(support_file_path "manufacturing_consent.marc").to_a.first assert_equal 2002, Marc21Semantics.publication_date(@record) end it "uses start date for date_type c continuing resource" do @record = MARC::Reader.new(support_file_path "the_business_ren.marc").to_a.first assert_equal 2006, Marc21Semantics.publication_date(@record) end it "returns nil when the records really got nothing" do @record = MARC::Reader.new(support_file_path "emptyish_record.marc").to_a.first assert_equal nil, Marc21Semantics.publication_date(@record) end it "estimates with a single 'u'" do @record = MARC::Reader.new(support_file_path "date_with_u.marc").to_a.first # was 184u as date1 on a continuing resource. For continuing resources, # we take the first date. And need to deal with the u. assert_equal 1845, Marc21Semantics.publication_date(@record) end it "resorts to 260c" do @record = MARC::Reader.new(support_file_path "date_resort_to_260.marc").to_a.first assert_equal 1980, Marc21Semantics.publication_date(@record) end it "works with date type r missing date2" do @record = MARC::Reader.new(support_file_path "date_type_r_missing_date2.marc").to_a.first assert_equal 1957, Marc21Semantics.publication_date(@record) end it "works correctly with date type 'q'" do val = @record['008'].value val[6] = 'q' val[7..10] = '191u' val[11..14] = '192u' @record['008'].value = val # Date should be date1 + date2 / 2 = (1910 + 1929) / 2 = 1919 estimate_tolerance = 30 assert_equal 1919, Marc21Semantics.publication_date(@record, estimate_tolerance) end end describe "marc_lcc_to_broad_category" do before do @indexer.instance_eval {to_field "discipline_facet", marc_lcc_to_broad_category } end it "maps a simple example" do @record = MARC::Reader.new(support_file_path "manufacturing_consent.marc").to_a.first output = @indexer.map_record(@record) assert_equal ["Language & Literature"], output["discipline_facet"] end it "maps to default" do @record = MARC::Reader.new(support_file_path "musical_cage.marc").to_a.first output = @indexer.map_record(@record) assert_equal ["Unknown"], output["discipline_facet"] assert_equal(["Unknown"], @indexer.map_record(empty_record)['discipline_facet']) end it "maps to nothing if none and no default" do @indexer.instance_eval {to_field "discipline_no_default", marc_lcc_to_broad_category(:default => nil)} @record = MARC::Reader.new(support_file_path "musical_cage.marc").to_a.first output = @indexer.map_record(@record) assert_nil output["discipline_no_default"] assert_nil @indexer.map_record(empty_record)["discipline_no_default"] end describe "LCC_REGEX" do it "rejects a non-LCC" do refute_match Traject::Macros::Marc21Semantics::LCC_REGEX, "Film no. A .N285" end end end describe "marc_geo_facet" do before do @indexer.instance_eval {to_field "geo_facet", marc_geo_facet } end it "maps a complicated record" do @record = MARC::Reader.new(support_file_path "multi_geo.marc").to_a.first output = @indexer.map_record(@record) assert_equal ["Europe", "Middle East", "Africa, North", "Agora (Athens, Greece)", "Rome (Italy)", "Italy"], output["geo_facet"] assert_equal({}, @indexer.map_record(empty_record)) end it "maps nothing on a record with no geo" do @record = MARC::Reader.new(support_file_path "manufacturing_consent.marc").to_a.first output = @indexer.map_record(@record) assert_nil output["geo_facet"] assert_equal({}, @indexer.map_record(empty_record)) end end describe "marc_era_facet" do before do @indexer.instance_eval {to_field "era_facet", marc_era_facet} end it "maps a complicated record" do @record = MARC::Reader.new(support_file_path "multi_era.marc").to_a.first output = @indexer.map_record(@record) assert_equal ["Early modern, 1500-1700", "17th century", "Great Britain: Puritan Revolution, 1642-1660", "Great Britain: Civil War, 1642-1649", "1642-1660"], output["era_facet"] assert_equal({}, @indexer.map_record(empty_record)) end end describe "marc_lcsh_display" do it "formats typical field" do field = MARC::DataField.new('650', ' ', ' ', ['a', 'Psychoanalysis and literature'], ['z', 'England'], ['x', 'History'], ['y', '19th century.']) str = Marc21Semantics.assemble_lcsh(field) assert_equal "Psychoanalysis and literature — England — History — 19th century", str end it "ignores numeric subfields" do field = MARC::DataField.new('650', ' ', ' ', ['a', 'Psychoanalysis and literature'], ['x', 'History'], ['0', '01234'], ['3', 'Some part']) str = Marc21Semantics.assemble_lcsh(field) assert_equal "Psychoanalysis and literature — History", str end it "doesn't put subdivision in wrong place" do field = MARC::DataField.new('600', ' ', ' ', ['a', 'Eliot, George,'],['d', '1819-1880.'], ['t', 'Middlemarch']) str = Marc21Semantics.assemble_lcsh(field) assert_equal "Eliot, George, 1819-1880. Middlemarch", str end it "mixes non-subdivisions with subdivisions" do field = MARC::DataField.new('600', ' ', ' ', ['a', 'Eliot, George,'],['d', '1819-1880.'], ['t', 'Middlemarch'], ['x', 'Criticism.']) str = Marc21Semantics.assemble_lcsh(field) assert_equal "Eliot, George, 1819-1880. Middlemarch — Criticism", str end it "returns nil for a field with no relevant subfields" do field = MARC::DataField.new('650', ' ', ' ') assert_nil Marc21Semantics.assemble_lcsh(field) end describe "marc_lcsh_formatted macro" do it "smoke test" do @record = MARC::Reader.new(support_file_path "george_eliot.marc").to_a.first @indexer.instance_eval {to_field "lcsh", marc_lcsh_formatted} output = @indexer.map_record(@record) assert output["lcsh"].length > 0, "outputs data" assert output["lcsh"].include?("Eliot, George, 1819-1880 — Characters"), "includes a string its supposed to" assert_equal({}, @indexer.map_record(empty_record)) end end end describe "extract_marc_filing_version" do before do @record = MARC::Reader.new(support_file_path "the_business_ren.marc").to_a.first end it "works as expected" do @indexer.instance_eval do to_field 'title_phrase', extract_marc_filing_version('245ab') end output = @indexer.map_record(@record) assert_equal ['Business renaissance quarterly'], output['title_phrase'] assert_equal({}, @indexer.map_record(empty_record)) end it "works with :include_original" do @indexer.instance_eval do to_field 'title_phrase', extract_marc_filing_version('245ab', :include_original=>true) end output = @indexer.map_record(@record) assert_equal ['The Business renaissance quarterly', 'Business renaissance quarterly'], output['title_phrase'] assert_equal({}, @indexer.map_record(empty_record)) end it "doesn't do anything if you don't include the first subfield" do @indexer.instance_eval do to_field 'title_phrase', extract_marc_filing_version('245h') end output = @indexer.map_record(@record) assert_equal ['[electronic resource].'], output['title_phrase'] assert_equal({}, @indexer.map_record(empty_record)) end it "dies if you pass it something else" do assert_raises(RuntimeError) do @indexer.instance_eval do to_field 'title_phrase', extract_marc_filing_version('245ab', :include_original=>true, :uniq => true) end end end end end