# coding: utf-8 module Mabmapper class AlephMabXmlEngine < Engine document_class MabXml::Document # # Id # field :id do doc.controlfield('SYS').join end # # Status - Neu (A) oder gelöscht (D) # field :status do value = 'A' # gelöscht -> LDR Position 6 == 'd' value = 'D' if doc.controlfield('LDR').at(5) == 'd' # ausgesondert über Feld 078 value = 'D' if doc.field('078', ind1: 'r').subfield('a').get.value.try(:downcase) == 'aus' # Standort Detmold unterdrücken detmold_locations = doc.field('LOC').subfield('n').get.values.flatten value = 'D' if detmold_locations.present? && detmold_locations.all?{|v| v == '50'} # Interimsaufnahmen unterdrücken value = 'D' if doc.field('537', ind1: '-', ind2: '1').subfield('a').get.values.flatten.any? { |v| v.downcase.include? 'interimsaufnahme' } value end # # HT Nummer # field :ht_number do doc.field('001', ind2: '1').subfield('a').get.value end # # Creation date # field :creation_date do doc.field('LOC', ind2: ' ').subfield('k').get.values.flatten.map(&:presence).compact.uniq end # # Materialtyp # field :materialtyp do f050 = doc.controlfield('050') type = case when (%w(a ).include?(f050.at( 0)) ) then 'print' when (%w(a b c).include?(f050.at( 3)) ) then 'microform' when (%w(a ).include?(f050.at( 5)) ) then 'audio' when (%w(b c z).include?(f050.at( 5)) ) then 'video' when (%w(d ).include?(f050.at( 5)) ) then 'image' when (%w(d ).include?(f050.at( 8)) ) then 'data_storage' when (%w(g ).include?(f050.at( 8)) ) then 'online_resource' when (%w(a ).include?(f050.at( 9)) ) then 'game' when (%w(a ).include?(f050.at(10)) ) then 'map' else 'other' end type end # # Inhaltstyp # field :inhaltstyp do f051 = doc.controlfield('051') f052 = doc.controlfield('052') f051s = f051.join.slice(1..3) || "" f052s = f052.join.slice(1..6) || "" type = case # Monos when (f051s.include?('b')) then 'bibliography' when (f051s.include?('c')) then 'catalog' when (f051s.include?('d')) then 'dictionary' when (f051s.include?('e')) then 'encyclopedia' when (f051s.include?('f')) then 'festschrift' when (f051s.include?('h')) then 'biography' when (f051s.include?('k')) then 'congress' when (f051s.include?('m')) then 'music' when (f051s.include?('n')) then 'standard' when (f051s.include?('u')) then 'university_text' when (f051s.include?('x')) then 'textbook' when (f051s.include?('y')) then 'dissertation' # Fortlaufende Sammelwerke (Zeitschriften u.ä.) when (f052s.include?('bi')) then 'bibliography' when (f052s.include?('ww')) then 'dissertation' when (f052s.include?('fs')) then 'festschrift' when (f052s.include?('ko')) then 'congress' when (f052s.include?('wb')) then 'dictionary' when (f052s.include?('ez')) then 'encyclopedia' when (f052s.include?('bg')) then 'biography' when (f052s.include?('mu')) then 'music' when (f052s.include?('no')) then 'standard' when (f052s.include?('sc')) then 'textbook' # ... der Rest else 'other' end type end # # Bandzähler von Reihen / Stücktiteln # field :volume_count do # Bandzählung von Reihen volumes = [] (456..496).step(10) do |f| value = doc.field("#{f}", ind2: '1').subfield('a').get.value volumes << value if value.present? end volumes.first end # # Title # field :title do f331_2 = doc.field('331', ind2: '2').subfield('a').get.value f333_2 = doc.field('333', ind2: '2').subfield('a').get.value f335_2 = doc.field('335', ind2: '2').subfield('a').get.value f360_2 = doc.field('360', ind2: '2').subfield('a').get.value f089_1 = doc.field('089', ind2: '1').subfield('a').get.value f331_1 = doc.field('331', ind2: '1').subfield('a').get.value f333_1 = doc.field('333', ind2: '1').subfield('a').get.value f335_1 = doc.field('335', ind2: '1').subfield('a').get.value f360_1 = doc.field('360', ind2: '1').subfield('a').get.value f304_1 = doc.field('304', ind2: '1').subfield('a').get.value f310_1 = doc.field('310', ind2: '1').subfield('a').get.value f340_1 = doc.field('340', ind2: '1').subfield('a').get.value f341_1 = doc.field('341', ind2: '1').subfield('a').get.value title = merge(title, f331_2, delimiter: '. ') title = merge(title, f333_2, delimiter: ' / ') title = merge(title, f335_2, delimiter: ' : ') title = merge(title, f360_2, delimiter: '. ') title = merge(title, f089_1, delimiter: ". #{(f089_1.present? && f089_1.length <= 3) ? 'Bd. ' : ''}") # only add 'Bd.' if Bandzählung is something trivial like 123, A or B-C title = merge(title, f331_1, delimiter: '. ') title = merge(title, f333_1, delimiter: ' / ') title = merge(title, f335_1, delimiter: ' : ') title = merge(title, f360_1, delimiter: '. ') title = merge(title, f304_1, delimiter: '. ') title = merge(title, f310_1, delimiter: ' ', wrap: '[@]') title = merge(title, f340_1.presence || f341_1, delimiter: ' = ') title.presence || '–' end field :title_display do ref(:title).gsub(/<<|>>/, '') end field :title_sort do ref(:title).gsub(/<<.*>>/, '').gsub(/\s\s/, ' ').strip end field :short_title_display do f089_1 = doc.field('089', ind2: '1').subfield('a').get.value # Bandangabe in Vorlageform f310_1 = doc.field('310', ind1: ['-', 'a'], ind2: '1').subfield('a').get.value # Hauptsachtitel in Ansetzungsform f331_1 = doc.field('331', ind2: '1').subfield('a').get.value # Hauptsachtitel in Vorlageform oder Mischform f331_2 = doc.field('331', ind2: '2').subfield('a').get.value # f334_1 = doc.field('334', ind1: '-', ind2: '1').get.value # Allgemeine Materialbenennung f335_1 = doc.field('335', ind2: '1').subfield('a').get.value # Zusätze zum Hauptsachtitel short_title = if f310_1 f310_1 elsif (f331_1 && f334_1 && f335_1) "#{f331_1} [#{f334_1}] : #{f335_1}" elsif (f331_1 && f335_1) "#{f331_1} : #{f335_1}" elsif f331_1 && f334_1 "#{f331_1} [#{f334_1}]" elsif f331_1 f331_1 elsif f089_1 && f089_1.length > 3 && f089_1[/\A(\d|\s)+\Z/].nil? && !['buch', 'hauptbd.'].include?(f089_1.gsub(/\[|\]/, '').downcase) f089_1.gsub(/.*(bd|Bd).*\,/, '') # Try to remove volume count from elsif f331_2 f331_2 else '' end short_title.gsub!(/<<|>>/, '') short_title.strip! short_title[0] = short_title[0].upcase if short_title.present? short_title.presence end field :title_search do search_titles = [] search_titles << ref(:title_display) (342..355).each do |f| search_titles << doc.field("#{f}", ind2: '1').subfield('a').get.values end search_titles << doc.field('370', ind2: '1').subfield('a').get.values search_titles << doc.field('376', ind2: '1').subfield('a').get.values (451..491).step(10).each do |f| search_titles << doc.field("#{f}", ind2: '1').subfield('a').get.values end search_titles << doc.field('502', ind2: '1').subfield('a').get.values search_titles << doc.field('504', ind2: '1').subfield('a').get.values search_titles << doc.field('505', ind2: '1').subfield('a').get.values (526..534).each do |f| search_titles << doc.field("#{f}", ind2: '1').subfield('a').get.values end search_titles << doc.field('621', ind2: '1').subfield('a').get.values search_titles << doc.field('627', ind2: '1').subfield('a').get.values search_titles << doc.field('633', ind2: '1').subfield('a').get.values search_titles .flatten .compact .map do |search_title| # add entries without dashes in words, if words with dashes are present if search_title.match(/[A-ZÄÖÜ][a-zäöü]+\-[A-ZÄÖÜ][a-zäöü]+/) [search_title, search_title.split(' ').map { |string| string.gsub(/([A-ZÄÖÜ][a-zäöü]+)\-([A-ZÄÖÜ][a-zäöü]+)/, '\1\2').downcase.capitalize }.join(' ')] else search_title end end .flatten .map(&:presence).compact.uniq end # # creator # field :person_creator_display do creators = [] # Personen (100..196).step(4) do |f| doc.field("#{f}", ind1: ['-', 'a'], ind2: ['1', '2']).get.fields.each do |field| creators << (field.get_subfield('a').try(:value) || field.get_subfield('p').try(:value) || field.get_subfield('c').try(:value)) end end creators.map(&:presence).compact.uniq end def self.corporate_body_from_field(field) subfield_a = field.get_subfield('a').try(:value) # Körperschafts-/Kongressname/Geografikum ohne IDN-Verknüpfung (NW) subfield_b = field.get_subfield('b').try(:value) # Unterordnung subfield_c = field.get_subfield('c').try(:value) # Ort (NW) subfield_d = field.get_subfield('d').try(:value) # Datum (NW) subfield_e = field.get_subfield('e').try(:value) # Kongressname (NW) subfield_g = field.get_subfield('g').try(:value) # Name des Geografikums (NW) subfield_h = field.get_subfield('h').try(:value) # Zusatz subfield_k = field.get_subfield('k').try(:value) # Körperschaftsname (NW) subfield_n = field.get_subfield('n').try(:value) # Zählung (W) subfield_x = field.get_subfield('x').try(:value) # nachgeordneter Teil (W) subfield_z = field.get_subfield('x').try(:value) # geografische Unterteilung (W) if !subfield_a && subfield_b && !subfield_c && !subfield_e && !subfield_g && subfield_h && subfield_k && !subfield_x && !subfield_z "#{subfield_k} <#{subfield_h}> / #{subfield_b}" else [ subfield_a, subfield_k, subfield_e, subfield_g, subfield_b ? "/ #{subfield_b}" : nil, "<#{[subfield_h, subfield_n, subfield_d, subfield_c, subfield_x, subfield_z].compact.join(', ').presence}>", ].compact.join(' ').try(:sub, '<>', '').try(:strip) end end field :corporate_body_creator_display do creators = [] # Körpferschaften (200..296).step(4) do |f| doc.field("#{f}", ind1: ['-', 'a'], ind2: ['1', '2']).get.fields.each do |field| creators << engine.corporate_body_from_field(field) end end creators.map(&:presence).compact.uniq end field :author_statement do f359_1 = doc.field('359', ind1:"-", ind2:'1').subfield('a').get.values.flatten.presence f359_2 = doc.field('359', ind1:"-", ind2:'2').subfield('a').get.values.flatten.presence f359_1 || f359_2 end # # contributor # field :person_contributor_display do contributors = [] # Personen (100..196).step(4) do |f| doc.field("#{f}", ind1: ['b', 'c', 'e', 'f'], ind2: ['1', '2']).get.fields.each do |field| name = (field.get_subfield('a').try(:value) || field.get_subfield('p').try(:value) || field.get_subfield('c').try(:value)) action_designator = field.get_subfield('b').try(:value) contributors << [name, action_designator].map(&:presence).compact.join(' ') end end contributors.map(&:presence).compact.uniq end field :corporate_body_contributor_display do contributors = [] # Körpferschaften (200..296).step(4) do |f| doc.field("#{f}", ind1: ['b', 'c', 'e', 'f'], ind2: ['1', '2']).get.fields.each do |field| contributors << engine.corporate_body_from_field(field) end end contributors.map(&:presence).compact.uniq end field :creator_contributor_display do creators = [] # Personen (100..196).step(4) do |f| creators << doc.field("#{f}", ind2: ['1', '2']).subfield(['a','p','c','n','b']).get.value end # Körperschaften (200..296).step(4) do |f| creators << doc.field("#{f}", ind2: ['1', '2']).subfield(['a','k','b','e','c','n','g','h']).get.value end # Sonderfall: Verfasserangaben aus 359 -> [u.a.] t = doc.field("359", ind2: '1').subfield('a').get.value.presence creators << '[u.a.]' if t && t.match(/\.\.\.|\[u\.a\.\]/i) # Cleanup creators = creators.flatten.compact creators = creators.map{ |c| c.delete('<').delete('>') } creators = creators.map(&:presence).compact.uniq end # # Creators and contributors without roles for faceting # field :creator_contributor_facet do if (creator_contributor_display = ref(:creator_contributor_display)).present? creator_contributor_display.map { |creator_contributor| creator_contributor.gsub(/\[.*\]/, '').strip } end end # # Creators and contributors in extended normalized form without roles # field :creator_contributor_search do creators = [] # Alle aus dem Display creators = creators + ref(:creator_contributor_display) # Entferne Sonderfall [u.a.] creators.reject!{|e| e == '[u.a.]'} # + Index Felder für Personen creators << doc.field('PPE').subfield(['a', 'p']).get.values # + zweiteilige Nebeneintragungen / beigefügte oder enthaltene Werke (800..824).step(6) do |f| creators << doc.field("#{f}").subfield(['a','p','c','n','b']).get.values end # + Index Felder für Körperschaften creators << doc.field('PKO').subfield(['a','k','b','e','g']).get.values # Füge alle Teile zusammen creators = creators.flatten.compact # Lösche Sortierzeichen creators = creators.map{ |c| c.delete('<').delete('>') } # Prüfe Inhalte auf Existenz und entferne doppelte Einträge creators = creators.map(&:presence).compact.uniq end # # Edition # field :edition do f403_1 = doc.field('403', ind2: '1').subfield('a').get.value f403_2 = doc.field('403', ind2: '2').subfield('a').get.value f407_1 = doc.field('407', ind2: '1').subfield('a').get.value f610_1 = doc.field('610', ind1: '-', ind2: '1').get.value # Fußnote zur Sekundärausgabe (einleitende Wendung) f403_1.presence || f403_2.presence || f407_1.presence || f610_1.presence end # # Publisher # field :publisher do # Verlagsort f410_1 = doc.field('410', ind2: '1').subfield('a').get.value f410_2 = doc.field('410', ind2: '2').subfield('a').get.value f415_1 = doc.field('415', ind2: '1').subfield('a').get.value f415_2 = doc.field('415', ind2: '2').subfield('a').get.value f410 = f410_1.presence or f410_2.presence f415 = f415_1.presence or f415_2.presence # Verleger f412_1 = doc.field('412', ind2: '1').subfield('a').get.value f412_2 = doc.field('412', ind2: '2').subfield('a').get.value f417_1 = doc.field('417', ind2: '1').subfield('a').get.value f417_2 = doc.field('417', ind2: '2').subfield('a').get.value f412 = f412_1.presence or f412_2.presence f417 = f417_1.presence or f417_2.presence # ... weitere Verleger in 418 ohne Ortsangabe ignorieren wir publisher = [] publisher << [f410, f412].map(&:presence).compact.join(' : ') publisher << [f415, f417].map(&:presence).compact.join(' : ') publisher.map(&:presence).compact.map { |element| element.gsub(/<<|>>/, '') } end # # Creation date # field :creationdate do f051 = doc.controlfield('051') f052 = doc.controlfield('052') f425_a1 = doc.field('425', ind1: 'a', ind2: '1').subfield('a').get.value # Erscheinungsjahr in Sortierform f425_b1 = doc.field('425', ind1: 'b', ind2: '1').subfield('a').get.value # Erscheinungsjahr in Sortierform des frühsten Bandes f425_c1 = doc.field('425', ind1: 'c', ind2: '1').subfield('a').get.value # Erscheinungsjahr in Sortierform des letzten Bandes f425_p1 = doc.field('425', ind1: 'p', ind2: '1').subfield('a').get.value # Erscheinungsjahr in Sortierform eines Tonträgers f595 = doc.field('595').get.value # Erscheinungsjahr der Quelle if f595.present? f595 # Erscheinungsjahr der Quelle hat Vorrang vor dem eigentlichen Erscheinungsjahr (falls vorhanden) elsif %w(n t).include?(f051.at(0)) or %w(r p).include?(f052.at(0)) # es handelt sich um eine Überordnung if f425_b1.present? and f425_c1.present? if f425_b1 == f425_c1 f425_b1 else "#{f425_b1} – #{f425_c1}" end elsif f425_b1.present? and not f425_c1.present? "#{f425_b1} –" elsif f425_c1.present? and not f425_b1.present? "– #{f425_c1}" else f425_a1.presence || f425_p1.presence end else f425_a1.presence || f425_p1.presence end end field :creationdate_search do date = ref(:creationdate) if date.present? date.gsub(/[^0-9]/i, '') # Entferne alle nicht numerischen Zeichen end end # # Format # field :format do f433 = doc.field('433', ind2: '1').subfield('a').get.value f434 = doc.field('434', ind2: '1').subfield('a').get.values.join(', ') f435 = doc.field('435', ind2: '1').subfield('a').get.value f437 = doc.field('437', ind2: '1').subfield('a').get.value f653 = doc.field('653', ind2: '1').subfield('a').get.values.join(', ') format = f433 format = merge(format, f434, delimiter: ' : ') format = merge(format, f435, delimiter: ' ; ') format = merge(format, f437, delimiter: ' + ') format = merge(format, f653, delimiter: '.- ') format.presence end # # Is part of # field :is_part_of do f525 = doc.field('525', ind2: '1').subfield(['p','a']).get.value(join_subfields: ': ') f590 = doc.field('590', ind2: '1').subfield('a').get.value f591 = doc.field('591', ind2: '1').subfield('a').get.value f592 = doc.field('592', ind2: '1').subfield('a').get.value f593 = doc.field('593', ind2: '1').subfield('a').get.value f594 = doc.field('594', ind2: '1').subfield('a').get.value f595 = doc.field('595', ind2: '1').subfield('a').get.value f596 = doc.field('596', ind2: '1').subfield('a').get.value f597 = doc.field('597', ind2: '1').subfield('a').get.value f598 = doc.field('598', ind2: '1').subfield('a').get.value #f599_1 = doc.datafield('599', ind1: 'a,b', ind2: '1', subfield: 'a', multiple: true).join(' ') # ISSN #f599_2 = doc.datafield('599', ind1: 'c,d', ind2: '1', subfield: 'a', multiple: true).join(' ') # ISBN #f599_3 = doc.datafield('599', ind1: 'e,f', ind2: '1', subfield: 'a', multiple: true).join(' ') # ISMN #f599_4 = doc.datafield('599', ind1: 'g,h', ind2: '1', subfield: 'a', multiple: true).join(' ') # ISRN ipo = f525 ipo = merge(ipo, f590, delimiter: '.- ', wrap: "In: @" ) ipo = merge(ipo, f591, delimiter: ' / ' ) ipo = merge(ipo, f592, delimiter: '. ' ) ipo = merge(ipo, f593, delimiter: '.- ' ) ipo = merge(ipo, f594, delimiter: '.- ' ) ipo = merge(ipo, f595, delimiter: ', ' ) ipo = merge(ipo, f596, delimiter: '.- ' ) ipo = merge(ipo, f597, delimiter: '.- ', wrap: "(@)" ) ipo = merge(ipo, f598, delimiter: '.- ' ) #ipo = merge(ipo, f599_1, delimiter: '.- ', wrap: "ISSN @") #ipo = merge(ipo, f599_2, delimiter: '.- ', wrap: "ISSN @") #ipo = merge(ipo, f599_3, delimiter: '.- ', wrap: "ISMN @") #ipo = merge(ipo, f599_4, delimiter: '.- ', wrap: "ISRN @") ipo.presence end # # ISBN # field :isbn do isbns = [] isbns << doc.field('540', ind1: '-z', ind2: '1').subfield('a').get.values isbns << doc.field('634', ind1: '-z', ind2: '1').subfield('a').get.values isbns << doc.field('086', ind2: '1').subfield(['b','c','d']).get.values isbns.flatten.map{|v| v.gsub(/[^0-9\-x]/i, '').strip if v.present?}.map(&:presence).compact.uniq end # # ISSN # field :issn do issns = [] issns << doc.field('542', ind1: '-z', ind2: '1').subfield('a').get.values issns << doc.field('635', ind1: '-z', ind2: '1').subfield('a').get.values issns << doc.field('545', ind2: '1').subfield(['a','b','c','d']).get.values issns.flatten.map{|v| v.gsub(/[^0-9\-x]/i, '').strip if v.present?}.map(&:presence).compact.uniq end # # ZDB ID # field :zdb_id do zdb_ids = [] zdb_ids << doc.field('025', ind1: 'z', ind2: '1').subfield('a').get.values zdb_ids.flatten.map(&:presence).compact.uniq end # # Subject # field :subject do subjects = [] subjects << doc.field('902').subfield(['a','p','k','s','g','e','t']).get.values # Beispiel aus 740: New York (N.Y.)--Social life and customs--20th century--Fiction. %w(710 711 740).each do |f| t = doc.field(f).subfield('a').get.values t = t.flatten.map(&:presence).compact t = t.map{|a| a.split('--')}.flatten.map{|s| s.end_with?('.') ? s[0..-2].strip : s}.map(&:presence).compact.uniq subjects = subjects + t end subjects.flatten.map(&:presence).compact.map{|f| f.delete('<').delete('>')}.uniq end field :subject_search do subjects = [] # Alles Display subjects subjects << ref(:subject) # + Index Felder für weitere Schlagworte subjects << doc.field('PSW').subfield(['a','k','e','g','s','p','t','f','z']).get.values subjects.flatten.map(&:presence).compact.map{|f| f.delete('<').delete('>')}.uniq end # # DDC # field :ddc do ddc_fields = [] ddc_fields << doc.field('700', ind1: 'b', ind2: '1').subfield('a').get.values ddc_fields << doc.field('705', ind1: ' ', ind2: '1').subfield('a').get.values ddc_fields.flatten.map(&:presence).compact.uniq end # # Abstracts # field :abstract do abstracts = [] abstracts << doc.field('750').subfield('a').get.value abstracts << doc.field('753').subfield('a').get.value abstracts << doc.field('756').subfield('a').get.value abstracts.map(&:presence).compact end # # Language # field :language do languages = [] languages << doc.field('037', ind1: 'b', ind2: ['1','2']).subfield('a').get.values languages.flatten.map(&:presence).compact.uniq end # # Relation # field :relation do relations = [] if (f021a = doc.field('021').subfield('a').get.value).present? relations << { ht_number: f021a, label: 'Primärform' } end if (f022a = doc.field('022').subfield('a').get.value).present? relations << { ht_number: f022a, label: 'Sekundärform' } end (526..534).each do |mab_field_number| doc.field("#{mab_field_number}", ind2: '1').get.fields.each do |field| ht_number = field.get_subfield('9').try(:value).presence label = [ field.get_subfield('p').try(:value).presence, field.get_subfield('a').try(:value).try(:gsub, /<<|>>/, '').presence ].compact.join(' ') relations << {ht_number: ht_number, label: label} if label end end relations.flatten.select { |relation| relation[:label].present? }.map(&:to_json) end # # JSON encoded field to store superorder related informations # field :superorder_display do superorders = [] # Link zur Überordung eines mehrbändigen Werkes superorders << { ht_number: doc.field('010', ind2: '1').subfield('a').get.value, label: doc.field('331', ind2: '2').subfield('a').get.value, volume_count: doc.field('089', ind2: '1').subfield('a').get.value # Bandzählung dieses Werkes innerhalb der entsprechenden Überordnung } # 451 ff (451..491).step(10) do |f| superorders << { ht_number: doc.field("#{f+2}", ind2: '1').subfield('a').get.value, label: [doc.field("#{f}", ind2: '1').subfield('a').get.value, doc.field("#{f}", ind2: '2').subfield('a').get.value].compact.reject { |label| label[/\A\.\.\.\s+(;|:)/] }.first, volume_count: doc.field("#{f+4}", ind2: '1').subfield('a').get.value } end superorders .map(&:presence) .delete_if { |element| element[:label].blank? } .each do |element| # remove 'not sort' indicators from label element[:label].try(:gsub!, /<<|>>/, '') # remove leading '... ' from label element[:label].try(:gsub!, /\A\.\.\.\s+/, '') # get label additions (everything behind the first ':') and make it a clean array element[:label_additions] = if element[:label].present? element[:label][/:.*/] .try(:gsub, /(\A:)|(,\Z)/, '') .try(:strip) .try(:gsub, /,/, ';') .try(:split, ';') .try(:map, &:strip) end # remove every label addition that is also in volume count (space removement and downcasing are done for more fuzzy matching e.g. between 'Faz. 4' and 'faz.4' if element[:label_additions].present? && element[:volume_count].present? volume_count_elements = element[:volume_count].gsub(/,|:|;/, ';').split(';').map { |e| e.gsub(/\s+/, '').downcase } # volume_count elements without spaces for more fuzzy comparing element[:label_additions].reject! { |label_addition| volume_count_elements.include? label_addition.gsub(/\s+/, '').downcase } element[:label_additions].reject! { |label_addition| ['bd', 'band'].any? { |forbidden_label_addition| label_addition.downcase.starts_with? forbidden_label_addition } } # additional remove every Bd. or Band element[:label_additions] = element[:label_additions].presence end # remove any label additions from the label element[:label].try(:gsub!, /:.*\Z/, '') element[:label].try(:gsub!, /;.*\Z/, '') element[:label].try(:strip!) element[:volume_count].try(:gsub!, /<.*>/, '') end.uniq.map(&:to_json) end # # Linked superorders (only ht_numbers) + ht_number if primary form (if any) # # @depends_on: superorder_display # field :superorder do superorders = [] f623 = doc.field('623').get.value # Identifikationsnummer des 1. GT der Sekundärform f629 = doc.field('629').get.value # Identifikationsnummer des 2. GT der Sekundärform superorders << if (json_encoded_superorders_display = ref(:superorder_display)).present? superorders_display = json_encoded_superorders_display.map { |json_encoded_superorder_display| JSON.parse(json_encoded_superorder_display) } superorders_display.map { |superorder_display| superorder_display['ht_number'] } end superorders << f623 if f623.present? superorders << f629 if f629.present? superorders.flatten.map(&:presence).compact end # # Sind wir eine Überordnung? # field :is_superorder do f051 = doc.controlfield('051') f052 = doc.controlfield('052') f051.at(0) == 'n' || f051.at(0) == 't' || f052.at(0) == 'p' || f052.at(0) == 'r' || f052.at(0) == 'z' end # # Sind wir ein Band? # field :is_suborder do ref(:superorder_display).present? # we take superorder_display instead of superorder to exclude superorder relations between primary/secondare form from this indicator end # # Erscheinungsform # field :erscheinungsform do f051 = doc.controlfield('051') f052 = doc.controlfield('052') f051s = f051.join.slice(1..3) || "" f052s = f052.join.slice(1..6) || "" type = case when (f051.at(0) == 'a') then 'article' when (f051.at(0) == 'm') then 'monograph' when (f051.at(0) == 'n') then 'monograph' when (f051.at(0) == 's') then 'monograph' when (f052.at(0) == 'a') then 'article' when (f052.at(0) == 'p') then 'journal' when (f052.at(0) == 'r') then 'series' when (f052.at(0) == 'z') then 'newspaper' when (f051s.include?('t')) then 'article' when (f052s.include?('au')) then 'article' when (f052s.include?('se')) then 'series' # ... der Rest else # # Hack to make all suborders without proper 'erscheinungsform' monographs # if ref(:is_suborder).presence 'monograph' else 'other' end end type end # # Description # field :description do descriptions = [] # 405 - Erscheinungsverlauf von Zeitschriften descriptions << doc.field('405', ind2: '1').subfield(['a', 'p']).get.values(join_subfields: ': ') # 522 - Teilungsvermerk bei fortlaufenden Sammelwerken descriptions << doc.field('522', ind2: '1').subfield(['a', 'p']).get.values(join_subfields: ': ') # 523 - Erscheinungsverlauf von Monos descriptions << doc.field('523', ind2: '1').subfield(['a', 'p']).get.values(join_subfields: ': ') (501..519).each do |f| descriptions << doc.field("#{f}", ind2: '1').subfield(['a', 'p']).get.values(join_subfields: ': ') end (536..537).each do |f| descriptions << doc.field("#{f}", ind2: '1').subfield(['a', 'p']).get.values(join_subfields: ': ') unless f == 537 && ref(:erscheinungsform) == "journal" end # Finally... descriptions.flatten.map(&:presence).compact.uniq end # # Delivery Catagory # field :delivery_category do materialtyp = ref(:materialtyp) erscheinungsform = ref(:erscheinungsform) if (materialtyp == 'online_resource') 'electronic_resource' elsif (erscheinungsform == 'series' || erscheinungsform == 'journal') 'structural_metadata' else 'physical_item' end end field :volume_count_sort do possible_values = [] possible_values << doc.field('090', ind2: '1').subfield('a').get.value (451..491).step(10) { |f| possible_values << doc.field("#{f+5}", ind2: '1').subfield('a').get.value } count = possible_values.map(&:presence).compact.uniq.first count.rjust(15, '0') if count.present? end field :volume_count_sort2 do possible_values = [] possible_values << doc.field('090', ind2: '1').subfield('a').get.value (451..491).step(10) { |f| possible_values << doc.field("#{f+5}", ind2: '1').subfield('a').get.value } count = possible_values.map(&:presence).compact.uniq.first if count.present? # extract the count groups count_groups = count.split(',') # make sure we have always 4 groups count_groups = Array.new(4) { |i| count_groups[i] } # make sure every group is a string count_groups.map!{ |g| g.to_s } # clean things up a bit count_groups.map!{ |g| g.gsub(/\[|\]/, '').strip } # fill up each group to 6 characters count_groups.map!{ |g| g.rjust(6, '0') } # make sure each group is really 6 characters. count_groups.map!{ |g| g[0..5] } # finally join count_groups.join else nil end end # # Notation # field :notation do doc.field('700', ind2: ' ').subfield('a').get.values(join_subfields: '') end field :notation_sort do ref(:notation).try(:last) end # # Table of contents # field :toc do doc.field('TXT').subfield('a').get.values.join(' ') end # # Signature # field :signature do signatures = [] # Lade LOC Felder für Signaturen-Extraktion fields = doc.field('LOC').subfield(['b','d','5']).get.fields.presence || [] # Lösche alle Felder die kein Unterfeld d haben (ausgesondert) fields = fields.reject{|f| f.subfields.find{|sf| sf.name == "d"}.blank?} # Prüfe ob alle Exemplare im Magazin stehen all_stack = fields.map{|f| f.subfields.find {|sf| sf.name == 'b' && sf.value.match(/02|03|04|07/)}.present?}.all? # Zeitschriftensignatur (haben Vorrgang, falls vorhanden) # # Achtung, bei Feld 200 handelt es sich um einen Aleph-Expand. Dieses Feld ist an den beiden leeren Indikatoren zu erkennen. # Darüber hinaus kann dieses Feld mehrfach vorkommen. Wir nehmen an, dass Subfeld 0 eine Art Zählung angibt, weshalb dort # ein Wert von '1' zu bevorzugen ist. # signatures << doc.field('200', ind1: ' ', ind2: ' ').get.fields .select { |f| f.get_subfield('f').present? } .select { |f| (value = f.get_subfield('0').try(:value)) == '1' || value.nil? } .map { |f| f.get_subfield('f').value.try(:gsub, ' ', '') } .first.presence # Wenn alle Exemplare im Magzin stehen, dann nimm nur die erste signatur if all_stack fields.each do |field| subfield = field.subfields.find{|f| f.name == "d"} if subfield.present? && subfield.value.present? signatures << subfield.value break end end # ansonsten extrahiere aus den normalen Signaturen eine Basis-Signatur else # Lösche alle Felder die als Standordkennziffer eine Magazinkennung haben fields = fields.reject{|f| f.subfields.find{|sf| sf.name == 'b' && sf.value.match(/02|03|04|07/)}.present?} # Sortiere die restlichen Felder nach Unterfeld 5 (Strichcode) fields = fields.sort do |x, y| x5 = x.subfields.find{|f| f.name == "5"} y5 = y.subfields.find{|f| f.name == "5"} if x5 && y5 x5.value <=> y.value else 0 end end # Extrahiere die Signaturen aus Unterfeld d und erzeuge eine Basis-Signatur fields.each do |field| subfield = field.subfields.find{|f| f.name == "d"} if subfield.present? && subfield.value.present? signature = subfield.value index = signature.index('+') || signature.length base_signature = signature[0..index-1] signatures << base_signature end end end # Stücktitel Signatur signatures << doc.field('100', ind2: ' ').subfield('a').get.value # Some additional love for journal signatures signatures.map! do |signature| # if this is a journal signature if signature.try(:[], /\d+[A-Za-z]\d+$/).present? # unless there is a leading standortkennziffer unless signature.starts_with?('P') standort_kennziffer = if (loc_standort_kennziffer = doc.field('LOC').subfield('b').get.value).present? loc_standort_kennziffer elsif (f105a = doc.field('105').subfield('a').get.value).present? f105a end standort_kennziffer.present? ? "P#{standort_kennziffer}/#{signature}".gsub(/\/\//, '/') : signature else signature end.downcase.capitalize # last but not least make journal signatures like P10/34T24 to P10/34t24 else signature end end # Fertig. Wir nehmen die erste Signatur zur Anzeige signatures.flatten.map(&:presence).compact.uniq.first end field :signature_search do signatures = [] signatures = signatures + doc.field('LOC').subfield('d').get.values # Stücktitel Signatur signatures << doc.field('100', ind2: ' ').subfield('a').get.value # Zeitschriftensignatur signatures << doc.field('200', ind1: ' ', ind2: ' ').subfield('f').get.values signatures = signatures.flatten.map(&:presence).compact .map do |signature| _signature = signature .gsub(/\A\//, '') # remove leading '/' for some journal signatures .gsub(/\s+/, '') # remove spaces for some journal signatures (e.g. 'P 10/34 t 26') .upcase # upcase signatures like 'P10/34t26' to 'P10/34T26' to detect duplicates like 34t26 and 34T26 (search engine should handle downcasing, primo does) # for signatures with volume count e.g. 'LKL2468-14/15', add all variants possible ['LKL2468-14/15', 'LKL2468-14', 'LKL2468'] _signature_array = [_signature, _signature.gsub(/(\d+)\/\d+\Z/, '\1'), _signature.gsub(/\-\d+.*\Z/, '')] # for journals which only have one single signature with leading 'Pxx/' like 'P10/34M3' create 'Pxx/'-less version also _signature_array.push _signature.gsub(/\AP\d+\//, '') end.flatten # if any signature is a journal signature if (journal_signature = signatures.select { |signature| signature.try(:[], /\d+[A-Za-z]\d+$/).present? }.first).present? if signatures.none? { |signature| signature.starts_with? 'P' } # TODO: code duplication with :signature standort_kennziffer = if (loc_standort_kennziffer = doc.field('LOC').subfield('b').get.value).present? loc_standort_kennziffer elsif (f105a = doc.field('105').subfield('a').get.value).present? f105a end if standort_kennziffer.present? signatures << "P#{standort_kennziffer}/#{journal_signature}".gsub(/\/\//, '/') end end end signatures.map! do |signature| is_journal_signature = signature.match(/(P\d\d\/)?\d\d?[a-zA-Z]\d\d?/) spaced_journal_signature = signature.gsub(/\AP(\d\d)/, 'P \1').gsub(/(\d\d?)([a-zA-Z])(\d\d?)/, '\1 \2 \3') if is_journal_signature [signature, spaced_journal_signature] end.flatten! signatures.flatten.map(&:presence).compact.uniq end # # Link to resource # # In u steht die URL, in 3, x, z können Beschreibungen stehen. # 3 - Information über die Art. Hier kann man z.B. Inhaltsverzeichnisse erkennen # x - "Verlag" # z - Zugriffsinformationen (z.B. Nur im Campusnetz) field :resource_link do fulltext_links = [] links = doc.field('655').subfield(['u', '3', 'z', 't']).get.fields links.each do |link| url = link.get_subfield('u').try(:value) subfield_3 = link.get_subfield('3') # HBZ Inhaltsverzeichnisse subfield_z = link.get_subfield('z') # BVB Inhaltsverzeichnisse subfield_t = link.get_subfield('t') # Type: VIEW => Adam Inhaltsverzeichnis unless (url.present? && subfield_3.present? && subfield_3.value =~ /^inhalt/i) || (url.present? && subfield_z.present? && subfield_z.value =~ /^inhalt/i) || (url.present? && subfield_t.present? && subfield_t.value =~ /^view/i) fulltext_links << url end end fulltext_links.compact.presence end # # Link to TOC # field :link_to_toc do toc_links = [] links = doc.field('655').subfield(['u', '3', 'z', 't']).get.fields links.each do |link| url = link.get_subfield('u').try(:value) subfield_3 = link.get_subfield('3') # HBZ Inhaltsverzeichnisse subfield_z = link.get_subfield('z') # BVB Inhaltsverzeichnisse subfield_t = link.get_subfield('t') # Type: VIEW => Adam Inhaltsverzeichnis if (url.present? && subfield_3.present? && subfield_3.value =~ /^inhaltsv/i) || (url.present? && subfield_z.present? && subfield_z.value =~ /^inhaltsv/i) || (url.present? && subfield_t.present? && subfield_t.value =~ /^view/i) toc_links << url end end toc_links.compact.presence end # # Selektionskennzeichen # field :selection_code do codes = [] codes << doc.field('078', ind1: ['e', 'r']).subfield('a').get.values codes.flatten.map(&:presence).compact.uniq end # # Bestandsinformationen Zeitschriften # field :ldsX do # Feld 200 mit ind1 = ' ', ind2 = ' ' # Unterfelder: # 0 - Sortierindikator # a - Einleitende Wendung # b - Verlauf # c - Lücke im Bestand / Verlauf # e - Kommentar # f - Signatur r = [] fields = doc.field('200', ind2: ' ').subfield(['0', 'a', 'b', 'c', 'e', 'f']).get.fields fields.each do |field| field_0 = field.get_subfield('0') field_a = field.get_subfield('a') field_b = field.get_subfield('b') field_c = field.get_subfield('c') field_e = field.get_subfield('e') field_f = field.get_subfield('f') s = "" s = merge(s, field_a.try(:value), delimiter: ' ') s = merge(s, field_b.try(:value), delimiter: ': ') s = merge(s, field_c.try(:value), delimiter: ' ') s = merge(s, field_e.try(:value), delimiter: '. ') s = merge(s, field_f.try(:value), delimiter: ' Zeitschriftensignatur: ') # Cleanup s = s.gsub(/^\- /, '') # Z.b. "- Index: Foo Bar" # Sort position = nil if field_0.present? position = ((field_0.value || "").match(/^(\d+)$/) && $1.to_i) || nil end if position && position.is_a?(Fixnum) r[position] = s else r << s end end r.map(&:presence).compact.uniq end # # Frbr t # field :frbr_t do value = 1 f052 = doc.controlfield('052') ffmt = doc.controlfield('FMT') f100 = doc.field('100').subfield(['a','p']).get.value f200 = doc.field('200').subfield(['a','k','g','e']).get.value f300 = doc.field('300', ind2: '2').subfield('a').get.value if (f052.present? and ['p','z','r','j'].include?(f052.at(0))) value = 99 elsif (ffmt == 'MU' and f300.present?) value = 99 elsif (ffmt == 'MU' and f100.blank? and f200.blank?) value = 99 end value end # # Frbr k1 (ohne Körperschaften zunächst) # field :frbr_k1 do value = nil ffmt = doc.controlfield('FMT') f052 = doc.controlfield('052') f334 = doc.field('334', ind2: '1').subfield('a').get.value.try(:downcase) f100_1 = doc.naco_normalization(doc.field('100', ind1: '-', ind2: '1').subfield(['a','p']).get.value).presence f100_2 = doc.naco_normalization(doc.field('100', ind1: 'b', ind2: '1').subfield(['a','p']).get.value).presence if (f100_1.present?) value = "$$Kpad#{f100_1}$$AA" elsif (f100_2.present?) value = "$$Kpad#{f100_2}$$AA" elsif (f100_2.blank? and ffmt == 'MU') value = "$$Kpad#{f100_2}$$AA" elsif (f100_1.blank? and ffmt == 'MH' and f052.empty? and f334 == "elektronische ressource") value = "$$Kpad#{f100_2}$$AA" end value end field :redactional_remark do doc.field("537", ind2: '1').subfield(['a', 'p']).get.values(join_subfields: ': ').presence.try(:join) end # # Sekundärformen # field :is_secondary_form do (doc.field('610').get.value || doc.field('611').get.value || doc.field('619').get.value || doc.field('621').get.value) != nil end field :secondary_form_preliminary_phrase do doc.field('610', ind1: '-', ind2: '1').get.value end field :secondary_form_publisher do (doc.field('611').get.value.to_s << ' : ' << doc.field('613').get.value.to_s).gsub(/\A : /, '').presence end field :secondary_form_creationdate do doc.field('619').get.value end field :secondary_form_isbn do doc.field('634').get.value end field :secondary_form_physical_description do doc.field('637').get.value end # there can at most be two of 'em field :secondary_form_superorder do [ { ht_number: doc.field('623').get.value, label: doc.field('621').get.value, volume_count: doc.field('625').get.value }, { ht_number: doc.field('629').get.value, label: doc.field('627').get.value, volume_count: doc.field('631').get.value } ].select { |superorder| superorder[:label].present? }.map(&:to_json).presence end field :local_comment do doc.field('125', ind1: ' ', ind2: ' ').subfield(['_', 'a']).get.fields.map(&:values).flatten.uniq.presence end # # additional_data # ( a complex data structure to be stored by the search engine; avoids to touch the normalization rules everytime ) # field :additional_data do additional_data = { author_statement: ref(:author_statement), corporate_body_contributor_display: ref(:corporate_body_contributor_display), corporate_body_creator_display: ref(:corporate_body_creator_display), local_comment: ref(:local_comment), person_contributor_display: ref(:person_contributor_display), person_creator_display: ref(:person_creator_display), redactional_remark: ref(:redactional_remark) } .inject({}) { |hash, (key, value)| hash[key] = value if value.present?; hash } additional_data.to_json if additional_data.present? end # # mab # #field :mab do # (filtered_xml = doc.xml.clone).xpath('/OAI-PMH/ListRecords/record/metadata/record/datafield[@tag="TXT" or @tag="PLK" or @tag="PSW" or @tag="PPE"]').remove # Base64.strict_encode64(ActiveSupport::Gzip.compress(filtered_xml, Zlib::BEST_COMPRESSION)) unless filtered_xml.nil? #end end end