app/mixin_logic/marc_helper.rb in umlaut-3.1.1 vs app/mixin_logic/marc_helper.rb in umlaut-3.2.0

- old
+ new

@@ -1,41 +1,41 @@ module MarcHelper # Takes an array of ruby MARC objects, adds ServiceResponses - # for the 856 links contained. + # for the 856 links contained. # Returns a hash of arrays of ServiceResponse objects added, keyed - # by service type value string. + # by service type value string. def add_856_links(request, marc_records, options = {}) options[:default_service_type] ||= "fulltext" options[:match_reliability] ||= ServiceResponse::MatchExact responses_added = Hash.new - + # Keep track of urls to avoid putting the exact same url in twice urls_seen = Array.new - + marc_records.each do |marc_xml| - + marc_xml.find_all {|f| '856' === f.tag}.each do |field| # Might have more than one $u, in which case we want to # possibly add each of them. Might have 0 $u in which case - # we skip. - field.subfields.find_all {|sf| sf.code == 'u'}.each do |sf| - url = sf.value - + # we skip. + field.subfields.find_all {|sf| sf.code == 'u'}.each do |sf| + url = sf.value + # Already got it from another catalog record? next if urls_seen.include?(url) - - # Trying to avoid duplicates with SFX/link resolver. + + # Trying to avoid duplicates with SFX/link resolver. skip = should_skip_856_link?(request, marc_xml, url) next if skip - + urls_seen.push(url) - - + + display_name = nil if field['y'] display_name = field['y'] else # okay let's try taking just the domain from the url @@ -48,50 +48,50 @@ display_name = url if display_name.nil? end # But if we've got a $3, the closest MARC comes to a field # that explains what this actually IS, use that too please. display_name = field['3'] + ' from ' + display_name if field['3'] - - # Build the response. - + + # Build the response. + response_params = {:service=>self, :display_text=>display_name, :url=>url} - # get all those $z subfields and put em in notes. + # get all those $z subfields and put em in notes. response_params[:url] = url - + # subfield 3 is being used for OCA records loaded in our catalog. response_params[:notes] = field.subfields.collect {|f| f.value if (f.code == 'z') }.compact.join('; ') - + is_journal = (marc_xml.leader[7,1] == 's') - unless ( field['3'] || ! is_journal ) # subfield 3 is in fact some kind of coverage note, usually - response_params[:notes] += "; " unless response_params[:notes].blank? + unless ( field['3'] || ! is_journal ) # subfield 3 is in fact some kind of coverage note, usually + response_params[:notes] += "; " unless response_params[:notes].blank? response_params[:notes] += "Dates of coverage unknown." end - - + + unless ( options[:match_reliability] == ServiceResponse::MatchExact ) response_params[:match_reliability] = options[:match_reliability] - + response_params[:edition_str] = edition_statement(marc_xml) end - + # Figure out the right service type value for this, fulltext, ToC, # whatever. - response_params[:service_type_value] = service_type_for_856( field, options ) - + response_params[:service_type_value] = service_type_for_856( field, options ) + # fulltext urls from MARC are always marked as specially stupid. response_params[:coverage_checked] = false response_params[:can_link_to_article] = false - + # Some debugging info, add the 001 bibID if we have one. - + response_params[:debug_info] = "BibID: #{marc_xml['001'].value}" if marc_xml['001'] - - + + # Add the response response = request.add_service_response(response_params) - + responses_added[response_params[:service_type_value]] ||= Array.new responses_added[response_params[:service_type_value]].push(response) end end end @@ -116,66 +116,70 @@ # URL. But if it's not a journal, use it anyway, because it's probably # an e-book that is not in SFX, even if it's from a vendor who is in # SFX. We use MARC leader byte 7 to tell if it's a journal. Confusing enough? # Not yet! Even if it is a journal, if this isn't an article-level # cite and there are no other full text already provided, we - # still include. + # still include. def should_skip_856_link?(request, marc_record, url) is_journal = (marc_record.leader[7,1] == 's') - + sfx_controlled = SfxUrl.sfx_controls_url?(url) - + # Do NOT skip if it's a title-level citation with no - # existing full text entries. - not_title_level_empty = !( request.title_level_citation? && - request.get_service_type("fulltext").length == 0 - ) + # existing full text entries. + not_title_level_empty = !( request.title_level_citation? && + request.get_service_type("fulltext").length == 0 + ) result = ( is_journal && sfx_controlled && not_title_level_empty ) return result end # Take a ruby Marc Field object representing an 856 field, # decide what umlaut service type value to map it to. Fulltext, ToC, etc. # This is neccesarily a heuristic guess, Marc doesn't have enough granularity - # to really let us know for sure. + # to really let us know for sure -- + # although if indicator2 is '2' for 'related resource', we decide it is + # NOT fulltext. def service_type_for_856(field, options) options[:default_service_type] ||= "fulltext_title_level" # LC records here at hopkins have "Table of contents only" in the 856$3 - # Think that's a convention from LC? - if (field['3'] && field['3'].downcase =~ /table of contents( only)?/) - return "table_of_contents" - elsif (field['3'] && field['3'].downcase =~ /description/) - # If it contains the word 'description', it's probably an abstract. - # That's the best we can do, sadly. - return "abstract" - elsif (field['3'] && field['3'].downcase == 'sample text') - # LC records often include these links. - return "excerpts" - elsif ( field['u'] =~ /www\.loc\.gov/ ) - # Any other loc.gov link, we know it's not full text, don't put - # it in full text field, put it as "see also". - return "highlighted_link" - else - return options[:default_service_type] - end + # Think that's a convention from LC? + if (field['3'] && field['3'].downcase =~ /table of contents( only)?/) + return "table_of_contents" + elsif (field['3'] && field['3'].downcase =~ /description/) + # If it contains the word 'description', it's probably an abstract. + # That's the best we can do, sadly. + return "abstract" + elsif (field['3'] && field['3'].downcase == 'sample text') + # LC records often include these links. + return "excerpts" + elsif ( field['u'] =~ /www\.loc\.gov/ ) + # Any other loc.gov link, we know it's not full text, don't put + # it in full text field, put it as "see also". + return "highlighted_link" + elsif field.indicator2 == '2' # 'related resource' + return "highlighted_link" + else + return options[:default_service_type] + end end # A MARC record has two dates in it, date1 and date2. Exactly # what they represent is something of an esoteric mystery. - # But this will return them both, in an array. + # But this will return them both, in an array. def get_years(marc) array = [] - - # no marc 008? Weird, but okay. - return array unless marc['008'] - + + # no marc 008? Weird, but okay. + return array unless marc['008'] + date1 = marc['008'].value[7,4] date1.strip! if date1 array.push(date1) unless date1.blank? - + date2 = marc['008'].value[11,4] date2.strip! if date2 array.push(date2) unless date2.blank? return array @@ -184,76 +188,76 @@ # Take the title out of a marc record def get_title(marc) marc['245'].find_all {|sf| sf.code == "a" || sf.code == "b" || sf.code == "k"}.collect {|sf| sf.text}.join(" ").sub(/\s*[;:\/.,]\s*$/) end - + # From a marc record, get a string useful to display for identifying - # which edition/version of a work this represents. + # which edition/version of a work this represents. def edition_statement(marc, options = {}) options[:include_repro_info] ||= true options[:exclude_533_fields] = ['7','f','b', 'e'] parts = Array.new return "" unless marc - + #245$h GMD unless ( marc['245'].blank? || marc['245']['h'].blank? ) parts.push('(' + marc['245']['h'].gsub(/[^\w\s]/, '').strip.titlecase + ')') end #250 if ( marc['250']) parts.push( marc['250']['a'] ) unless marc['250']['a'].blank? parts.push( marc['250']['b'] ) unless marc['250']['b'].blank? end - + # 260 if ( marc['260']) if (marc['260']['b'] =~ /s\.n\./) parts.push(marc['260']['a']) unless marc['260']['a'].blank? else parts.push(marc['260']['b']) unless marc['260']['b'].blank? end parts.push( marc['260']['c'] ) unless marc['260']['c'].blank? end - + # 533 if options[:include_repro_info] && marc['533'] marc['533'].subfields.each do |s| if ( s.code == 'a' ) - parts.push(s.value.gsub(/[^\w\s]/, '') + ':' ) + parts.push(s.value.gsub(/[^\w\s]/, '') + ':' ) elsif (! options[:exclude_533_fields].include?( s.code )) parts.push(s.value) - end + end end end - + return nil if parts.length == 0 return parts.join(' ') end # AACR2 "General Material Designation" . While these are (I think?) # controlled, it's actually really hard to find the list. Maybe they're - # only semi-controlled. + # only semi-controlled. # ONE list can be found here: http://www.oclc.org/bibformats/en/onlinecataloging/default.shtm#BCGFECEG def gmd_values - # 'computer file' is an old one that may still be found in data. - return ['activity card', + # 'computer file' is an old one that may still be found in data. + return ['activity card', 'art original','art reproduction','braille','chart','diorama','electronic resource','computer file', 'filmstrip','flash card','game','globe','kit','manuscript','map','microform','microscope slides','model','motion picture','music','picture','realia','slide','sound recording','technical drawing','text','toy','transparency','videorecording'] end # removes something that looks like an AACR2 GMD in square brackets from - # the string. Pretty kludgey. + # the string. Pretty kludgey. def strip_gmd(arg_string, options = {}) options[:replacement] ||= ':' - + gmd_values.each do |gmd_val| arg_string = arg_string.sub(/\[#{gmd_val}( \((tactile|braile|large print)\))?\]/, options[:replacement]) end return arg_string end - + end