# -*- encoding : utf-8 -*-
# -*- coding: utf-8 -*-
# Written for use with Blacklight::Solr::Document::Marc, but you can use
# it for your own custom Blacklight document Marc extension too -- just
# include this module in any document extension (or any other class)
# that provides a #to_marc returning a ruby-marc object. This module will add
# in export_as translation methods for a variety of formats.
module Blacklight::Solr::Document::MarcExport

  # Registers every export format this module can produce on +document+ by
  # calling +document.will_export_as+ once per format.
  def self.register_export_formats(document)
    document.will_export_as(:xml)
    document.will_export_as(:marc, "application/marc")
    # marcxml content type:
    # http://tools.ietf.org/html/draft-denenberg-mods-etc-media-types-00
    document.will_export_as(:marcxml, "application/marcxml+xml")
    document.will_export_as(:openurl_ctx_kev, "application/x-openurl-ctx-kev")
    document.will_export_as(:refworks_marc_txt, "text/plain")
    document.will_export_as(:endnote, "application/x-endnote-refer")
  end

  # Returns whatever the ruby-marc object's own #to_marc serialization yields.
  def export_as_marc
    to_marc.to_marc
  end

  # Returns the record's XML representation as a String.
  def export_as_marcxml
    to_marc.to_xml.to_s
  end
  alias_method :export_as_xml, :export_as_marcxml

  # TODO This exporting as formatted citation thing should be re-thought
  # redesigned at some point to be more general purpose, but this
  # is in-line with what we had before, but at least now attached
  # to the document extension where it belongs.
  def export_as_apa_citation_txt
    apa_citation(to_marc)
  end

  def export_as_mla_citation_txt
    mla_citation(to_marc)
  end

  def export_as_chicago_citation_txt
    chicago_citation(to_marc)
  end

  # Exports as an OpenURL KEV (key-encoded value) query string.
  # For use to create COinS, among other things. COinS are
  # for Zotero, among other things. TODO: This is weird and fragile
  # code, it should use ruby OpenURL gem instead to work a lot
  # more sensibly. The "format" argument was in the old marc.marc.to_zotero
  # call, but didn't necessarily do what it thought it did anyway. Left in
  # for now for backwards compatibility, but should be replaced by
  # just ruby OpenURL.
  #
  # format - nil, a String, or an Array whose first element is used. The
  #          value 'book' selects the book metadata format, anything matching
  #          /journal/i selects the journal format, everything else falls
  #          back to the Dublin Core style context object.
  #
  # Returns the query string. Every value, including ISBN and ISSN, is passed
  # through CGI.escape so characters such as spaces or "&" in a subfield
  # cannot corrupt the query string.
  def export_as_openurl_ctx_kev(format = nil)
    record = to_marc
    title          = record.find { |field| field.tag == '245' }
    author         = record.find { |field| field.tag == '100' }
    corp_author    = record.find { |field| field.tag == '110' }
    publisher_info = record.find { |field| field.tag == '260' }
    edition        = record.find { |field| field.tag == '250' }
    isbn           = record.find { |field| field.tag == '020' }
    issn           = record.find { |field| field.tag == '022' }

    # An empty Array yields nil here and therefore the generic export.
    format = format.first if format.is_a?(Array)
    format = format.to_s.downcase.strip unless format.nil?

    joined_title = "#{openurl_escaped_subfield(title, 'a')}+#{openurl_escaped_subfield(title, 'b')}"
    corp_name    = "#{openurl_escaped_subfield(corp_author, 'a')}+#{openurl_escaped_subfield(corp_author, 'b')}"
    pub_date     = openurl_escaped_subfield(publisher_info, 'c')
    pub_place    = openurl_escaped_subfield(publisher_info, 'a')
    pub_name     = openurl_escaped_subfield(publisher_info, 'b')

    export_text = ""
    if format == 'book'
      export_text << "ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rfr_id=info%3Asid%2Fblacklight.rubyforge.org%3Agenerator&rft.genre=book&"
      export_text << "rft.btitle=#{joined_title}&"
      export_text << "rft.title=#{joined_title}&"
      export_text << "rft.au=#{openurl_escaped_subfield(author, 'a')}&"
      export_text << "rft.aucorp=#{corp_name}&" unless corp_author.blank?
      export_text << "rft.date=#{pub_date}&"
      export_text << "rft.place=#{pub_place}&"
      export_text << "rft.pub=#{pub_name}&"
      export_text << "rft.edition=#{openurl_escaped_subfield(edition, 'a')}&"
      export_text << "rft.isbn=#{openurl_escaped_subfield(isbn, 'a')}"
    elsif format =~ /journal/i
      # matched with a regexp because institutions may use formats like
      # Journal or Journal/Magazine
      export_text << "ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rfr_id=info%3Asid%2Fblacklight.rubyforge.org%3Agenerator&rft.genre=article&"
      export_text << "rft.title=#{joined_title}&"
      export_text << "rft.atitle=#{joined_title}&"
      export_text << "rft.aucorp=#{corp_name}&" unless corp_author.blank?
      export_text << "rft.date=#{pub_date}&"
      export_text << "rft.issn=#{openurl_escaped_subfield(issn, 'a')}"
    else
      export_text << "ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Adc&rfr_id=info%3Asid%2Fblacklight.rubyforge.org%3Agenerator&"
      export_text << "rft.title=#{openurl_escaped_subfield(title, 'a')}"
      # Unlike the two branches above, the separator is only added when a
      # subtitle subfield actually exists.
      export_text << (CGI.escape(" ") + openurl_escaped_subfield(title, 'b')) if title && title['b']
      export_text << "&rft.creator=#{openurl_escaped_subfield(author, 'a')}"
      export_text << "&rft.aucorp=#{corp_name}" unless corp_author.blank?
      export_text << "&rft.date=#{pub_date}"
      export_text << "&rft.place=#{pub_place}"
      export_text << "&rft.pub=#{pub_name}"
      export_text << "&rft.format=#{format.nil? ? '' : CGI.escape(format)}"
    end

    # Every branch starts with a non-empty prefix, so the result is never blank.
    export_text
  end

  # This format used to be called 'refworks', which wasn't really
  # accurate, sounds more like 'refworks tagged format'. Which this
  # is not, it's instead some weird under-documented Refworks
  # proprietary marc-ish in text/plain format. See
  # http://robotlibrarian.billdueber.com/sending-marcish-data-to-refworks/
  #
  # Returns the text normalized to Unicode NFC.
  def export_as_refworks_marc_txt
    # plugin/gem weirdness means we do need to manually require
    # here.
    # As of 11 May 2010, Refworks has a problem with UTF-8 if it's decomposed,
    # it seems to want C form normalization, although RefWorks support
    # couldn't tell me that. -jrochkind
    # DHF: moved this require a little lower in the method.
    # require 'unicode'
    record = to_marc
    fields = record.find_all { |f| ('000'..'999') === f.tag }

    # NOTE(review): nothing separates the leader from the first field, and the
    # exact widths of the separators below may matter to RefWorks -- confirm
    # against the format description linked above.
    text = "LEADER #{record.leader}"
    fields.each do |field|
      next if ["940", "999"].include?(field.tag)

      if field.is_a?(MARC::ControlField)
        text << "#{field.tag} #{field.value}\n"
      else
        text << "#{field.tag} "
        text << (field.indicator1 || " ")
        text << (field.indicator2 || " ")
        text << " "
        field.each do |s|
          # subfield a is written bare, every other subfield gets a |code prefix
          text << (s.code == 'a' ? "#{s.value}" : " |#{s.code}#{s.value}")
        end
        text << "\n"
      end
    end

    if Blacklight.jruby?
      require 'java'
      java_import java.text.Normalizer
      Normalizer.normalize(text, Normalizer::Form::NFC).to_s
    else
      require 'unicode'
      Unicode.normalize_C(text)
    end
  end

  # Endnote Import Format. See the EndNote User Guide at:
  # http://www.endnote.com/support/enx3man-terms-win.asp
  # Chapter 7: Importing Reference Data into EndNote / Creating a Tagged “EndNote Import” File
  #
  # Note: This code is copied from what used to be in the previous version
  # in ApplicationHelper#render_to_endnote. It does NOT produce very good
  # endnote import format; the %0 is likely to be entirely illegal, the
  # rest of the data is barely correct but messy. TODO, a new version of this,
  # or better yet just an export_as_ris instead, which will be more general
  # purpose.
  #
  # One line is written per matching MARC field, and only when at least one
  # of the mapped subfields has a value; lines never carry trailing padding.
  def export_as_endnote
    # EndNote tag => comma separated "tag.subfield" specs. Underscores in a
    # key only keep the Hash keys distinct and are stripped on output.
    end_note_format = {
      "%A" => "100.a",
      "%C" => "260.a",
      "%D" => "260.c",
      "%E" => "700.a",
      "%I" => "260.b",
      "%J" => "440.a",
      "%@" => "020.a",
      "%_@" => "022.a",
      "%T" => "245.a,245.b",
      "%U" => "856.u",
      "%7" => "250.a"
    }
    marc_obj = to_marc
    # TODO. This should be rewritten to guess
    # from actual Marc instead, probably.
    format_str = 'Generic'
    text = ''
    text << "%0 #{format_str}\n"
    # If there is some reliable way of getting the language of a record we can add it here
    #text << "%G #{record['language'].first}\n"
    end_note_format.each do |key, value|
      specs = value.split(",").map { |spec| spec.split('.') }
      # All specs of one entry share the tag of the first spec.
      tag = specs.first.first.to_s
      codes = specs.map { |spec| spec[1] }

      marc_obj.find_all { |f| f.tag == tag }.each do |field|
        values = codes.map { |code| field[code].to_s }.reject { |v| v.strip.empty? }
        next if values.empty?
        text << "#{key.gsub('_', '')} #{values.join(' ')}\n"
      end
    end
    text
  end

  ## DEPRECATED stuff left in for backwards compatibility, but should
  # be gotten rid of eventually.

  # Deprecated. Warns and returns an empty String.
  def to_zotero(format)
    warn("[DEPRECATION] Simply call document.export_as_openurl_ctx_kev to get an openURL kev context object suitable for including in a COinS; then have view code make the span for the COinS. ")
    ""
  end

  # Deprecated. Warns, then returns #export_as_apa_citation_txt.
  def to_apa
    warn("[DEPRECATION] Call document.export_as_apa_citation_txt instead.")
    export_as_apa_citation_txt
  end

  # Deprecated. Warns, then returns #export_as_mla_citation_txt.
  def to_mla
    warn("[DEPRECATION] Call document.export_as_mla_citation_txt instead.")
    export_as_mla_citation_txt
  end

  protected

  # Returns the CGI-escaped value of subfield +code+ of +field+, or "" when
  # either the field or the subfield is missing.
  def openurl_escaped_subfield(field, code)
    value = field.nil? ? nil : field[code]
    value.nil? ? "" : CGI.escape(value)
  end

  # Main method for defining chicago style citation. If we don't end up converting to using a citation formatting service
  # we should make this receive a semantic document and not MARC so we can use this with other formats.
  def chicago_citation(marc)
    authors = get_all_authors(marc)
    if authors[:primary_authors].blank?
      author_text = chicago_contributor_text(authors)
    else
      author_text = chicago_primary_author_text(authors[:primary_authors])
    end

    title_field = marc["245"]
    title = ""
    section_title = ""
    if title_field
      main_title     = title_field["a"]
      subtitle       = title_field["b"]
      section_number = title_field["n"]
      section_name   = title_field["p"]

      title << citation_title(clean_end_punctuation(main_title).strip) if main_title
      title << ": #{citation_title(clean_end_punctuation(subtitle).strip)}" if subtitle

      section_title << citation_title(clean_end_punctuation(section_number)) if section_number
      if section_name
        section_title << ", #{citation_title(clean_end_punctuation(section_name))}."
      elsif section_number
        section_title << "."
      end
    end

    # Translators, editors and compilers are only credited after the title
    # when they were not already used as the leading names.
    additional_title = ""
    unless authors[:primary_authors].blank?
      [["Translated", :translators], ["Edited", :editors], ["Compiled", :compilers]].each do |verb, role|
        names = authors[role]
        next if names.blank?
        additional_title << "#{verb} by #{names.collect { |name| name_reverse(name) }.join(" and ")}. "
      end
    end

    edition = setup_edition(marc) || ""
    pub_info = chicago_pub_info(marc)

    citation = ""
    citation << "#{author_text} " unless author_text.blank?
    citation << "#{title}. " unless title.blank?
    citation << "#{section_title} " unless section_title.blank?
    citation << "#{additional_title} " unless additional_title.blank?
    citation << "#{edition} " unless edition.blank?
    citation << "#{pub_info}." unless pub_info.blank?
    citation
  end

  # Formats the first listed author of a chicago citation: the name as
  # catalogued followed by a separator.
  def chicago_lead_author(author)
    "#{author}" + (author.end_with?(",") ? " " : ", ")
  end

  # Builds the leading author text of a chicago citation from a non-empty
  # list of primary author names. With more than ten names only the first
  # seven are listed, followed by " et al.".
  def chicago_primary_author_text(names)
    text = ""
    if names.length > 10
      names.first(7).each_with_index do |author, index|
        text << (index == 0 ? chicago_lead_author(author) : "#{name_reverse(author)}, ")
      end
      text << " et al."
    elsif names.length > 1
      last_index = names.length - 1
      names.each_with_index do |author, index|
        if index == 0
          text << chicago_lead_author(author)
        elsif index == last_index
          text << "and #{name_reverse(author)}."
        else
          text << "#{name_reverse(author)}, "
        end
      end
    else
      text << names.first
    end
    text
  end

  # Builds the leading name text of a chicago citation from translators,
  # editors and compilers (in that order). Used when the record has no
  # primary authors. Returns "" when there are no such contributors.
  def chicago_contributor_text(authors)
    contributors = []
    authors[:translators].each { |name| contributors << [name, "trans."] }
    authors[:editors].each     { |name| contributors << [name, "ed."] }
    authors[:compilers].each   { |name| contributors << [name, "comp."] }

    text = ""
    return text if contributors.blank?

    if contributors.length > 10
      contributors.first(7).each { |name, role| text << "#{name} #{role} " }
      text << " et al."
    elsif contributors.length > 1
      last_index = contributors.length - 1
      contributors.each_with_index do |(name, role), index|
        if index == 0
          text << "#{name} #{role}, "
        elsif index == last_index
          text << "and #{name_reverse(name)} #{role}"
        else
          text << "#{name_reverse(name)} #{role}, "
        end
      end
    else
      name, role = contributors.first
      text << "#{name} #{role}"
    end
    text
  end

  # Builds the publication part of a chicago citation from field 260, or
  # from the dissertation note (502) when 260 has neither $a nor $b.
  # Returns "" when nothing usable is present.
  def chicago_pub_info(marc)
    pub_info = ""
    imprint = marc["260"]
    thesis = marc["502"]

    if imprint && (imprint["a"] || imprint["b"])
      pub_info << clean_end_punctuation(imprint["a"]).strip if imprint["a"]
      pub_info << ": #{clean_end_punctuation(imprint["b"]).strip}" if imprint["b"]
      # setup_pub_date can return nil even when $c exists (no usable
      # characters); skip the separator in that case.
      pub_date = imprint["c"] ? setup_pub_date(marc) : nil
      pub_info << ", #{pub_date}" if pub_date
    elsif thesis && thesis["a"]
      # MARC 502 is the Dissertation Note. This holds the correct pub info for these types of records.
      pub_info << thesis["a"]
    elsif thesis && (thesis["b"] || thesis["c"] || thesis["d"])
      # sometimes the dissertation note is encoded in pieces in the $b $c and $d
      # sub fields instead of lumped into the $a. Only the pieces that are
      # present are joined, and the last one loses its end punctuation.
      pieces = [thesis["b"], thesis["c"], thesis["d"]].compact
      pieces[-1] = clean_end_punctuation(pieces[-1])
      pub_info << pieces.join(", ")
    end
    pub_info
  end

  # Builds an MLA style citation String from a ruby-marc record. Records
  # with four or more authors are cited as "<first author>, et al.".
  def mla_citation(record)
    text = ''

    # setup formatted author list
    authors = get_author_list(record)
    if authors.length < 4
      last_index = authors.length - 1
      authors.each_with_index do |name, index|
        if index == 0
          text << name
        elsif index == last_index
          text << ", and " + name_reverse(name) + "."
        else
          text << ", " + name_reverse(name)
        end
      end
      text << (text[-1, 1] == "." ? " " : ". ") unless text.blank?
    else
      text << authors.first + ", et al. "
    end

    # setup title
    title = setup_title_info(record)
    text << mla_citation_title(title) + " " unless title.nil?

    # Edition
    edition_data = setup_edition(record)
    text << edition_data + " " unless edition_data.nil?

    # Publication and date, comma separated; a missing part leaves no
    # dangling separator behind.
    text << [setup_pub_info(record), setup_pub_date(record)].compact.join(", ")

    finish_citation(text)
  end

  # Builds an APA style citation String from a ruby-marc record.
  def apa_citation(record)
    text = ''

    # setup formatted author list
    names = get_author_list(record).reject { |name| name.blank? }
    names = names.map { |name| abbreviate_name(name).strip }
    last_index = names.length - 1
    # Separators are chosen by position, so two authors whose abbreviated
    # names happen to be equal are still separated correctly.
    names.each_with_index do |name, index|
      if index == 0
        text << name
      elsif index == last_index
        text << ", & " + name
      else
        text << ", " + name
      end
    end
    text << (text[-1, 1] == "." ? " " : ". ") unless text.blank?

    # Get Pub Date
    pub_date = setup_pub_date(record)
    text << "(" + pub_date + "). " unless pub_date.nil?

    # setup title info
    title = setup_title_info(record)
    text << title + " " unless title.nil?

    # Edition
    edition_data = setup_edition(record)
    text << edition_data + " " unless edition_data.nil?

    # Publisher info
    pub_info = setup_pub_info(record)
    text << pub_info unless pub_info.nil?

    finish_citation(text)
  end

  # Strips trailing whitespace from +text+ and makes sure a non-empty
  # citation ends with exactly one closing period.
  def finish_citation(text)
    text = text.rstrip
    text += "." unless text.empty? || text[-1, 1] == "."
    text
  end

  # Returns up to four characters taken from subfield c of the first 260
  # field, or nil when the field, the subfield or any usable character is
  # missing.
  def setup_pub_date(record)
    pub_field = record.find { |f| f.tag == '260' }
    return nil if pub_field.nil?

    date_subfield = pub_field.find { |s| s.code == 'c' }
    return nil if date_subfield.nil?

    # NOTE(review): the character class keeps digits, "|", "n", "d" and ".";
    # the "|" looks like an attempt at alternation (digits or "n.d.") --
    # confirm before tightening it.
    date_value = date_subfield.value.to_s.gsub(/[^0-9|n\.d\.]/, "")[0, 4]
    return nil if date_value.empty?

    clean_end_punctuation(date_value)
  end

  # Returns "<place>: <publisher>" built from subfields a and b of the first
  # 260 field (either part may be missing), or nil when nothing is there.
  def setup_pub_info(record)
    pub_field = record.find { |f| f.tag == '260' }
    return nil if pub_field.nil?

    place = pub_field.find { |s| s.code == 'a' }
    publisher = pub_field.find { |s| s.code == 'b' }
    place = clean_end_punctuation(place.value.strip).strip unless place.nil?
    publisher = publisher.value.strip unless publisher.nil?

    text = [place, publisher].compact.join(": ").strip
    return nil if text.blank?
    clean_end_punctuation(text)
  end

  # Capitalizes every space separated word of +text+ except a fixed list of
  # short words, which are passed through unchanged.
  def mla_citation_title(text)
    no_upcase = ["a","an","and","but","by","for","it","of","the","to","with"]
    text.split(" ").map { |w| no_upcase.include?(w) ? w : w.capitalize }.join(" ")
  end

  # This will replace the mla_citation_title method with a better understanding of how MLA and Chicago citation titles are formatted.
  # This method will take in a string and capitalize all of the non-prepositions.
  # Words that are entirely upper case are never touched; the first word is
  # capitalized even when it is in the preposition list.
  def citation_title(title_text)
    prepositions = ["a","about","across","an","and","before","but","by","for","it","of","the","to","with","without"]
    words = []
    title_text.split(" ").each_with_index do |word, index|
      all_caps = (word == word.upcase)
      capitalize_word = !all_caps && (index == 0 || (word.length > 1 && !prepositions.include?(word)))
      if capitalize_word
        # the split("-") will handle the capitalization of hyphenated words
        words << word.split("-").map { |w| w.capitalize }.join("-")
      else
        words << word
      end
    end
    words.join(" ")
  end

  # Returns "<title>: <subtitle>." built from subfields a and b of the first
  # 245 field (either part may be missing), or nil when nothing is there.
  def setup_title_info(record)
    title_field = record.find { |f| f.tag == '245' }
    return nil if title_field.nil?

    main_title = title_field.find { |s| s.code == 'a' }
    subtitle = title_field.find { |s| s.code == 'b' }
    main_title = clean_end_punctuation(main_title.value.strip) unless main_title.nil?
    subtitle = clean_end_punctuation(subtitle.value.strip) unless subtitle.nil?

    text = [main_title, subtitle].compact.join(": ").strip
    return nil if text.blank?
    clean_end_punctuation(text) + "."
  end

  # Removes one trailing ".", ",", ":", ";" or "/" from +text+. nil is
  # returned unchanged so callers may pass a missing subfield value.
  def clean_end_punctuation(text)
    return text if text.nil?
    return text[0, text.length - 1] if [".", ",", ":", ";", "/"].include?(text[-1, 1])
    text
  end

  # Returns subfield a of the first 250 field, or nil when it is missing or
  # is exactly '1st ed.'.
  def setup_edition(record)
    edition_field = record.find { |f| f.tag == '250' }
    return nil if edition_field.nil?

    edition_code = edition_field.find { |s| s.code == 'a' }
    return nil if edition_code.nil?

    edition_data = edition_code.value
    (edition_data.nil? || edition_data == '1st ed.') ? nil : edition_data
  end

  # Returns the distinct author names of the record: subfield a of the first
  # 100 field followed by subfield a of every 700 field, each without end
  # punctuation. Fields lacking a usable subfield a are skipped.
  def get_author_list(record)
    author_list = []

    primary_field = record.find { |f| f.tag == '100' }
    primary_name = primary_field.nil? ? nil : primary_field['a']
    author_list << clean_end_punctuation(primary_name) unless primary_name.blank?

    record.find_all { |f| f.tag == '700' }.each do |field|
      name = field['a']
      author_list << clean_end_punctuation(name) unless name.blank?
    end

    author_list.uniq
  end

  # This is a replacement method for the get_author_list method. This new method will break authors out into primary authors, translators, editors, and compilers
  #
  # Returns a Hash with the keys :primary_authors, :translators, :editors
  # and :compilers, each holding an Array of subfield a values. A 700 field
  # is classified by comparing its $e and $4 values (minus end punctuation)
  # with the codes "trl", "edt" and "com"; anything else counts as a
  # primary author.
  def get_all_authors(record)
    translator_code = "trl"; editor_code = "edt"; compiler_code = "com"
    primary_authors = []; translators = []; editors = []; compilers = []

    record.find_all { |f| f.tag === "100" }.each do |field|
      primary_authors << field["a"] if field["a"]
    end

    record.find_all { |f| f.tag === "700" }.each do |field|
      next unless field["a"]

      relators = []
      relators << clean_end_punctuation(field["e"]) if field["e"]
      relators << clean_end_punctuation(field["4"]) if field["4"]

      if relators.include?(translator_code)
        translators << field["a"]
      elsif relators.include?(editor_code)
        editors << field["a"]
      elsif relators.include?(compiler_code)
        compilers << field["a"]
      else
        primary_authors << field["a"]
      end
    end

    {:primary_authors => primary_authors, :translators => translators, :editors => editors, :compilers => compilers}
  end

  # Shortens the first given name of a "Last, First Middle" name to its
  # initial ("Last, F. Middle"). Names without a ", " separated given-name
  # part are returned as they are instead of being doubled up.
  def abbreviate_name(name)
    name_parts = name.split(", ")
    return name if name_parts.length < 2

    first_name_parts = name_parts.last.split(" ")
    return name_parts.first if first_name_parts.empty?

    temp_name = name_parts.first + ", " + first_name_parts.first[0, 1] + "."
    first_name_parts.shift
    temp_name += " " + first_name_parts.join(" ") unless first_name_parts.empty?
    temp_name
  end

  # Turns "Last, First" into "First Last" after removing end punctuation.
  # Names that cannot be split on ", " are returned without reordering.
  def name_reverse(name)
    name = clean_end_punctuation(name)
    return name unless name =~ /,/

    temp_name = name.split(", ")
    return name if temp_name.length < 2
    temp_name.last + " " + temp_name.first
  end
end