lib/stanford-mods/searchworks.rb in stanford-mods-0.0.13 vs lib/stanford-mods/searchworks.rb in stanford-mods-0.0.14

- old
+ new

# encoding: UTF-8
# NOTE: this is the "new" (0.0.14) side of the diff hunks "@@ -1,8 +1,8 @@" and
# "@@ -180,8 +180,439 @@", laid out as regular Ruby. Lines 9-179 of the file are not
# part of the diff and are therefore not reproduced here.
require 'stanford-mods/searchworks_languages'
require 'logger'
require 'date' # is_date? relies on Date.parse

# SearchWorks specific wranglings of MODS metadata as a mixin to the Stanford::Mods::Record object
module Stanford
  module Mods
    class Record < ::Mods::Record

      # --- diff hunk "@@ -180,8 +180,439 @@" ---------------------------------------
      # Lines 9-179 are unchanged and not shown by the diff. The hunk's leading
      # context is the tail of a method that starts in that hidden region:
      #
      #       result << parts.join(sep).strip unless parts.empty?
      #     }
      #     result
      #   end
      # ------------------------------------------------------------------------------

      # Values are the contents of:
      #   mods/genre
      #   mods/subject/topic
      # @return [Array<String>] values for the topic_search Solr field for this document or nil if none
      def topic_search
        @topic_search ||= begin
          vals = self.term_values(:genre) || []
          vals.concat(subject_topics) if subject_topics
          vals.empty? ? nil : vals
        end
      end

      # @return whatever term_values yields for originInfo/place/placeTerm
      #   (presumably Array<String> or nil, as for the other term_values callers here)
      def place
        self.term_values([:origin_info, :place, :placeTerm])
      end

      # Picks the display value (with date) of the first plain name that has a
      # marcrelator role of 'Creator' or 'Author'; when there is none, falls back
      # to the first plain name that has no role at all.
      # @return the display_value_w_date of the chosen name, or nil if no name qualifies
      def main_author_w_date_test
        result = nil
        first_wo_role = nil
        self.plain_name.each do |n|
          first_wo_role ||= n if n.role.size == 0
          n.role.each do |r|
            if r.authority.include?('marcrelator') &&
               (r.value.include?('Creator') || r.value.include?('Author'))
              result ||= n.display_value_w_date
            end
          end
        end
        result = first_wo_role.display_value_w_date if !result && first_wo_role
        result
      end

      # remove trailing commas
      # @return sw_full_title with a comma at the end of the title (or at the end of
      #   any line within it) removed; nil if there is no full title
      def sw_full_title_without_commas
        toret = self.sw_full_title
        toret = toret.gsub(/,$/, '') if toret
        toret
      end

      # @return [Logger] lazily created logger writing to STDOUT
      def sw_logger
        @logger ||= Logger.new(STDOUT)
      end

      # Values are the contents of:
      #   subject/geographic
      #   subject/hierarchicalGeographic
      #   subject/geographicCode  (only include the translated value if it isn't already present from other mods geo fields)
      # @return [Array<String>] values for the geographic_search Solr field for this document or nil if none
      def geographic_search
        @geographic_search ||= begin
          result = self.sw_geographic_search

          # TODO: this should go into stanford-mods ... but then we have to set that gem up with a Logger
          # print a message for any unrecognized encodings
          xvals = self.subject.geographicCode.translated_value
          codes = self.term_values([:subject, :geographicCode])
          if codes && codes.size > xvals.size
            self.subject.geographicCode.each do |n|
              if n.authority != 'marcgac' && n.authority != 'marccountry'
                sw_logger.info("#{druid} has subject geographicCode element with untranslated encoding (#{n.authority}): #{n.to_xml}")
              end
            end
          end

          # FIXME: stanford-mods should be returning [], not nil ...
          # (a nil result is not memoized by ||=, so it is recomputed on the next call)
          (!result || result.empty?) ? nil : result
        end
      end

      # Values are the contents of:
      #   subject/name
      #   subject/occupation  - no subelements
      #   subject/titleInfo
      # @return [Array<String>] values for the subject_other_search Solr field for this document or nil if none
      def subject_other_search
        @subject_other_search ||= begin
          # copy so the memoized subject_occupations array is not mutated by concat
          vals = subject_occupations ? Array.new(subject_occupations) : []
          vals.concat(subject_names) if subject_names
          vals.concat(subject_titles) if subject_titles
          vals.empty? ? nil : vals
        end
      end

      # Values are the contents of:
      #   subject/temporal
      #   subject/genre
      # @return [Array<String>] values for the subject_other_subvy_search Solr field for this document or nil if none
      def subject_other_subvy_search
        @subject_other_subvy_search ||= begin
          # copy so the memoized subject_temporal array is not mutated by concat
          vals = subject_temporal ? Array.new(subject_temporal) : []
          gvals = self.term_values([:subject, :genre])
          vals.concat(gvals) if gvals

          # print a message for any temporal encodings
          self.subject.temporal.each do |n|
            sw_logger.info("#{druid} has subject temporal element with untranslated encoding: #{n.to_xml}") if !n.encoding.empty?
          end

          vals.empty? ? nil : vals
        end
      end

      # Buckets a year relative to the current year.
      # @param year anything responding to to_i; nil/false yields nil
      # @return [Array<String>] single-element array with the value for the pub_date_group_facet, or nil
      def pub_date_groups(year)
        return nil unless year
        year = year.to_i
        current_year = Time.new.year.to_i
        label =
          if year >= current_year - 1
            "This year"
          elsif year >= current_year - 3
            "Last 3 years"
          elsif year >= current_year - 10
            "Last 10 years"
          elsif year >= current_year - 50
            "Last 50 years"
          else
            "More than 50 years ago"
          end
        [label]
      end

      # select one or more format values from the controlled vocabulary here:
      #   http://searchworks-solr-lb.stanford.edu:8983/solr/select?facet.field=format&rows=0&facet.sort=index
      # based on the mods typeOfResource values
      # @return [Array<String>] unique values in the SearchWorks controlled vocabulary;
      #   empty when no typeOfResource maps to a known format
      def format
        val = []
        formats = self.term_values(:typeOfResource)
        if formats
          formats.each do |form|
            case form
            when 'still image'
              val << 'Image'
            when 'mixed material'
              val << 'Manuscript/Archive'
            when 'moving image'
              val << 'Video'
            when 'three dimensional object'
              val << 'Other'
            when 'cartographic'
              val << 'Map/Globe'
            when 'sound recording-musical'
              val << 'Music-Recording'
            when 'sound recording-nonmusical'
              val << 'Sound Recording'
            when 'software, multimedia'
              val << 'Computer File'
            else
              sw_logger.warn "#{druid} has an unknown typeOfResource #{form}"
            end
          end
        end
        return val.uniq unless val.empty?
        if !self.typeOfResource || self.typeOfResource.length == 0
          sw_logger.warn "#{druid} has no valid typeOfResource"
        end
        # always hand back an Array: previously the "only unknown types" case fell
        # off the end of the method and returned nil
        []
      end

      # Values are the contents of:
      #   all subject subelements except subject/cartographic plus genre top level element
      # @return [Array<String>] values for the subject_all_search Solr field for this document or nil if none
      def subject_all_search
        vals = topic_search ? Array.new(topic_search) : []
        vals.concat(geographic_search) if geographic_search
        vals.concat(subject_other_search) if subject_other_search
        vals.concat(subject_other_subvy_search) if subject_other_subvy_search
        vals.empty? ? nil : vals
      end

      # @return [String] the first of pub_dates, or nil if there are none
      def pub_date_display
        dates = pub_dates
        dates ? dates.first : nil
      end

      # get the dates from dateIssued, and dateCreated merged into 1 array.
      # @return [Array<String>] values for the issue_date_display Solr field for this document or nil if none
      def pub_dates
        issued = self.term_values([:origin_info, :dateIssued])
        created = self.term_values([:origin_info, :dateCreated])
        # build a new array rather than concat-ing onto the one term_values returned
        vals = (issued && created) ? issued + created : (issued || created)
        (vals.nil? || vals.empty?) ? nil : vals
      end

      # @return [Boolean] true if Kernel#Integer accepts the object, false otherwise
      def is_number?(object)
        Integer(object)
        true
      rescue StandardError
        false
      end

      # @return [Boolean] true if Date.parse accepts the object, false otherwise
      def is_date?(object)
        Date.parse(object)
        true
      rescue StandardError
        false
      end

      # Get the publish year from mods
      # @return [String] 4 character year or nil if no valid date was found
      def pub_year
        # use the cached year if there is one ('' is the cached "nothing found" marker)
        if @pub_year
          return @pub_year == '' ? nil : @pub_year
        end
        dates = pub_dates
        if dates
          # remove ? and []
          pruned_dates = dates.map { |f_date| f_date.gsub(/[?\[\]]/, '') }
          # try to find a date starting with the most normal date formats and progressing to more wonky ones
          @pub_year = get_plain_four_digit_year(pruned_dates) ||
                      get_double_digit_century(pruned_dates) ||
                      get_three_digit_year(pruned_dates) ||
                      get_single_digit_century(pruned_dates)
          return @pub_year if @pub_year
        end
        @pub_year = ''
        sw_logger.info("#{druid} no valid pub date found in '#{dates.to_s}'")
        nil
      end

      # creates a date suitable for sorting. Guaranteed to be 4 digits or nil
      # @raise [RuntimeError] if the computed value is not 4 characters long
      def pub_date_sort
        pd = pub_date
        if pd
          # left-pad 3 character values (e.g. '965' or '9--') before expanding centuries
          pd = '0' + pd if pd.length == 3
          pd = pd.gsub('--', '00')
        end
        raise "pub_date_sort was about to return a non 4 digit value #{pd}!" if pd && pd.length != 4
        pd
      end

      # The year the object was published; currently simply the value of pub_year
      # (no min/max filtering is applied here)
      # @return [String] year value from pub_year, or nil
      def pub_date
        pub_year || nil
      end

      # Values for the pub date facet.  This is less strict than the 4 year date requirements for pub_date
      # @return [String] the pub_date, or e.g. '12th century' for a century value like '11--'; nil if no pub_date
      def pub_date_facet
        date = pub_date
        return nil unless date
        return date unless date.include?('--')
        # '11--' means the 1100s, i.e. the 12th century
        (date[0, 2].to_i + 1).to_s + 'th century'
      end

      # Values are the contents of:
      #   subject/topic
      #   subject/name
      #   subject/title
      #   subject/occupation
      #  with trailing comma, semicolon, and backslash (and any preceding spaces) removed
      # @return [Array<String>] values for the topic_facet Solr field for this document or nil if none
      def topic_facet
        vals = subject_topics ? Array.new(subject_topics) : []
        vals.concat(subject_names) if subject_names
        vals.concat(subject_titles) if subject_titles
        vals.concat(subject_occupations) if subject_occupations
        vals.map! { |val| val.sub(/[\\,;]$/, '').strip }
        vals.empty? ? nil : vals
      end

      # geographic_search values with trailing comma, semicolon, and backslash (and any preceding spaces) removed
      # @return [Array<String>] values for the geographic_facet Solr field for this document or nil if none
      def geographic_facet
        geographic_search.map { |val| val.sub(/[\\,;]$/, '').strip } if geographic_search
      end

      # subject/temporal values with trailing comma, semicolon, and backslash (and any preceding spaces) removed
      # @return [Array<String>] values for the era_facet Solr field for this document or nil if none
      def era_facet
        subject_temporal.map { |val| val.sub(/[\\,;]$/, '').strip } if subject_temporal
      end

      # @return [String] value with the numeric catkey in it, or nil if none exists
      def catkey
        ids = self.term_values([:record_info, :recordIdentifier])
        return nil unless ids && ids.length > 0
        # drop every 'a' character (the usual 'a' prefix) so only the number is left
        ids.first.gsub('a', '')
      end

      # @param new_druid the identifier used in log messages for this record
      def druid=(new_druid)
        @druid = new_druid
      end

      # @return the druid set via druid=, or 'Unknown item' if none was set
      def druid
        @druid ? @druid : 'Unknown item'
      end

      # protected ----------------------------------------------------------

      # convenience method for subject/name/namePart values (to avoid parsing the mods for the same thing multiple times)
      def subject_names
        @subject_names ||= self.sw_subject_names
      end

      # convenience method for subject/occupation values (to avoid parsing the mods for the same thing multiple times)
      def subject_occupations
        @subject_occupations ||= self.term_values([:subject, :occupation])
      end

      # convenience method for subject/temporal values (to avoid parsing the mods for the same thing multiple times)
      def subject_temporal
        @subject_temporal ||= self.term_values([:subject, :temporal])
      end

      # convenience method for subject/titleInfo values (to avoid parsing the mods for the same thing multiple times)
      def subject_titles
        @subject_titles ||= self.sw_subject_titles
      end

      # convenience method for subject/topic values (to avoid parsing the mods for the same thing multiple times)
      def subject_topics
        @subject_topics ||= self.term_values([:subject, :topic])
      end

      # get a 4 digit year like 1865 from the date array
      # Sets @pub_year as a side effect when a year is found.
      # @param dates [Array<String>] date strings to inspect, in order
      # @return [String] the year, or nil if no date string yields one
      def get_plain_four_digit_year(dates)
        dates.each do |f_date|
          matches = f_date.scan(/\d{4}/)
          if matches.length == 1
            @pub_year = matches.first
            return matches.first
          else
            # if there are multiples, check for ones with CE after them
            matches.each do |match|
              # look for things like '1865-6 CE'; =~ is tested for truthiness so a
              # match at the very start of the string (index 0) counts as a match
              if f_date.include?(match + ' CE') || f_date =~ /#{match}...CE/
                @pub_year = match
                return match
              end
            end
          end
        end
        nil
      end

      # get a double digit century like '12th century' from the date array
      # Sets @pub_year as a side effect when a century is found.
      # @param dates [Array<String>] date strings to inspect, in order
      # @return [String] century in the form '11--' (for '12th'), or nil if none found
      def get_double_digit_century(dates)
        dates.each do |f_date|
          matches = f_date.scan(/\d{2}th/)
          if matches.length == 1
            @pub_year = (matches.first[0, 2].to_i - 1).to_s + '--'
            return @pub_year
          end
          # if there are multiples, check for ones with CE after them
          matches.each do |match|
            # =~ is tested for truthiness so a match at index 0 counts as a match
            if f_date.include?(match + ' CE') ||
               f_date =~ /#{match}...CE/ ||
               f_date =~ /#{match} century CE/
              @pub_year = (match[0, 2].to_i - 1).to_s + '--'
              return @pub_year
            end
          end
        end
        nil
      end

      # get a 3 digit year like 965 from the date array
      # @param dates [Array<String>] date strings to inspect, in order
      # @return [String] the first run of 3 digits found, or nil if none
      def get_three_digit_year(dates)
        dates.each do |f_date|
          matches = f_date.scan(/\d{3}/)
          return matches.first if matches.length > 0
        end
        nil
      end

      # get a single digit century like '9th century' from the date array
      # Sets @pub_year as a side effect when a century is found.
      # @param dates [Array<String>] date strings to inspect, in order
      # @return [String] century in the form '8--' (for '9th'), or nil if none found
      def get_single_digit_century(dates)
        dates.each do |f_date|
          matches = f_date.scan(/\d{1}th/)
          if matches.length == 1
            @pub_year = (matches.first[0, 1].to_i - 1).to_s + '--'
            return @pub_year
          end
          # if there are multiples, check for ones with CE after them
          matches.each do |match|
            # =~ is tested for truthiness so a match at index 0 counts as a match
            if f_date.include?(match + ' CE') ||
               f_date =~ /#{match}...CE/ ||
               f_date =~ /#{match} century CE/
              @pub_year = (match[0, 1].to_i - 1).to_s + '--'
              return @pub_year
            end
          end
        end
        nil
      end
    end # class Record
  end # Module Mods
end # Module Stanford