lib/stanford-mods/imprint.rb in stanford-mods-2.6.4 vs lib/stanford-mods/imprint.rb in stanford-mods-3.0.0.alpha1
- old
+ new
@@ -1,6 +1,6 @@
-require 'active_support/core_ext/integer/inflections'
+require 'mods/marc_country_codes'
module Stanford
module Mods
##
# Get the imprint information from originInfo elements (and sub elements) to create display strings
@@ -8,81 +8,37 @@
# This code is adapted from the mods_display gem. In a perfect world, this
# code would make use of the date_parsing class instead of reimplementing pieces of it;
# however, the date_parsing class only does years, and this does finer tuned dates and also
# reformats them according to the encoding.
class Imprint
- # @param [Nokogiri::XML::NodeSet] originInfo_ng_nodeset of originInfo nodes
- def initialize(originInfo_ng_nodeset)
- @originInfo_ng_nodeset = originInfo_ng_nodeset
+ attr_reader :element
+
+ # @param [Nokogiri::XML::Node] an originInfo node
+ def initialize(element)
+ @element = element
end
- require 'marc_countries'
-
- # @return Array<String> each String is an imprint statement from a single originInfo element
def imprint_statements
- results = []
- @originInfo_ng_nodeset.each do |origin_info_node|
- edition = edition_vals_str(origin_info_node)
- place = place_vals_str(origin_info_node)
- publisher = publisher_vals_str(origin_info_node)
- dates = date_str(origin_info_node)
-
- place_pub = compact_and_join_with_delimiter([place, publisher], ' : ')
- edition_place_pub = compact_and_join_with_delimiter([edition, place_pub], ' - ')
- ed_place_pub_dates = compact_and_join_with_delimiter([edition_place_pub, dates], ', ')
-
- results << ed_place_pub_dates unless ed_place_pub_dates.empty?
- end
- results
+ display_str
end
+ # @return <String> an imprint statement from a single originInfo element
def display_str
- imprint_statements.join('; ') if imprint_statements.present?
- end
+ edition = edition_vals_str
+ place = place_vals_str
+ publisher = publisher_vals_str
+ dates = date_str
- # @return Array<Integer> an array of publication years for the resource
- def publication_date_for_slider
- @originInfo_ng_nodeset.map do |origin_info_node|
- date_elements = if origin_info_node.as_object.first.key_dates.any?
- origin_info_node.as_object.first.key_dates.map(&:as_object).map(&:first)
- else
- date_field_keys.map do |date_field|
- next unless origin_info_node.respond_to?(date_field)
+ place_pub = compact_and_join_with_delimiter([place, publisher], ' : ')
+ edition_place_pub = compact_and_join_with_delimiter([edition, place_pub], ' - ')
+ ed_place_pub_dates = compact_and_join_with_delimiter([edition_place_pub, dates], ', ')
- date_elements = origin_info_node.send(date_field)
- date_elements.map(&:as_object).map(&:first) if date_elements.any?
- end.compact.first
- end
-
- if date_elements.nil? || date_elements.none?
- []
- elsif date_elements.find(&:start?) &&
- date_elements.find(&:start?).as_range &&
- date_elements.find(&:end?) &&
- date_elements.find(&:end?).as_range
- start_date = date_elements.find(&:start?)
- end_date = date_elements.find(&:end?)
-
- (start_date.as_range.min.year..end_date.as_range.max.year).to_a
- elsif date_elements.find(&:start?) && date_elements.find(&:start?).as_range
- start_date = date_elements.find(&:start?)
-
- (start_date.as_range.min.year..Time.now.year).to_a
- elsif date_elements.one?
- date_elements.first.to_a.map(&:year)
- else
- date_elements.map { |v| v.to_a.map(&:year) }.flatten
- end
- end.flatten
+ ed_place_pub_dates
end
private
- def extract_year(el)
- DateParsing.year_int_from_date_str(el.text)
- end
-
def compact_and_join_with_delimiter(values, delimiter)
compact_values = values.compact.reject { |v| v.strip.empty? }
return compact_values.join(delimiter) if compact_values.length == 1 ||
!ends_in_terminating_punctuation?(delimiter)
@@ -98,31 +54,31 @@
def ends_in_terminating_punctuation?(value)
value.strip.end_with?('.', ',', ':', ';')
end
- def edition_vals_str(origin_info_node)
- origin_info_node.edition.reject do |e|
+ def edition_vals_str
+ element.edition.reject do |e|
e.text.strip.empty?
end.map(&:text).join(' ').strip
end
- def publisher_vals_str(origin_info_node)
- return if origin_info_node.publisher.text.strip.empty?
+ def publisher_vals_str
+ return if element.publisher.text.strip.empty?
- publishers = origin_info_node.publisher.reject do |p|
+ publishers = element.publisher.reject do |p|
p.text.strip.empty?
end.map(&:text)
compact_and_join_with_delimiter(publishers, ' : ')
end
# PLACE processing methods ------
- def place_vals_str(origin_info_node)
- return if origin_info_node.place.text.strip.empty?
+ def place_vals_str
+ return if element.place.text.strip.empty?
- places = place_terms(origin_info_node).reject do |p|
+ places = place_terms.reject do |p|
p.text.strip.empty?
end.map(&:text)
compact_and_join_with_delimiter(places, ' : ')
end
@@ -131,336 +87,203 @@
!term.attributes['type'].respond_to?(:value) ||
term.attributes['type'].value == 'text'
end
end
- def place_terms(origin_info_element)
- return [] unless origin_info_element.respond_to?(:place) &&
- origin_info_element.place.respond_to?(:placeTerm)
+ def place_terms
+ return [] unless element.respond_to?(:place) &&
+ element.place.respond_to?(:placeTerm)
- if unencoded_place_terms?(origin_info_element)
- origin_info_element.place.placeTerm.select do |term|
+ if unencoded_place_terms?(element)
+ element.place.placeTerm.select do |term|
!term.attributes['type'].respond_to?(:value) ||
term.attributes['type'].value == 'text'
end.compact
else
- origin_info_element.place.placeTerm.map do |term|
+ element.place.placeTerm.map do |term|
next unless term.attributes['type'].respond_to?(:value) &&
term.attributes['type'].value == 'code' &&
term.attributes['authority'].respond_to?(:value) &&
term.attributes['authority'].value == 'marccountry' &&
- MARC_COUNTRIES.include?(term.text.strip)
+ !['xx', 'vp'].include?(term.text.strip) &&
+ MARC_COUNTRY.include?(term.text.strip)
term = term.clone
- term.content = MARC_COUNTRIES[term.text.strip]
+ term.content = MARC_COUNTRY[term.text.strip]
term
end.compact
end
end
# DATE processing methods ------
- def date_str(origin_info_node)
- date_vals = origin_info_date_vals(origin_info_node)
+ def date_str
+ date_vals = origin_info_date_vals
return if date_vals.empty?
-
date_vals.map(&:strip).join(' ')
end
- def origin_info_date_vals(origin_info_node)
+ def origin_info_date_vals
date_field_keys.map do |date_field|
- next unless origin_info_node.respond_to?(date_field)
+ next unless element.respond_to?(date_field)
- date_elements = origin_info_node.send(date_field)
- date_elements_display_vals(date_elements) if date_elements.present?
+ date_elements = element.send(date_field)
+ parse_dates(date_elements) if date_elements.present?
end.compact.flatten
end
- def date_elements_display_vals(ng_date_elements)
- apply_date_qualifier_decoration(
- dedup_dates(
- join_date_ranges(
- process_decade_century_dates(
- process_bc_ad_dates(
- process_encoded_dates(ignore_bad_dates(ng_date_elements))
- )
- )
- )
- )
- )
- end
-
def date_field_keys
[:dateIssued, :dateCreated, :dateCaptured, :copyrightDate]
end
- def ignore_bad_dates(ng_date_elements)
- ng_date_elements.select do |ng_date_element|
- val = ng_date_element.text.strip
- val != '9999' && val != '0000-00-00' && val != 'uuuu'
- end
- end
+ class DateValue
+ attr_reader :value
+ delegate :text, :date, :point, :qualifier, :encoding, to: :value
- def process_encoded_dates(ng_date_elements)
- ng_date_elements.map do |ng_date_element|
- if date_is_w3cdtf?(ng_date_element)
- process_w3cdtf_date(ng_date_element)
- elsif date_is_iso8601?(ng_date_element)
- process_iso8601_date(ng_date_element)
- else
- ng_date_element
- end
+ def initialize(value)
+ @value = value
end
- end
- # note that there is no year 0: from https://en.wikipedia.org/wiki/Anno_Domini
- # "AD counting years from the start of this epoch, and BC denoting years before the start of the era.
- # There is no year zero in this scheme, so the year AD 1 immediately follows the year 1 BC."
- # See also https://consul.stanford.edu/display/chimera/MODS+display+rules for etdf
- def process_bc_ad_dates(ng_date_elements)
- ng_date_elements.map do |ng_date_element|
- case
- when date_is_edtf?(ng_date_element) && ng_date_element.text.strip == '0'
- ng_date_element.content = "1 B.C."
- when date_is_bc_edtf?(ng_date_element)
- year = ng_date_element.text.strip.gsub(/^-0*/, '').to_i + 1
- ng_date_element.content = "#{year} B.C."
- when date_is_ad?(ng_date_element)
- ng_date_element.content = "#{ng_date_element.text.strip.gsub(/^0*/, '')} A.D."
- end
- ng_date_element
+ # True if the element text isn't blank or the placeholder "9999".
+ def valid?
+ text.present? && !['9999', '0000-00-00', 'uuuu'].include?(text.strip)
end
- end
- def process_decade_century_dates(ng_date_elements)
- ng_date_elements.map do |ng_date_element|
- if date_is_decade?(ng_date_element)
- process_decade_date(ng_date_element)
- elsif date_is_century?(ng_date_element)
- process_century_date(ng_date_element)
- else
- ng_date_element
+ # Element text reduced to digits and hyphen. Captures date ranges and
+ # negative (B.C.) dates. Used for comparison/deduping.
+ def base_value
+ if text =~ /^\[?1\d{3}-\d{2}\??\]?$/
+ return text.sub(/(\d{2})(\d{2})-(\d{2})/, '\1\2-\1\3')
end
+
+ text.gsub(/(?<![\d])(\d{1,3})([xu-]{1,3})/i) { "#{$1}#{'0' * $2.length}"}.scan(/[\d-]/).join
end
- end
- def join_date_ranges(ng_date_elements)
- if dates_are_range?(ng_date_elements)
- start_date = ng_date_elements.find { |d| d.attributes['point'] && d.attributes['point'].value == 'start' }
- end_date = ng_date_elements.find { |d| d.attributes['point'] && d.attributes['point'].value == 'end' }
- ng_date_elements.map do |date|
- date = date.clone # clone the date object so we don't append the same one
- if normalize_date(date.text) == normalize_date(start_date.text)
- date.content = [start_date.text, end_date.text].join(' - ')
- date
- elsif normalize_date(date.text) != normalize_date(end_date.text)
- date
- end
- end.compact
- elsif dates_are_open_range?(ng_date_elements)
- start_date = ng_date_elements.find { |d| d.attributes['point'] && d.attributes['point'].value == 'start' }
- ng_date_elements.map do |date|
- date = date.clone # clone the date object so we don't append the same one
- date.content = "#{start_date.text}-" if date.text == start_date.text
- date
+ # Decoded version of the date, if it was encoded. Strips leading zeroes.
+ def decoded_value
+ return text.strip unless date
+
+ unless encoding.present?
+ return text.strip unless text =~ /^-?\d+$/ || text =~ /^[\dXxu?-]{4}$/
end
- else
- ng_date_elements
- end
- end
- def dedup_dates(ng_date_elements)
- date_text = ng_date_elements.map { |d| normalize_date(d.text) }
- if date_text != date_text.uniq
- if ng_date_elements.find { |d| d.attributes['qualifier'].respond_to?(:value) }
- [ng_date_elements.find { |d| d.attributes['qualifier'].respond_to?(:value) }]
- elsif ng_date_elements.find { |d| !d.attributes['encoding'] }
- [ng_date_elements.find { |d| !d.attributes['encoding'] }]
+ # Delegate to the appropriate decoding method, if any
+ case value.precision
+ when :day
+ date.strftime('%B %e, %Y')
+ when :month
+ date.strftime('%B %Y')
+ when :year
+ year = date.year
+ if year < 1
+ "#{year.abs + 1} B.C."
+ # Any dates before the year 1000 are explicitly marked A.D.
+ elsif year > 1 && year < 1000
+ "#{year} A.D."
+ else
+ year.to_s
+ end
+ when :century
+ return "#{(date.to_s[0..1].to_i + 1).ordinalize} century"
+ when :decade
+ return "#{date.year}s"
else
- [ng_date_elements.first]
+ text.strip
end
- else
- ng_date_elements
end
- end
- def apply_date_qualifier_decoration(ng_date_elements)
- return_fields = ng_date_elements.map do |date|
- date = date.clone
- if date_is_approximate?(date)
- date.content = "[ca. #{date.text}]"
- elsif date_is_questionable?(date)
- date.content = "[#{date.text}?]"
- elsif date_is_inferred?(date)
- date.content = "[#{date.text}]"
- end
+ # Decoded date with "B.C." or "A.D." and qualifier markers. See (outdated):
+ # https://consul.stanford.edu/display/chimera/MODS+display+rules#MODSdisplayrules-3b.%3CoriginInfo%3E
+ def qualified_value
+ date = decoded_value
+
+ return "[ca. #{date}]" if qualifier == 'approximate'
+ return "[#{date}?]" if qualifier == 'questionable'
+ return "[#{date}]" if qualifier == 'inferred'
+
date
end
- return_fields.map(&:text)
end
- def date_is_approximate?(ng_date_element)
- ng_date_element.attributes['qualifier'] &&
- ng_date_element.attributes['qualifier'].respond_to?(:value) &&
- ng_date_element.attributes['qualifier'].value == 'approximate'
- end
-
- def date_is_questionable?(ng_date_element)
- ng_date_element.attributes['qualifier'] &&
- ng_date_element.attributes['qualifier'].respond_to?(:value) &&
- ng_date_element.attributes['qualifier'].value == 'questionable'
- end
-
- def date_is_inferred?(ng_date_element)
- ng_date_element.attributes['qualifier'] &&
- ng_date_element.attributes['qualifier'].respond_to?(:value) &&
- ng_date_element.attributes['qualifier'].value == 'inferred'
- end
-
- def dates_are_open_range?(ng_date_elements)
- ng_date_elements.any? do |element|
- element.attributes['point'] &&
- element.attributes['point'].respond_to?(:value) &&
- element.attributes['point'].value == 'start'
- end && !ng_date_elements.any? do |element|
- element.attributes['point'] &&
- element.attributes['point'].respond_to?(:value) &&
- element.attributes['point'].value == 'end'
+ class DateRange
+ def initialize(start: nil, stop: nil)
+ @start = start
+ @stop = stop
end
- end
- def dates_are_range?(ng_date_elements)
- attributes = ng_date_elements.map do |date|
- if date.attributes['point'].respond_to?(:value)
- date.attributes['point'].value
- end
+ # Base value as hyphen-joined string. Used for comparison/deduping.
+ def base_value
+ "#{@start&.base_value}-#{@stop&.base_value}"
end
- attributes.include?('start') &&
- attributes.include?('end')
- end
- def process_w3cdtf_date(ng_date_element)
- ng_date_element = ng_date_element.clone
- ng_date_element.content = begin
- if ng_date_element.text.strip =~ /^\d{4}-\d{2}-\d{2}$/
- Date.parse(ng_date_element.text).strftime(full_date_format)
- elsif ng_date_element.text.strip =~ /^\d{4}-\d{2}$/
- Date.parse("#{ng_date_element.text}-01").strftime(short_date_format)
- else
- ng_date_element.content
- end
- rescue
- ng_date_element.content
+ # Base values as array. Used for comparison/deduping of individual dates.
+ def base_values
+ [@start&.base_value, @stop&.base_value].compact
end
- ng_date_element
- end
- def process_iso8601_date(ng_date_element)
- ng_date_element = ng_date_element.clone
- ng_date_element.content = begin
- if ng_date_element.text.strip =~ /^\d{8,}$/
- Date.parse(ng_date_element.text).strftime(full_date_format)
- else
- ng_date_element.content
- end
- rescue
- ng_date_element.content
+ # The encoding value for the start of the range, or stop if not present.
+ def encoding
+ @start&.encoding || @stop&.encoding
end
- ng_date_element
- end
- DECADE_4CHAR_REGEXP = Regexp.new('(^|.*\D)(\d{3}[u\-?x])(.*)')
+ # Decoded dates with "B.C." or "A.D." and qualifier markers applied to
+ # the entire range, or individually if dates differ.
+ def qualified_value
+ if @start&.qualifier == @stop&.qualifier
+ qualifier = @start&.qualifier || @stop&.qualifier
+ date = "#{@start&.decoded_value} - #{@stop&.decoded_value}"
+ return "[ca. #{date}]" if qualifier == 'approximate'
+ return "[#{date}?]" if qualifier == 'questionable'
+ return "[#{date}]" if qualifier == 'inferred'
- # strings like 195x, 195u, 195- and 195? become '1950s' in the ng_date_element content
- def process_decade_date(ng_date_element)
- my_ng_date_element = ng_date_element.clone
- my_ng_date_element.content = begin
- orig_date_str = ng_date_element.text.strip
- # note: not calling DateParsing.display_str_for_decade directly because non-year text is lost
- decade_matches = orig_date_str.match(DECADE_4CHAR_REGEXP) if orig_date_str
- if decade_matches
- decade_str = decade_matches[2]
- changed_to_zero = decade_str.to_s.tr('u\-?x', '0') if decade_str
- zeroth_year = DateParsing.new(changed_to_zero).sortable_year_for_yyyy if changed_to_zero
- new_decade_str = "#{zeroth_year}s" if zeroth_year
- my_ng_date_element.content = "#{decade_matches[1]}#{new_decade_str}#{decade_matches[3]}"
+ date
else
- my_ng_date_element.content
+ "#{@start&.qualified_value} - #{@stop&.qualified_value}"
end
- rescue
- my_ng_date_element.content
end
- my_ng_date_element
end
- CENTURY_4CHAR_REGEXP = Regexp.new('(^|.*\D)((\d{1,2})[u\-]{2})(.*)')
+ def parse_dates(elements)
+ # convert to DateValue objects and keep only valid ones
+ dates = elements.map(&:as_object).flatten.map { |element| DateValue.new(element) }.select(&:valid?)
+ # join any date ranges into DateRange objects
+ point, nonpoint = dates.partition(&:point)
+ if point.any?
+ range = DateRange.new(start: point.find { |date| date.point == 'start' },
+ stop: point.find { |date| date.point == 'end' })
+ nonpoint.unshift(range)
+ end
+ dates = nonpoint
- # strings like 18uu, 18-- become '19th century' in the ng_date_element content
- def process_century_date(ng_date_element)
- my_ng_date_element = ng_date_element.clone
- my_ng_date_element.content = begin
- orig_date_str = ng_date_element.text.strip
- # note: not calling DateParsing.display_str_for_century directly because non-year text is lost
- century_matches = orig_date_str.match(CENTURY_4CHAR_REGEXP) if orig_date_str
- if century_matches
- new_century_str = "#{(century_matches[3].to_i + 1).ordinalize} century"
- my_ng_date_element.content = "#{century_matches[1]}#{new_century_str}#{century_matches[4]}"
+ # ensure dates are unique with respect to their base values
+ dates = dates.group_by(&:base_value).map do |_value, group|
+ next group.first if group.one?
+
+ # if one of the duplicates wasn't encoded, use that one. see:
+ # https://consul.stanford.edu/display/chimera/MODS+display+rules#MODSdisplayrules-3b.%3CoriginInfo%3E
+ if group.reject(&:encoding).any?
+ group.reject(&:encoding).first
+
+ # otherwise just randomly pick the first in the group
else
- my_ng_date_element.content
+ group.last
end
- rescue
- my_ng_date_element.content
end
- my_ng_date_element
- end
- def field_is_encoded?(ng_element, encoding)
- ng_element.attributes['encoding'] &&
- ng_element.attributes['encoding'].respond_to?(:value) &&
- ng_element.attributes['encoding'].value.downcase == encoding
- end
+ # compare the remaining dates against one part of the other of a range
+ date_ranges = dates.select { |date| date.is_a?(DateRange) }
- def date_is_bc_edtf?(ng_date_element)
- ng_date_element.text.strip.start_with?('-') && date_is_edtf?(ng_date_element)
- end
+ # remove any range that duplicates an unencoded date that includes that range
+ duplicated_ranges = dates.flat_map do |date|
+ next if date.is_a?(DateRange) || date.encoding.present?
- def date_is_ad?(ng_date_element)
- str = ng_date_element.text.strip.gsub(/^0*/, '')
- str.present? && str.length < 4 && !str.match('A.D.')
- end
+ date_ranges.select { |r| r.base_values.include?(date.base_value) }
+ end
- def date_is_edtf?(ng_date_element)
- field_is_encoded?(ng_date_element, 'edtf')
- end
+ dates = dates - duplicated_ranges
- def date_is_w3cdtf?(ng_date_element)
- field_is_encoded?(ng_date_element, 'w3cdtf')
- end
-
- def date_is_iso8601?(ng_date_element)
- field_is_encoded?(ng_date_element, 'iso8601')
- end
-
- # @return true if decade string needs tweaking for display
- def date_is_decade?(ng_date_element)
- ng_date_element.text.strip.match(DECADE_4CHAR_REGEXP)
- end
-
- # @return true if century string needs tweaking for display
- def date_is_century?(ng_date_element)
- ng_date_element.text.strip.match(CENTURY_4CHAR_REGEXP)
- end
-
- def full_date_format(full_date_format = '%B %-d, %Y')
- @full_date_format ||= full_date_format
- end
-
- def short_date_format(short_date_format = '%B %Y')
- @short_date_format ||= short_date_format
- end
-
- def normalize_date(date_str)
- date_str.strip.gsub(/^\[*ca\.\s*|c|\[|\]|\?/, '')
+ # output formatted dates with qualifiers, A.D./B.C., etc.
+ dates.map(&:qualified_value)
end
end
end
end