lib/ddr/models/year_facet.rb in ddr-models-2.11.0 vs lib/ddr/models/year_facet.rb in ddr-models-3.0.0.alpha.1
- old
+ new
@@ -1,96 +1,154 @@
require "date"
-require "edtf"
module Ddr::Models
class YearFacet
EARLIEST_YEAR = 1000
- LATEST_YEAR = Date.today.year + 100
- VALID_YEARS = (EARLIEST_YEAR..LATEST_YEAR)
- VALUE_SEP = /;/
# Between 1965 and 1968
BETWEEN = Regexp.new '\A([Bb]etween\s+)(\d{4})(\s+and\s+)(\d{4})\??\z'
- # circa 1920, ca. 1920, c1920 => 1920
- CIRCA = Regexp.new '\b(circa\s+|ca?\.\s*|c(?=\d{4}[^\d]*))'
+ # YYYx (192x)
+ # YYYX (192X)
+ # YYY? (192?)
+ # YYY- (192-)
+ # YYY-? (192-?)
+ IN_DECADE = Regexp.new '\A(\d{3})([xX\-]\??|\?)\z'
- # 1935-1940 => 1935/1940
- YEAR_RANGE = Regexp.new '(?<=\d{4})-(?=\d{4})'
+ # YYxx (19xx)
+ IN_CENTURY = Regexp.new '\A(\d{2})xx\z'
- # 1920s, 1920s?, 192u, 192-, 192-?, 192? => 192x
- DECADE = Regexp.new '(?<=\A\d{3})(-\??|0s\??|u|\?)\z'
+ # YYY0s (1920s)
+ # YYY0s? (1920s?)
+ DECADE = Regexp.new '\A(\d{3}0)s\??\z'
- # 2010/01 => 2010-01
- MONTH = Regexp.new '(?<=\A\d{4})\/(?=\d{2}\z)'
+ # YYYY-MM (2010-01)
+ # YYYY/MM (2010/01)
+ YEAR_MONTH = Regexp.new '\A(\d{4})[/-](0[1-9]|1[0-2])\z'
- # 193u/, 193x/ => 1930/
- START_DECADE = Regexp.new '(?<=\d{3})[uxX](?=\/)'
+ # YYYY-YYYY (1935-2010)
+ # YYYY/YYYY (1935/2010)
+ YEAR_RANGE = Regexp.new '\A(\d{4})[/-](\d{4})\z'
- # /194x, /194u => /1949
- END_DECADE = Regexp.new '(?<=\/\d{3})[uxX]'
+ # YYYY (1979)
+ YEAR = Regexp.new '\A\d{4}\z'
- # 19uu => 19xx
- CENTURY = Regexp.new '(?<=\A\d{2})uu(?=\z)'
+ SQUARE_BRACKETS = Regexp.new '[\[\]]'
- def self.call(object)
- new(object).call
+ # c. 1920
+ # ca. 1920
+ # c1920
+ CIRCA = Regexp.new '\b(circa\s+|ca?\.\s*|c(?=\d{4}[^\d]*))'
+
+ class << self
+ def call(obj)
+ new(obj).values
+ end
end
- attr_reader :object
+ attr_reader :obj, :values
- def initialize(object)
- @object = object
+ def initialize(obj)
+ @obj = obj
+ @values = []
+ facet_values
end
- def call
- source_dates.each_with_object([]) do |date, facet_values|
- date.split(VALUE_SEP).each do |value|
- value.strip!
- edtf_date = convert_to_edtf(value)
- years = Array(edtf_years(edtf_date))
- years.select! { |year| VALID_YEARS.include?(year) }
- facet_values.push(*years)
+ def facet_values
+ obj.descMetadata.date.each do |date|
+ date.split(/;/).each do |value|
+ clean! value
+ years = extract_years(value)
+ validate! years
+ values.push *years
end
end
end
- private
+ def extract_years(value)
+ years = match_years(value) || parse_year(value)
+ Array(years)
+ end
- def source_dates
- object.descMetadata.date
+ def clean!(value)
+ value.strip!
+ value.gsub! SQUARE_BRACKETS, ""
+ value.gsub! CIRCA, ""
end
- def convert_to_edtf(value)
- if m = BETWEEN.match(value)
- value.sub! m[1], "" # [Bb]etween
- value.sub! m[3], "/" # and
+ def validate!(years)
+ years = years & valid_years.to_a
+ end
+
+ def parse_year(value)
+ Date.parse(value).year
+ rescue ArgumentError
+ nil
+ end
+
+ def valid_years
+ (EARLIEST_YEAR..latest_year)
+ end
+
+ def latest_year
+ Date.today.year + 100
+ end
+
+ def match_years(value)
+ result = match_year_range(value) ||
+ match_year_month(value) ||
+ match_year(value) ||
+ match_in_decade(value) ||
+ match_in_century(value) ||
+ match_decade(value) ||
+ match_between(value)
+ first_year, last_year = Array(result).map(&:to_i)
+ last_year ? (first_year..last_year) : first_year
+ end
+
+ def match_year_range(value)
+ if m = YEAR_RANGE.match(value)
+ m[1, 2]
end
- substitutions.reduce(value) { |memo, (regexp, repl)| memo.gsub(regexp, repl) }
end
- def substitutions
- [
- [ CIRCA, "" ],
- [ YEAR_RANGE, "/" ],
- [ DECADE, "x" ],
- [ MONTH, "-" ],
- [ START_DECADE, "0" ],
- [ END_DECADE, "9" ],
- [ CENTURY, "xx" ],
- ]
+ def match_year_month(value)
+ if m = YEAR_MONTH.match(value)
+ m[1]
+ end
end
- def edtf_years(value)
- case parsed = EDTF.parse!(value)
- when Date, EDTF::Season
- parsed.year
- when EDTF::Set, EDTF::Interval, EDTF::Epoch
- parsed.map(&:year).uniq
+ def match_year(value)
+ if m = YEAR.match(value)
+ value
end
- rescue ArgumentError # EDTF cannot parse
- nil
+ end
+
+ def match_in_decade(value)
+ if m = IN_DECADE.match(value)
+ [ "#{m[1]}0", "#{m[1]}9" ]
+ end
+ end
+
+ def match_in_century(value)
+ if m = IN_CENTURY.match(value)
+ [ "#{m[1]}00", "#{m[1]}99" ]
+ end
+ end
+
+ def match_decade(value)
+ if m = DECADE.match(value)
+ [ m[1], m[1].sub(/0\z/, "9") ]
+ end
+ end
+
+ def match_between(value)
+ if m = BETWEEN.match(value)
+ value.sub! m[1], "" # [Bb]etween
+ value.sub! m[3], "-" # and
+ match_year_range(value)
+ end
end
end
end