lib/ddr/models/year_facet.rb in ddr-models-3.0.0.rc1 vs lib/ddr/models/year_facet.rb in ddr-models-3.0.0.rc2
- old
+ new
@@ -1,154 +1,96 @@
require "date"
+require "edtf"
module Ddr::Models
class YearFacet
EARLIEST_YEAR = 1000
+ LATEST_YEAR = Date.today.year + 100
+ VALID_YEARS = (EARLIEST_YEAR..LATEST_YEAR)
+ VALUE_SEP = /;/
# Between 1965 and 1968
BETWEEN = Regexp.new '\A([Bb]etween\s+)(\d{4})(\s+and\s+)(\d{4})\??\z'
- # YYYx (192x)
- # YYYX (192X)
- # YYY? (192?)
- # YYY- (192-)
- # YYY-? (192-?)
- IN_DECADE = Regexp.new '\A(\d{3})([xX\-]\??|\?)\z'
+ # circa 1920, ca. 1920, c1920 => 1920
+ CIRCA = Regexp.new '\b(circa\s+|ca?\.\s*|c(?=\d{4}[^\d]*))'
- # YYxx (19xx)
- IN_CENTURY = Regexp.new '\A(\d{2})xx\z'
+ # 1935-1940 => 1935/1940
+ YEAR_RANGE = Regexp.new '(?<=\d{4})-(?=\d{4})'
- # YYY0s (1920s)
- # YYY0s? (1920s?)
- DECADE = Regexp.new '\A(\d{3}0)s\??\z'
+ # 1920s, 1920s?, 192u, 192-, 192-?, 192? => 192x
+ DECADE = Regexp.new '(?<=\A\d{3})(-\??|0s\??|u|\?)\z'
- # YYYY-MM (2010-01)
- # YYYY/MM (2010/01)
- YEAR_MONTH = Regexp.new '\A(\d{4})[/-](0[1-9]|1[0-2])\z'
+ # 2010/01 => 2010-01
+ MONTH = Regexp.new '(?<=\A\d{4})\/(?=\d{2}\z)'
- # YYYY-YYYY (1935-2010)
- # YYYY/YYYY (1935/2010)
- YEAR_RANGE = Regexp.new '\A(\d{4})[/-](\d{4})\z'
+ # 193u/, 193x/ => 1930/
+ START_DECADE = Regexp.new '(?<=\d{3})[uxX](?=\/)'
- # YYYY (1979)
- YEAR = Regexp.new '\A\d{4}\z'
+ # /194x, /194u => /1949
+ END_DECADE = Regexp.new '(?<=\/\d{3})[uxX]'
- SQUARE_BRACKETS = Regexp.new '[\[\]]'
+ # 19uu => 19xx
+ CENTURY = Regexp.new '(?<=\A\d{2})uu(?=\z)'
- # c. 1920
- # ca. 1920
- # c1920
- CIRCA = Regexp.new '\b(circa\s+|ca?\.\s*|c(?=\d{4}[^\d]*))'
-
- class << self
- def call(obj)
- new(obj).values
- end
+ def self.call(object)
+ new(object).call
end
- attr_reader :obj, :values
+ attr_reader :object
- def initialize(obj)
- @obj = obj
- @values = []
- facet_values
+ def initialize(object)
+ @object = object
end
- def facet_values
- obj.desc_metadata.date.each do |date|
- date.split(/;/).each do |value|
- clean! value
- years = extract_years(value)
- validate! years
- values.push *years
+ def call
+ source_dates.each_with_object([]) do |date, facet_values|
+ date.split(VALUE_SEP).each do |value|
+ value.strip!
+ edtf_date = convert_to_edtf(value)
+ years = Array(edtf_years(edtf_date))
+ years.select! { |year| VALID_YEARS.include?(year) }
+ facet_values.push(*years)
end
end
end
- def extract_years(value)
- years = match_years(value) || parse_year(value)
- Array(years)
- end
+ private
- def clean!(value)
- value.strip!
- value.gsub! SQUARE_BRACKETS, ""
- value.gsub! CIRCA, ""
+ def source_dates
+ object.desc_metadata.date
end
- def validate!(years)
- years = years & valid_years.to_a
- end
-
- def parse_year(value)
- Date.parse(value).year
- rescue ArgumentError
- nil
- end
-
- def valid_years
- (EARLIEST_YEAR..latest_year)
- end
-
- def latest_year
- Date.today.year + 100
- end
-
- def match_years(value)
- result = match_year_range(value) ||
- match_year_month(value) ||
- match_year(value) ||
- match_in_decade(value) ||
- match_in_century(value) ||
- match_decade(value) ||
- match_between(value)
- first_year, last_year = Array(result).map(&:to_i)
- last_year ? (first_year..last_year) : first_year
- end
-
- def match_year_range(value)
- if m = YEAR_RANGE.match(value)
- m[1, 2]
+ def convert_to_edtf(value)
+ if m = BETWEEN.match(value)
+ value.sub! m[1], "" # [Bb]etween
+ value.sub! m[3], "/" # and
end
+ substitutions.reduce(value) { |memo, (regexp, repl)| memo.gsub(regexp, repl) }
end
- def match_year_month(value)
- if m = YEAR_MONTH.match(value)
- m[1]
- end
+ def substitutions
+ [
+ [ CIRCA, "" ],
+ [ YEAR_RANGE, "/" ],
+ [ DECADE, "x" ],
+ [ MONTH, "-" ],
+ [ START_DECADE, "0" ],
+ [ END_DECADE, "9" ],
+ [ CENTURY, "xx" ],
+ ]
end
- def match_year(value)
- if m = YEAR.match(value)
- value
+ def edtf_years(value)
+ case parsed = EDTF.parse!(value)
+ when Date, EDTF::Season
+ parsed.year
+ when EDTF::Set, EDTF::Interval, EDTF::Epoch
+ parsed.map(&:year).uniq
end
- end
-
- def match_in_decade(value)
- if m = IN_DECADE.match(value)
- [ "#{m[1]}0", "#{m[1]}9" ]
- end
- end
-
- def match_in_century(value)
- if m = IN_CENTURY.match(value)
- [ "#{m[1]}00", "#{m[1]}99" ]
- end
- end
-
- def match_decade(value)
- if m = DECADE.match(value)
- [ m[1], m[1].sub(/0\z/, "9") ]
- end
- end
-
- def match_between(value)
- if m = BETWEEN.match(value)
- value.sub! m[1], "" # [Bb]etween
- value.sub! m[3], "-" # and
- match_year_range(value)
- end
+ rescue ArgumentError # EDTF cannot parse
+ nil
end
end
end