module Krikri::Util
  module ExtendedDateParser
    module_function

    ##
    # Attempts to parse a string into a `Date` or EDTF value.
    #
    # The input is first normalized on a private copy: surrounding whitespace
    # is stripped and every internal run of whitespace is collapsed to a single
    # space. The caller's string is never modified, so frozen strings are
    # accepted.
    #
    # The normalized string is then offered to each strategy below, in order,
    # and the first non-nil result is returned:
    #
    #   1. `#parse_interval` (only when `allow_interval` is true)
    #   2. `#parse_m_d_y`
    #   3. `Date.edtf`, after replacing every '.' with '-'
    #   4. `#partial_edtf`
    #   5. `#decade_hyphen`
    #   6. `#month_year`
    #   7. `#decade_s`
    #   8. `#hyphenated_partial_range`
    #   9. `#parse_date`
    #
    # @param date_str [String, nil] a string which may contain a date or a
    #   date range
    # @param allow_interval [Boolean] a flag specifying whether to use
    #   `#parse_interval` (and through it `#range_match`) to look for range
    #   values
    #
    # @return [Date, EDTF::Epoch, EDTF::Interval, nil] the date parsed, or nil
    #   when `date_str` is nil or no strategy recognizes it
    def parse(date_str, allow_interval = false)
      return nil if date_str.nil?

      # Normalize a copy rather than using strip!/gsub!, which would alter the
      # caller's object and raise FrozenError for frozen strings.
      str = date_str.strip.gsub(/\s+/, ' ')

      date = parse_interval(str) if allow_interval
      date ||= parse_m_d_y(str)
      date ||= Date.edtf(str.gsub('.', '-'))
      date ||= partial_edtf(str)
      date ||= decade_hyphen(str)
      date ||= month_year(str)
      date ||= decade_s(str)
      date ||= hyphenated_partial_range(str)
      date ||= parse_date(str)
      # Coerce a stray `false` from any strategy into nil.
      date || nil
    end

    ##
    # Matches a wide variety of date ranges whose two sides are separated by
    # a run of '-' and/or '.' characters, or by the words 'to' / 'until'.
    #
    # The words 'to' and 'until' are rewritten to '-' before matching, but
    # only where they are not directly adjacent to another letter. This keeps
    # "1990 to 1991" and "1990to1991" working while leaving words that merely
    # contain "to" (e.g. "October") intact.
    #
    # @param str [String] a string which may contain a date range
    # @return [Array(String), nil] the beginning and ending dates of an
    #   identified range, or nil if no range is found
    def range_match(str)
      normalized = str.gsub(/(?<![a-zA-Z])(?:until|to)(?![a-zA-Z])/, '-')
      regexp = %r{
        ([a-zA-Z]{0,3}\s?[\d\-\/\.xu\?\~a-zA-Z]*,?\s?
        \d{3}[\d\-xs][s\d\-\.xu\?\~]*)
        \s*[-\.]+\s*
        ([a-zA-Z]{0,3}\s?[\d\-\/\.xu\?\~a-zA-Z]*,?\s?
        \d{3}[\d\-xs][s\d\-\.xu\?\~]*)
      }x
      regexp.match(normalized) { |m| [m[1], m[2]] }
    end

    ##
    # Builds an `EDTF::Interval` out of a string holding a date range.
    #
    # The string is split into two endpoints by `#range_match`, and each
    # endpoint is run through `#parse`; an endpoint that cannot be parsed is
    # replaced by `:unknown`. If the parsed start responds to `first`, its
    # first element is used as the lower bound; if the parsed end responds to
    # `last`, its last element is used as the upper bound.
    #
    # @param str [String] a string which may contain a date range
    # @return [EDTF::Interval, nil] an EDTF object representing a date range
    #   or nil if none can be found
    #
    # @see #range_match
    def parse_interval(str)
      bounds = range_match(str)
      return nil if bounds.nil?

      parsed = bounds.map do |part|
        parse(part) || :unknown
      end

      lower = parsed[0]
      upper = parsed[1]

      # Collapse range-like endpoints to their outermost values.
      lower = lower.first if lower.respond_to?(:first)
      upper = upper.last if upper.respond_to?(:last)

      EDTF::Interval.new(lower, upper)
    end

    ##
    # Delegates to `Date.parse`, converting an `ArgumentError` (as raised for
    # an invalid date string) into a `nil` result.
    #
    # @param args the arguments forwarded unchanged to `Date.parse`
    # @return [Date, nil] the parsed date or nil
    # @see Date.parse
    def parse_date(*args)
      Date.parse(*args)
    rescue ArgumentError
      nil
    end

    ##
    # Parses a month-day-year string. Every non-digit character in `value` is
    # first replaced with '-', and the result is handed to `Date.strptime`
    # with the format '%m-%d-%Y'. An `ArgumentError` (as raised for an invalid
    # date string) is turned into `nil`.
    #
    # @param value [String] the string to parse
    # @return [Date, nil] the parsed date or nil
    # @see Date.strptime
    def parse_m_d_y(value)
      hyphenated = value.gsub(/[^0-9]/, '-')
      Date.strptime(hyphenated, '%m-%d-%Y')
    rescue ArgumentError
      nil
    end

    ##
    # Handles a two-digit month followed by a four-digit year, e.g. 01-2045.
    #
    # The pattern is anchored to the whole string (`\A`/`\z`), so a
    # multi-line value that merely contains a matching line is rejected.
    # Leading or trailing whitespace is not tolerated; `#parse` strips its
    # input before calling this method.
    #
    # @param str [String] the string to test
    # @return [Object, nil] the result of `Date.edtf` for the reordered
    #   "YYYY-MM" string, or nil when `str` does not match
    def month_year(str)
      /\A(?<month>\d{2})-(?<year>\d{4})\z/.match(str) do |m|
        Date.edtf("#{m[:year]}-#{m[:month]}")
      end
    end

    ##
    # Handles a four-digit year followed by a two-digit end year,
    # e.g. 1990-92, which is expanded to "1990/1992" by reusing the first two
    # digits of the start year for the end year.
    #
    # The pattern is anchored to the whole string (`\A`/`\z`), so a
    # multi-line value that merely contains a matching line is rejected.
    #
    # @param str [String] the string to test
    # @return [Object, nil] the result of `Date.edtf` for the expanded
    #   "YYYY/YYYY" string, or nil when `str` does not match
    def hyphenated_partial_range(str)
      /\A(?<century>\d{2})(?<from>\d{2})-(?<to>\d{2})\z/.match(str) do |m|
        Date.edtf("#{m[:century]}#{m[:from]}/#{m[:century]}#{m[:to]}")
      end
    end

    ##
    # Handles a date whose final component is given as a "from/to" pair,
    # e.g. 1970-08-01/02 or 1970-12/10. The shared prefix is repeated on both
    # sides, producing "1970-08-01/1970-08-02" or "1970-12/1970-10".
    #
    # The pattern is anchored to the whole string (`\A`/`\z`), so a
    # multi-line value that merely contains a matching line is rejected.
    #
    # @param str [String] the string to test
    # @return [Object, nil] the result of `Date.edtf` for the expanded
    #   string, or nil when `str` does not match
    def partial_edtf(str)
      pattern = %r{\A(?<prefix>\d{4}(?:-\d{2})*)-(?<from>\d{2})/(?<to>\d{2})\z}
      pattern.match(str) do |m|
        Date.edtf("#{m[:prefix]}-#{m[:from]}/#{m[:prefix]}-#{m[:to]}")
      end
    end

    ##
    # Handles a decade written as a year ending in zero plus "s", e.g. 1990s,
    # which is passed to `Date.edtf` as "199x".
    #
    # The pattern is anchored to the whole string (`\A`/`\z`), so a
    # multi-line value that merely contains a matching line is rejected.
    #
    # @param str [String] the string to test
    # @return [Object, nil] the result of `Date.edtf` for the "NNNx" string,
    #   or nil when `str` does not match
    def decade_s(str)
      /\A(?<decade>\d{3})0s\z/.match(str) do |m|
        Date.edtf("#{m[:decade]}x")
      end
    end

    ##
    # Handles a decade written as three digits and a trailing hyphen,
    # e.g. 199-, which is passed to `Date.edtf` as "199x".
    #
    # The pattern is anchored to the whole string (`\A`/`\z`), so a
    # multi-line value that merely contains a matching line is rejected.
    #
    # @param str [String] the string to test
    # @return [Object, nil] the result of `Date.edtf` for the "NNNx" string,
    #   or nil when `str` does not match
    def decade_hyphen(str)
      /\A(?<decade>\d{3})-\z/.match(str) do |m|
        Date.edtf("#{m[:decade]}x")
      end
    end
  end
end