module Csvlint
  module Csvw
    # Validates and parses date/time cell values, either against an explicit
    # CSVW-style date pattern (e.g. "dd/MM/yyyy", "HH:mm:ss.SSS") or, when no
    # pattern is given, against the default lexical form of an XML Schema
    # date/time datatype.
    class DateFormat
      XSD_DATE = "http://www.w3.org/2001/XMLSchema#date"
      XSD_TIME = "http://www.w3.org/2001/XMLSchema#time"
      XSD_DATE_TIME = "http://www.w3.org/2001/XMLSchema#dateTime"

      # Any of these characters left over once the supported field symbols
      # have been removed from a pattern means the pattern is unsupported.
      UNSUPPORTED_SYMBOLS = /[GyYuUrQqMLlwWdDFgEecahHKkjJmsSAzZOvVXx]/

      # Regexp fragment for each supported field symbol. The named captures
      # (year, month, day, hour, minute, timezone) are what #parse reads.
      # "ss" is deliberately unnamed: callers wrap it in (?<second>...) so an
      # optional fractional part can be included in the same capture.
      FIELDS = {
        "yyyy" => /(?<year>-?([1-9][0-9]{3,}|0[0-9]{3}))/,
        "MM" => /(?<month>0[1-9]|1[0-2])/,
        "M" => /(?<month>[1-9]|1[0-2])/,
        "dd" => /(?<day>0[1-9]|[12][0-9]|3[01])/,
        "d" => /(?<day>[1-9]|[12][0-9]|3[01])/,
        "HH" => /(?<hour>[01][0-9]|2[0-3])/,
        "mm" => /(?<minute>[0-5][0-9])/,
        "ss" => /([0-6][0-9])/,
        "X" => /(?<timezone>Z|[-+]((0[0-9]|1[0-3])([0-5][0-9])?|14(00)?))/,
        "XX" => /(?<timezone>Z|[-+]((0[0-9]|1[0-3])[0-5][0-9]|1400))/,
        "XXX" => /(?<timezone>Z|[-+]((0[0-9]|1[0-3]):[0-5][0-9]|14:00))/,
        "x" => /(?<timezone>[-+]((0[0-9]|1[0-3])([0-5][0-9])?|14(00)?))/,
        "xx" => /(?<timezone>[-+]((0[0-9]|1[0-3])[0-5][0-9]|1400))/,
        "xxx" => /(?<timezone>[-+]((0[0-9]|1[0-3]):[0-5][0-9]|14:00))/
      }.freeze

      # Anchors a regexp source to the whole value. \A/\z are used instead of
      # ^/$ so that a valid first line followed by junk is not accepted.
      anchored = ->(source) { Regexp.new("\\A#{source}\\z") }

      # Compiles one of the well-known patterns below: field symbols become
      # their FIELDS fragment, everything else is matched literally (escaped,
      # so "." only matches a dot).
      from_tokens = lambda do |pattern|
        source = pattern.split(/(yyyy|MM|M|dd|d|HH|mm|ss)/).map do |part|
          if part == "ss"
            "(?<second>#{FIELDS["ss"]})"
          elsif FIELDS.key?(part)
            FIELDS[part].to_s
          else
            Regexp.escape(part)
          end
        end
        anchored.call(source.join)
      end

      DATE_PATTERN_REGEXP = [
        "yyyy-MM-dd", "yyyyMMdd",
        "dd-MM-yyyy", "d-M-yyyy", "MM-dd-yyyy", "M-d-yyyy",
        "dd/MM/yyyy", "d/M/yyyy", "MM/dd/yyyy", "M/d/yyyy",
        "dd.MM.yyyy", "d.M.yyyy", "MM.dd.yyyy", "M.d.yyyy"
      ].map { |pattern| [pattern, from_tokens.call(pattern)] }.to_h.freeze

      TIME_PATTERN_REGEXP = [
        "HH:mm:ss", "HHmmss", "HH:mm", "HHmm"
      ].map { |pattern| [pattern, from_tokens.call(pattern)] }.to_h.freeze

      DATE_TIME_PATTERN_REGEXP = [
        "yyyy-MM-ddTHH:mm:ss", "yyyy-MM-ddTHH:mm"
      ].map { |pattern| [pattern, from_tokens.call(pattern)] }.to_h.freeze

      default_date = "#{FIELDS["yyyy"]}-#{FIELDS["MM"]}-#{FIELDS["dd"]}"
      default_time = "#{FIELDS["HH"]}:#{FIELDS["mm"]}:(?<second>#{FIELDS["ss"]}(\\.[0-9]+)?)"
      zone = FIELDS["XXX"].to_s

      # Lexical forms used when no explicit pattern is supplied, keyed by
      # datatype URI. The timezone is optional except for dateTimeStamp.
      DEFAULT_REGEXP = {
        "http://www.w3.org/2001/XMLSchema#date" => anchored.call("#{default_date}#{zone}?"),
        "http://www.w3.org/2001/XMLSchema#dateTime" => anchored.call("#{default_date}T#{default_time}#{zone}?"),
        "http://www.w3.org/2001/XMLSchema#dateTimeStamp" => anchored.call("#{default_date}T#{default_time}#{zone}"),
        "http://www.w3.org/2001/XMLSchema#gDay" => anchored.call("---#{FIELDS["dd"]}#{zone}?"),
        "http://www.w3.org/2001/XMLSchema#gMonth" => anchored.call("--#{FIELDS["MM"]}#{zone}?"),
        "http://www.w3.org/2001/XMLSchema#gMonthDay" => anchored.call("--#{FIELDS["MM"]}-#{FIELDS["dd"]}#{zone}?"),
        "http://www.w3.org/2001/XMLSchema#gYear" => anchored.call("#{FIELDS["yyyy"]}#{zone}?"),
        "http://www.w3.org/2001/XMLSchema#gYearMonth" => anchored.call("#{FIELDS["yyyy"]}-#{FIELDS["MM"]}#{zone}?"),
        "http://www.w3.org/2001/XMLSchema#time" => anchored.call("#{default_time}#{zone}?")
      }.freeze

      attr_reader :pattern

      # @param pattern [String, nil] date pattern built from the symbols in
      #   FIELDS (plus "S" for fractional seconds), or nil to use the default
      #   lexical form of +datatype+.
      # @param datatype [String, nil] datatype URI; only consulted when
      #   +pattern+ is nil.
      # @raise [Csvw::DateFormatError] if the pattern contains date field
      #   symbols that are not supported.
      def initialize(pattern, datatype = nil)
        @pattern = pattern

        if pattern.nil?
          @regexp = DEFAULT_REGEXP[datatype]
          @type = datatype
          return
        end

        reject_unknown_symbols(pattern)

        if (@regexp = DATE_PATTERN_REGEXP[pattern])
          @type = XSD_DATE
        elsif (@regexp = TIME_PATTERN_REGEXP[pattern])
          @type = XSD_TIME
        elsif (@regexp = DATE_TIME_PATTERN_REGEXP[pattern])
          @type = XSD_DATE_TIME
        else
          @type = infer_type(pattern)
          @regexp = compile_pattern(pattern)
        end
      end

      # @param value [String, nil]
      # @return [Boolean] true when the whole value matches the format;
      #   false otherwise, including for nil.
      def match(value)
        value&.match?(@regexp) || false
      end

      # Parses a value into its components.
      #
      # @param value [String, nil]
      # @return [Hash, nil] nil when the value does not match the format or
      #   names an impossible date/time (e.g. 30 February). Otherwise a hash
      #   with one Integer entry per matched field (:year, :month, :day,
      #   :hour, :minute), :second as a Float, :timezone normalised to
      #   "+hh:mm", :dateTime (a Date for date formats, a DateTime otherwise)
      #   and :string (the normalised lexical form).
      def parse(value)
        captures = @regexp.match(value)
        return nil if captures.nil?

        parts = extract_parts(captures)
        temporal = build_temporal(parts)
        return nil if temporal.nil?

        parts[:dateTime] = temporal
        parts[:string] = canonical_string(parts)
        parts
      end

      private

      # Raises unless every field symbol in the pattern is one we support.
      def reject_unknown_symbols(pattern)
        leftover = pattern.gsub(/S+/, "")
        # Longest symbols first so that e.g. "MM" is not consumed as two "M"s.
        FIELDS.keys.sort_by { |field| -field.length }.each do |field|
          leftover = leftover.gsub(field, "")
        end
        return unless UNSUPPORTED_SYMBOLS.match?(leftover)

        raise Csvw::DateFormatError, "unrecognised date field symbols in date format"
      end

      # Chooses the datatype for a custom pattern from the presence of the
      # "yyyy" and "HH" symbols; anything else is treated as a dateTime.
      def infer_type(pattern)
        has_date = pattern.include?("yyyy")
        has_time = pattern.include?("HH")
        return XSD_DATE if has_date && !has_time
        return XSD_TIME if has_time && !has_date

        XSD_DATE_TIME
      end

      # Builds the matching regexp for a pattern that is not in one of the
      # lookup tables by substituting each field symbol (first occurrence
      # only) with its FIELDS fragment.
      def compile_pattern(pattern)
        source = pattern.sub("HH", FIELDS["HH"].to_s).sub("mm", FIELDS["mm"].to_s)

        fraction = pattern[/ss\.(S+)/, 1]
        source =
          if fraction
            # Block form so the backslash in the replacement is kept as-is.
            # Not anchored to the end: a timezone may follow the fraction.
            source.sub(/ss\.S+/) { "(?<second>#{FIELDS["ss"]}(\\.[0-9]{1,#{fraction.length}})?)" }
          else
            source.sub("ss", "(?<second>#{FIELDS["ss"]})")
          end

        # Date symbols are only expanded when the pattern contains a year.
        if source.include?("yyyy")
          source = source
            .sub("yyyy", FIELDS["yyyy"].to_s)
            .sub("MM", FIELDS["MM"].to_s)
            .sub("M", FIELDS["M"].to_s)
            .sub("dd", FIELDS["dd"].to_s)
            # A lone "d" counts as a day symbol only before a separator or at
            # the end of the pattern, so the "d" in "(?<day>" / "(?<second>"
            # inserted above is never touched.
            .sub(/d(?=[-T \/.]|\z)/, FIELDS["d"].to_s)
        end

        source = source
          .sub("XXX", FIELDS["XXX"].to_s)
          .sub("XX", FIELDS["XX"].to_s)
          .sub("X", FIELDS["X"].to_s)
          .sub("xxx", FIELDS["xxx"].to_s)
          .sub("xx", FIELDS["xx"].to_s)
          # Skip the "x" of the "(?-mix:" prefix that Regexp#to_s emits.
          .sub(/x(?!:)/, FIELDS["x"].to_s)

        # Remaining bare dots are literal separators from the pattern.
        source = source.gsub(/(?<!\\)\./) { "\\." }

        Regexp.new("\\A#{source}\\z")
      end

      # Converts the named captures of a successful match into a hash of
      # typed values, skipping groups that did not participate.
      def extract_parts(captures)
        captures.names.each_with_object({}) do |name, parts|
          text = captures[name]
          next if text.nil?

          parts[name.to_sym] =
            case name
            when "timezone" then normalise_timezone(text)
            when "second" then text.to_f
            else text.to_i
            end
        end
      end

      # Rewrites "Z", "+hh" and "+hhmm" as "+hh:mm".
      def normalise_timezone(zone)
        return "+00:00" if zone == "Z"
        return "#{zone}:00" if zone.length == 3
        return zone if zone.include?(":")

        "#{zone[0..2]}:#{zone[3..4]}"
      end

      # Builds the Date (for date formats) or DateTime for the parsed parts,
      # defaulting missing fields. Returns nil when the combination is not a
      # real date/time.
      def build_temporal(parts)
        year = parts[:year] || 0
        month = parts[:month] || 1
        day = parts[:day] || 1
        return Date.new(year, month, day) if @type == XSD_DATE

        DateTime.new(year, month, day,
          parts[:hour] || 0, parts[:minute] || 0, parts[:second] || 0,
          parts[:timezone] || "+00:00")
      rescue ArgumentError
        nil
      end

      # Renders the parts in the lexical form of the narrowest datatype that
      # fits them: dateTime, date, gYearMonth, gYear, gMonthDay, gMonth, gDay
      # or time. A time of day is only rendered alongside a full date or when
      # there are no date fields at all.
      def canonical_string(parts)
        year, month, day = parts.values_at(:year, :month, :day)
        zone = parts[:timezone] ? parts[:timezone].sub("+00:00", "Z") : ""

        body =
          if year && month && day
            date = format("%04d-%02d-%02d", year, month, day)
            parts[:hour] ? "#{date}T#{clock_string(parts)}" : date
          elsif year && month
            format("%04d-%02d", year, month)
          elsif year
            format("%04d", year)
          elsif month && day
            format("--%02d-%02d", month, day)
          elsif month
            format("--%02d", month)
          elsif day
            format("---%02d", day)
          else
            clock_string(parts)
          end

        "#{body}#{zone}"
      end

      # Renders hh:mm:ss, treating missing fields as zero.
      # NOTE(review): "%02g" pads the whole number, so 5.5 seconds renders as
      # "5.5" rather than "05.5"; kept as-is pending confirmation that no
      # caller depends on the current output.
      def clock_string(parts)
        format("%02d:%02d:%02g", parts[:hour] || 0, parts[:minute] || 0, parts[:second] || 0)
      end
    end

    # Raised when a date pattern contains unsupported field symbols.
    class DateFormatError < StandardError
    end
  end
end