lib/validates_timeliness/parser.rb in validates_timeliness-2.3.2 vs lib/validates_timeliness/parser.rb in validates_timeliness-3.0.0.beta

- old
+ new

@@ -1,44 +1,416 @@ +require 'date' + module ValidatesTimeliness - module Parser + # A date and time parsing library which allows you to add custom formats using + # simple predefined tokens. This makes it much easier to catalogue and customize + # the formats rather than dealing directly with regular expressions. + # + # Formats can be added or removed to customize the set of valid date or time + # string values. + # + class Parser + cattr_accessor :time_formats, + :date_formats, + :datetime_formats, + :time_expressions, + :date_expressions, + :datetime_expressions, + :format_tokens, + :format_proc_args + + + # Set the threshold value for a two digit year to be considered last century + # + # Default: 30 + # + # Example: + # year = '29' is considered 2029 + # year = '30' is considered 1930 + # + cattr_accessor :ambiguous_year_threshold + self.ambiguous_year_threshold = 30 + + # Set the dummy date part for a time type value. Should be an array of 3 values + # being year, month and day in that order. + # + # Default: [ 2000, 1, 1 ] same as ActiveRecord + # + cattr_accessor :dummy_date_for_time_type + self.dummy_date_for_time_type = [ 2000, 1, 1 ] + + # Format tokens: + # y = year + # m = month + # d = day + # h = hour + # n = minute + # s = second + # u = micro-seconds + # ampm = meridian (am or pm) with or without dots (e.g. am, a.m, or a.m.) + # _ = optional space + # tz = Timezone abbreviation (e.g. UTC, GMT, PST, EST) + # zo = Timezone offset (e.g. +10:00, -08:00, +1000) + # + # All other characters are considered literal. You can embed regexp in the + # format but no gurantees that it will remain intact. If you avoid the use + # of any token characters and regexp dots or backslashes as special characters + # in the regexp, it may well work as expected. For special characters use + # POSIX character clsses for safety. + # + # Repeating tokens: + # x = 1 or 2 digits for unit (e.g. 'h' means an hour can be '9' or '09') + # xx = 2 digits exactly for unit (e.g. 'hh' means an hour can only be '09') + # + # Special Cases: + # yy = 2 or 4 digit year + # yyyy = exactly 4 digit year + # mmm = month long name (e.g. 'Jul' or 'July') + # ddd = Day name of 3 to 9 letters (e.g. Wed or Wednesday) + # u = microseconds matches 1 to 6 digits + # + # Any other invalid combination of repeating tokens will be swallowed up + # by the next lowest length valid repeating token (e.g. yyy will be + # replaced with yy) + + @@time_formats = [ + 'hh:nn:ss', + 'hh-nn-ss', + 'h:nn', + 'h.nn', + 'h nn', + 'h-nn', + 'h:nn_ampm', + 'h.nn_ampm', + 'h nn_ampm', + 'h-nn_ampm', + 'h_ampm' + ] + + @@date_formats = [ + 'yyyy-mm-dd', + 'yyyy/mm/dd', + 'yyyy.mm.dd', + 'm/d/yy', + 'd/m/yy', + 'm\d\yy', + 'd\m\yy', + 'd-m-yy', + 'dd-mm-yyyy', + 'd.m.yy', + 'd mmm yy' + ] + + @@datetime_formats = [ + 'yyyy-mm-dd hh:nn:ss', + 'yyyy-mm-dd h:nn', + 'yyyy-mm-dd h:nn_ampm', + 'yyyy-mm-dd hh:nn:ss.u', + 'm/d/yy h:nn:ss', + 'm/d/yy h:nn_ampm', + 'm/d/yy h:nn', + 'd/m/yy hh:nn:ss', + 'd/m/yy h:nn_ampm', + 'd/m/yy h:nn', + 'dd-mm-yyyy hh:nn:ss', + 'dd-mm-yyyy h:nn_ampm', + 'dd-mm-yyyy h:nn', + 'ddd, dd mmm yyyy hh:nn:ss (zo|tz)', # RFC 822 + 'ddd mmm d hh:nn:ss zo yyyy', # Ruby time string + 'yyyy-mm-ddThh:nn:ssZ', # iso 8601 without zone offset + 'yyyy-mm-ddThh:nn:sszo' # iso 8601 with zone offset + ] + + + # All tokens available for format construction. The token array is made of + # validation regexp and key for format proc mapping if any. + # If the token needs no format proc arg then the validation regexp should + # not have a capturing group, as all captured groups are passed to the + # format proc. + # + # The token regexp should only use a capture group if 'look-behind' anchor + # is required. The first capture group will be considered a literal and put + # into the validation regexp string as-is. This is a hack. + # + @@format_tokens = { + 'ddd' => [ '\w{3,9}' ], + 'dd' => [ '\d{2}', :day ], + 'd' => [ '\d{1,2}', :day ], + 'ampm' => [ '[aApP]\.?[mM]\.?', :meridian ], + 'mmm' => [ '\w{3,9}', :month ], + 'mm' => [ '\d{2}', :month ], + 'm' => [ '\d{1,2}', :month ], + 'yyyy' => [ '\d{4}', :year ], + 'yy' => [ '\d{4}|\d{2}', :year ], + 'hh' => [ '\d{2}', :hour ], + 'h' => [ '\d{1,2}', :hour ], + 'nn' => [ '\d{2}', :min ], + 'n' => [ '\d{1,2}', :min ], + 'ss' => [ '\d{2}', :sec ], + 's' => [ '\d{1,2}', :sec ], + 'u' => [ '\d{1,6}', :usec ], + 'zo' => [ '[+-]\d{2}:?\d{2}', :offset ], + 'tz' => [ '[A-Z]{1,4}' ], + '_' => [ '\s?' ] + } + + # Arguments which will be passed to the format proc if matched in the + # time string. The key must be the key from the format tokens. The array + # consists of the arry position of the arg, the arg name, and the code to + # place in the time array slot. The position can be nil which means the arg + # won't be placed in the array. + # + # The code can be used to manipulate the arg value if required, otherwise + # should just be the arg name. + # + @@format_proc_args = { + :year => [0, 'y', 'unambiguous_year(y)'], + :month => [1, 'm', 'month_index(m)'], + :day => [2, 'd', 'd'], + :hour => [3, 'h', 'full_hour(h, md ||= nil)'], + :min => [4, 'n', 'n'], + :sec => [5, 's', 's'], + :usec => [6, 'u', 'microseconds(u)'], + :offset => [7, 'z', 'offset_in_seconds(z)'], + :meridian => [nil, 'md', nil] + } + + + @@type_wrapper = { + :date => [/\A/, nil], + :time => [nil , /\Z/], + :datetime => [/\A/, /\Z/] + } + class << self + def compile_format_expressions + @@time_expressions = compile_formats(@@time_formats) + @@date_expressions = compile_formats(@@date_formats) + @@datetime_expressions = compile_formats(@@datetime_formats) + end + def parse(raw_value, type, options={}) return nil if raw_value.blank? - return raw_value if raw_value.acts_like?(:time) || raw_value.is_a?(Date) + return raw_value if raw_value.acts_like?(:time) || raw_value.acts_like?(:date) - time_array = ValidatesTimeliness::Formats.parse(raw_value, type, options.reverse_merge(:strict => true)) + time_array = _parse(raw_value, type, options.reverse_merge(:strict => true)) return nil if time_array.nil? if type == :date Date.new(*time_array[0..2]) rescue nil else - make_time(time_array[0..6]) + make_time(time_array[0..7], options[:timezone_aware]) end end - def make_time(time_array) - # Enforce date part validity which Time class does not + def make_time(time_array, timezone_aware=false) + # Enforce strict date part validity which Time class does not return nil unless Date.valid_civil?(*time_array[0..2]) - if Time.respond_to?(:zone) && ValidatesTimeliness.use_time_zones + if timezone_aware Time.zone.local(*time_array) else - # Older AR way of handling times with datetime fallback - begin - time_zone = ValidatesTimeliness.default_timezone - Time.send(time_zone, *time_array) - rescue ArgumentError, TypeError - zone_offset = time_zone == :local ? DateTime.local_offset : 0 - time_array.pop # remove microseconds - DateTime.civil(*(time_array << zone_offset)) - end + Time.time_with_datetime_fallback(ValidatesTimeliness.default_timezone, *time_array) end rescue ArgumentError, TypeError nil end - end + # Loop through format expressions for type and call the format method on a match. + # Allow pre or post match strings to exist if strict is false. Otherwise wrap + # regexp in start and end anchors. + # + # Returns time array if matches a format, nil otherwise. + # + def _parse(string, type, options={}) + options.reverse_merge!(:strict => true) + sets = if options[:format] + options[:strict] = true + [ send("#{type}_expressions").assoc(options[:format]) ] + else + expression_set(type, string) + end + + set = sets.find do |format, regexp| + string =~ wrap_regexp(regexp, type, options[:strict]) + end + + if set + last = options[:include_offset] ? 8 : 7 + values = send(:"format_#{set[0]}", *$~[1..last]) + values[0..2] = ValidatesTimeliness.dummy_date_for_time_type if type == :time + return values + end + rescue + nil + end + + # Delete formats of specified type. Error raised if format not found. + def remove_formats(type, *remove_formats) + remove_formats.each do |format| + unless self.send("#{type}_formats").delete(format) + raise "Format #{format} not found in #{type} formats" + end + end + compile_format_expressions + end + + # Adds new formats. Must specify format type and can specify a :before + # option to nominate which format the new formats should be inserted in + # front on to take higher precedence. + # Error is raised if format already exists or if :before format is not found. + def add_formats(type, *add_formats) + formats = self.send("#{type}_formats") + options = {} + options = add_formats.pop if add_formats.last.is_a?(Hash) + before = options[:before] + raise "Format for :before option #{format} was not found." if before && !formats.include?(before) + + add_formats.each do |format| + raise "Format #{format} is already included in #{type} formats" if formats.include?(format) + + index = before ? formats.index(before) : -1 + formats.insert(index, format) + end + compile_format_expressions + end + + # Removes formats where the 1 or 2 digit month comes first, to eliminate + # formats which are ambiguous with the European style of day then month. + # The mmm token is ignored as its not ambigous. + def remove_us_formats + us_format_regexp = /\Am{1,2}[^m]/ + date_formats.reject! { |format| us_format_regexp =~ format } + datetime_formats.reject! { |format| us_format_regexp =~ format } + compile_format_expressions + end + + def full_hour(hour, meridian) + hour = hour.to_i + return hour if meridian.nil? + if meridian.delete('.').downcase == 'am' + raise if hour == 0 || hour > 12 + hour == 12 ? 0 : hour + else + hour == 12 ? hour : hour + 12 + end + end + + def unambiguous_year(year) + if year.length <= 2 + century = Time.now.year.to_s[0..1].to_i + century -= 1 if year.to_i >= ambiguous_year_threshold + year = "#{century}#{year.rjust(2,'0')}" + end + year.to_i + end + + def month_index(month) + return month.to_i if month.to_i.nonzero? + abbr_month_names.index(month.capitalize) || month_names.index(month.capitalize) + end + + def month_names + I18n.t('date.month_names') + end + + def abbr_month_names + I18n.t('date.abbr_month_names') + end + + def microseconds(usec) + (".#{usec}".to_f * 1_000_000).to_i + end + + def offset_in_seconds(offset) + sign = offset =~ /^-/ ? -1 : 1 + parts = offset.scan(/\d\d/).map {|p| p.to_f } + parts[1] = parts[1].to_f / 60 + (parts[0] + parts[1]) * sign * 3600 + end + + private + + # Generate regular expression from format string + def generate_format_expression(string_format) + format = string_format.dup + format.gsub!(/([\.\\])/, '\\\\\1') # escapes dots and backslashes + found_tokens, token_order = [], [] + + tokens = format_tokens.keys.sort {|a,b| a.size <=> b.size }.reverse + tokens.each do |token| + regexp_str, arg_key = *format_tokens[token] + if format.gsub!(/#{token}/, "%<#{found_tokens.size}>") + regexp_str = "(#{regexp_str})" if arg_key + found_tokens << [regexp_str, arg_key] + end + end + + format.scan(/%<(\d)>/).each {|token_index| + token_index = token_index.first + token = found_tokens[token_index.to_i] + format.gsub!("%<#{token_index}>", token[0]) + token_order << token[1] + } + + compile_format_method(token_order.compact, string_format) + Regexp.new(format) + rescue + raise "The following format regular expression failed to compile: #{format}\n from format #{string_format}." + end + + # Compiles a format method which maps the regexp capture groups to method + # arguments based on order captured. A time array is built using the values + # in the position indicated by the first element of the proc arg array. + # + def compile_format_method(order, name) + values = [nil] * 7 + args = [] + order.each do |part| + proc_arg = format_proc_args[part] + args << proc_arg[1] + values[proc_arg[0]] = proc_arg[2] if proc_arg[0] + end + class_eval <<-DEF + class << self + define_method(:"format_#{name}") do |#{args.join(',')}| + [#{values.map {|i| i || 'nil' }.join(',')}].map {|i| i.is_a?(Float) ? i : i.to_i } + end + end + DEF + end + + def compile_formats(formats) + formats.map { |format| [ format, generate_format_expression(format) ] } + end + + # Pick expression set and combine date and datetimes for + # datetime attributes to allow date string as datetime + def expression_set(type, string) + case type + when :date + date_expressions + when :time + time_expressions + when :datetime + # gives a speed-up for date string as datetime attributes + if string.length < 11 + date_expressions + datetime_expressions + else + datetime_expressions + date_expressions + end + end + end + + def wrap_regexp(regexp, type, strict=false) + type = strict ? :datetime : type + /#{@@type_wrapper[type][0]}#{regexp}#{@@type_wrapper[type][1]}/ + end + + end end end + +ValidatesTimeliness::Parser.compile_format_expressions