/* date_strptime.c: Coded by Tadayoshi Funaba 2011,2012 */

#include "ruby.h"
#include "ruby/encoding.h"
#include "ruby/re.h"
#include <ctype.h>

#undef strncasecmp
#define strncasecmp STRNCASECMP

/* Full English day names, indexed by weekday number (0 = Sunday). */
static const char *day_names[] = {
    "Sunday", "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Saturday",
};
static const int ABBREVIATED_DAY_NAME_LENGTH = 3;

/* Full English month names, indexed by month number minus one. */
static const char *month_names[] = {
    "January", "February", "March", "April",
    "May", "June", "July", "August", "September",
    "October", "November", "December",
};
static const int ABBREVIATED_MONTH_NAME_LENGTH = 3;

#define sizeof_array(o) (sizeof o / sizeof o[0])

/* Thin wrappers that dispatch arithmetic/comparison to Ruby methods. */
#define f_negate(x) rb_funcall(x, rb_intern("-@"), 0)
#define f_add(x,y) rb_funcall(x, '+', 1, y)
#define f_sub(x,y) rb_funcall(x, '-', 1, y)
#define f_mul(x,y) rb_funcall(x, '*', 1, y)
#define f_div(x,y) rb_funcall(x, '/', 1, y)
#define f_idiv(x,y) rb_funcall(x, rb_intern("div"), 1, y)
#define f_mod(x,y) rb_funcall(x, '%', 1, y)
#define f_expt(x,y) rb_funcall(x, rb_intern("**"), 1, y)

#define f_lt_p(x,y) rb_funcall(x, '<', 1, y)
#define f_gt_p(x,y) rb_funcall(x, '>', 1, y)
#define f_le_p(x,y) rb_funcall(x, rb_intern("<="), 1, y)
#define f_ge_p(x,y) rb_funcall(x, rb_intern(">="), 1, y)

#define f_match(r,s) rb_funcall(r, rb_intern("match"), 1, s)
#define f_aref(o,i) rb_funcall(o, rb_intern("[]"), 1, i)
#define f_end(o,i) rb_funcall(o, rb_intern("end"), 1, i)

#define issign(c) ((c) == '-' || (c) == '+')

/*
 * Returns 1 when the format text at +s+ begins with something that
 * consumes digits: a literal digit, or a '%' directive (optionally
 * prefixed by an 'E' or 'O' modifier) whose conversion character is in
 * the numeric set below or is itself a digit.  Returns 0 otherwise.
 *
 * +s+ must be NUL-terminated; the scan stops at the terminator.
 */
static int
num_pattern_p(const char *s)
{
    if (isdigit((unsigned char)*s))
        return 1;
    if (*s == '%') {
        s++;
        if (*s == 'E' || *s == 'O')
            s++;
        if (*s &&
            (strchr("CDdeFGgHIjkLlMmNQRrSsTUuVvWwXxYy", *s) ||
             isdigit((unsigned char)*s)))
            return 1;
    }
    return 0;
}

/*
 * True when the format item following the current directive is numeric.
 * Directives use this to choose a fixed field width instead of reading
 * every available digit.
 */
#define NUM_PATTERN_P() num_pattern_p(&fmt[fi + 1])

/*
 * Reads up to +width+ leading decimal digits from s[0, slen) and stores
 * their value in *n as a Ruby Integer.
 *
 * Returns the number of digits consumed, or 0 (leaving *n untouched)
 * when +width+ is 0 or the input does not start with a digit.
 */
static long
read_digits(const char *s, size_t slen, VALUE *n, size_t width)
{
    size_t l;

    if (!width)
        return 0;

    l = 0;
    while (l < slen && ISDIGIT(s[l])) {
        if (++l == width) break;
    }

    if (l == 0)
        return 0;

    /*
     * Budget 4 bits per decimal digit: if that fits in a long the value
     * cannot overflow, so accumulate it natively.
     */
    if ((4 * l * sizeof(char)) <= (sizeof(long)*CHAR_BIT)) {
        const char *os = s;
        long v;

        v = 0;
        while ((size_t)(s - os) < l) {
            v *= 10;
            v += *s - '0';
            s++;
        }
        if (os == s)
            return 0;
        *n = LONG2NUM(v);
        return l;
    }
    else {
        /* Too many digits for a long: let Ruby parse a NUL-terminated copy. */
        VALUE vbuf = 0;
        char *s2 = ALLOCV_N(char, vbuf, l + 1);
        memcpy(s2, s, l);
        s2[l] = '\0';
        *n = rb_cstr_to_inum(s2, 10, 0);
        ALLOCV_END(vbuf);
        return l;
    }
}

/* Accessors for the result hash; +k+ must be a string literal. */
#define set_hash(k,v) rb_hash_aset(hash, ID2SYM(rb_intern(k"")), v)
#define ref_hash(k) rb_hash_aref(hash, ID2SYM(rb_intern(k"")))
#define del_hash(k) rb_hash_delete(hash, ID2SYM(rb_intern(k"")))

/* Marks the parse as failed in the hash and returns 0 from the caller. */
#define fail() \
do { \
    set_hash("_fail", Qtrue); \
    return 0; \
} while (0)

#define fail_p() (!NIL_P(ref_hash("_fail")))

/*
 * Reads up to +w+ digits at the current input position into +n+ and
 * advances +si+; fails the parse when no digit is present.
 */
#define READ_DIGITS(n,w) \
do { \
    size_t l; \
    l = read_digits(&str[si], slen - si, &n, w); \
    if (l == 0) \
        fail(); \
    si += l; \
} while (0)

#define READ_DIGITS_MAX(n) READ_DIGITS(n, LONG_MAX)

/*
 * Returns non-zero when a <= v <= b.  Fixnums are compared natively;
 * any other value is compared through its Ruby '<' and '>' methods.
 */
static int
valid_range_p(VALUE v, int a, int b)
{
    if (FIXNUM_P(v)) {
        int vi = FIX2INT(v);
        return !(vi < a || vi > b);
    }
    /* rb_funcall yields a VALUE; convert it to a C truth value explicitly. */
    return !(RTEST(f_lt_p(v, INT2NUM(a))) || RTEST(f_gt_p(v, INT2NUM(b))));
}

/*
 * Parses the remaining input against the literal format +fmt+ (a
 * composite directive's expansion) and advances +si+ by what was
 * consumed; propagates failure to the caller.
 */
#define recur(fmt) \
do { \
    size_t l; \
    l = date__strptime_internal(&str[si], slen - si, \
                                fmt, sizeof fmt - 1, hash); \
    if (fail_p()) \
        return 0; \
    si += l; \
} while (0)

VALUE date_zone_to_diff(VALUE);

/*
 * Returns non-zero when at least +len+ bytes remain in str[si, slen) and
 * they equal the first +len+ bytes of +name+, ignoring case.
 */
static inline int
head_match_p(size_t len, const char *name, const char *str, size_t slen, size_t si)
{
    return slen - si >= len && strncasecmp(name, &str[si], len) == 0;
}

/*
 * Core strptime loop: walks +fmt+ and consumes matching text from +str+,
 * recording each parsed field in +hash+.
 *
 * Returns the number of bytes of +str+ consumed.  On any mismatch the
 * "_fail" key is set in +hash+ and 0 is returned.
 *
 * The loop looks one byte ahead in +fmt+ (fmt[fi + 1], and fmt[fi] after
 * stepping past '%'), so it can read fmt[flen] when a directive ends the
 * format.  NOTE(review): this presumably relies on +fmt+ being
 * NUL-terminated at fmt[flen] -- confirm against callers.
 */
static size_t
date__strptime_internal(const char *str, size_t slen,
                        const char *fmt, size_t flen, VALUE hash)
{
    size_t si, fi;
    int c;

#define HEAD_MATCH_P(len, name) head_match_p(len, name, str, slen, si)

    si = fi = 0;

    while (fi < flen) {
        /* A run of format whitespace matches any (possibly empty) run in the input. */
        if (isspace((unsigned char)fmt[fi])) {
            while (si < slen && isspace((unsigned char)str[si]))
                si++;
            while (++fi < flen && isspace((unsigned char)fmt[fi]));
            continue;
        }

        /* Format still has non-space items but the input is exhausted. */
        if (si >= slen) fail();

        switch (fmt[fi]) {
          case '%':

          again:
            fi++;
            c = fmt[fi];

            switch (c) {
              case 'E':
                /* Modifier: skip it when a supported conversion follows; */
                /* otherwise rewind and match the '%' literally. */
                if (fmt[fi + 1] && strchr("cCxXyY", fmt[fi + 1]))
                    goto again;
                fi--;
                goto ordinal;
              case 'O':
                if (fmt[fi + 1] && strchr("deHImMSuUVwWy", fmt[fi + 1]))
                    goto again;
                fi--;
                goto ordinal;
              case ':':
                {
                    int i;

                    /* Accept "%:z", "%::z" and "%:::z"; anything else fails. */
                    for (i = 1; i < 3 && fi + i < flen && fmt[fi+i] == ':'; ++i);
                    if (fmt[fi+i] == 'z') {
                        fi += i - 1;
                        goto again;
                    }
                    fail();
                }

              case 'A':
              case 'a':
                {
                    int i;

                    /* Try the full name first, then its 3-letter abbreviation. */
                    for (i = 0; i < (int)sizeof_array(day_names); i++) {
                        const char *day_name = day_names[i];
                        size_t l = strlen(day_name);
                        if (HEAD_MATCH_P(l, day_name) ||
                            HEAD_MATCH_P(l = ABBREVIATED_DAY_NAME_LENGTH, day_name)) {
                            si += l;
                            set_hash("wday", INT2FIX(i));
                            goto matched;
                        }
                    }
                    fail();
                }
              case 'B':
              case 'b':
              case 'h':
                {
                    int i;

                    /* Try the full name first, then its 3-letter abbreviation. */
                    for (i = 0; i < (int)sizeof_array(month_names); i++) {
                        const char *month_name = month_names[i];
                        size_t l = strlen(month_name);
                        if (HEAD_MATCH_P(l, month_name) ||
                            HEAD_MATCH_P(l = ABBREVIATED_MONTH_NAME_LENGTH, month_name)) {
                            si += l;
                            set_hash("mon", INT2FIX(i + 1));
                            goto matched;
                        }
                    }
                    fail();
                }

              case 'C':
                {
                    VALUE n;

                    if (NUM_PATTERN_P())
                        READ_DIGITS(n, 2);
                    else
                        READ_DIGITS_MAX(n);
                    set_hash("_cent", n);
                    goto matched;
                }

              case 'c':
                recur("%a %b %e %H:%M:%S %Y");
                goto matched;

              case 'D':
                recur("%m/%d/%y");
                goto matched;

              case 'd':
              case 'e':
                {
                    VALUE n;

                    /* A leading space stands in for the tens digit. */
                    if (str[si] == ' ') {
                        si++;
                        READ_DIGITS(n, 1);
                    }
                    else {
                        READ_DIGITS(n, 2);
                    }
                    if (!valid_range_p(n, 1, 31))
                        fail();
                    set_hash("mday", n);
                    goto matched;
                }

              case 'F':
                recur("%Y-%m-%d");
                goto matched;

              case 'G':
                {
                    VALUE n;

                    if (NUM_PATTERN_P())
                        READ_DIGITS(n, 4);
                    else
                        READ_DIGITS_MAX(n);
                    set_hash("cwyear", n);
                    goto matched;
                }

              case 'g':
                {
                    VALUE n;

                    READ_DIGITS(n, 2);
                    if (!valid_range_p(n, 0, 99))
                        fail();
                    set_hash("cwyear",n);
                    /* No explicit century yet: 69..99 -> 19, 00..68 -> 20. */
                    if (NIL_P(ref_hash("_cent")))
                        set_hash("_cent",
                                 INT2FIX(RTEST(f_ge_p(n, INT2FIX(69))) ? 19 : 20));
                    goto matched;
                }

              case 'H':
              case 'k':
                {
                    VALUE n;

                    if (str[si] == ' ') {
                        si++;
                        READ_DIGITS(n, 1);
                    }
                    else {
                        READ_DIGITS(n, 2);
                    }
                    if (!valid_range_p(n, 0, 24))
                        fail();
                    set_hash("hour", n);
                    goto matched;
                }

              case 'I':
              case 'l':
                {
                    VALUE n;

                    if (str[si] == ' ') {
                        si++;
                        READ_DIGITS(n, 1);
                    }
                    else {
                        READ_DIGITS(n, 2);
                    }
                    if (!valid_range_p(n, 1, 12))
                        fail();
                    set_hash("hour", n);
                    goto matched;
                }

              case 'j':
                {
                    VALUE n;

                    READ_DIGITS(n, 3);
                    if (!valid_range_p(n, 1, 366))
                        fail();
                    set_hash("yday", n);
                    goto matched;
                }

              case 'L':
              case 'N':
                {
                    VALUE n;
                    int sign = 1;
                    size_t osi;

                    if (issign(str[si])) {
                        if (str[si] == '-')
                            sign = -1;
                        si++;
                    }
                    osi = si;
                    if (NUM_PATTERN_P())
                        READ_DIGITS(n, c == 'L' ? 3 : 9);
                    else
                        READ_DIGITS_MAX(n);
                    if (sign == -1)
                        n = f_negate(n);
                    /* Fraction = digits / 10**(number of digits read). */
                    set_hash("sec_fraction",
                             rb_rational_new2(n,
                                              f_expt(INT2FIX(10),
                                                     ULONG2NUM(si - osi))));
                    goto matched;
                }

              case 'M':
                {
                    VALUE n;

                    READ_DIGITS(n, 2);
                    if (!valid_range_p(n, 0, 59))
                        fail();
                    set_hash("min", n);
                    goto matched;
                }

              case 'm':
                {
                    VALUE n;

                    READ_DIGITS(n, 2);
                    if (!valid_range_p(n, 1, 12))
                        fail();
                    set_hash("mon", n);
                    goto matched;
                }

              case 'n':
              case 't':
                recur(" ");
                goto matched;

              case 'P':
              case 'p':
                if (slen - si < 2) fail();
                {
                    /* Accepts AM/PM and A.M./P.M. in either letter case. */
                    char ch = str[si];
                    const int hour = (ch == 'P' || ch == 'p') ? 12 : 0;
                    if (!hour && !(ch == 'A' || ch == 'a')) fail();
                    if ((ch = str[si+1]) == '.') {
                        if (slen - si < 4 || str[si+3] != '.') fail();
                        ch = str[si += 2];
                    }
                    if (!(ch == 'M' || ch == 'm')) fail();
                    si += 2;
                    set_hash("_merid", INT2FIX(hour));
                    goto matched;
                }

              case 'Q':
                {
                    VALUE n;
                    int sign = 1;

                    if (str[si] == '-') {
                        sign = -1;
                        si++;
                    }
                    READ_DIGITS_MAX(n);
                    if (sign == -1)
                        n = f_negate(n);
                    /* Stored as n/1000. */
                    set_hash("seconds",
                             rb_rational_new2(n, INT2FIX(1000)));
                    goto matched;
                }

              case 'R':
                recur("%H:%M");
                goto matched;

              case 'r':
                recur("%I:%M:%S %p");
                goto matched;

              case 'S':
                {
                    VALUE n;

                    READ_DIGITS(n, 2);
                    if (!valid_range_p(n, 0, 60))
                        fail();
                    set_hash("sec", n);
                    goto matched;
                }

              case 's':
                {
                    VALUE n;
                    int sign = 1;

                    if (str[si] == '-') {
                        sign = -1;
                        si++;
                    }
                    READ_DIGITS_MAX(n);
                    if (sign == -1)
                        n = f_negate(n);
                    set_hash("seconds", n);
                    goto matched;
                }

              case 'T':
                recur("%H:%M:%S");
                goto matched;

              case 'U':
              case 'W':
                {
                    VALUE n;

                    READ_DIGITS(n, 2);
                    if (!valid_range_p(n, 0, 53))
                        fail();
                    set_hash(c == 'U' ? "wnum0" : "wnum1", n);
                    goto matched;
                }

              case 'u':
                {
                    VALUE n;

                    READ_DIGITS(n, 1);
                    if (!valid_range_p(n, 1, 7))
                        fail();
                    set_hash("cwday", n);
                    goto matched;
                }

              case 'V':
                {
                    VALUE n;

                    READ_DIGITS(n, 2);
                    if (!valid_range_p(n, 1, 53))
                        fail();
                    set_hash("cweek", n);
                    goto matched;
                }

              case 'v':
                recur("%e-%b-%Y");
                goto matched;

              case 'w':
                {
                    VALUE n;

                    READ_DIGITS(n, 1);
                    if (!valid_range_p(n, 0, 6))
                        fail();
                    set_hash("wday", n);
                    goto matched;
                }

              case 'X':
                recur("%H:%M:%S");
                goto matched;

              case 'x':
                recur("%m/%d/%y");
                goto matched;

              case 'Y':
                {
                    VALUE n;
                    int sign = 1;

                    if (issign(str[si])) {
                        if (str[si] == '-')
                            sign = -1;
                        si++;
                    }
                    if (NUM_PATTERN_P())
                        READ_DIGITS(n, 4);
                    else
                        READ_DIGITS_MAX(n);
                    if (sign == -1)
                        n = f_negate(n);
                    set_hash("year", n);
                    goto matched;
                }

              case 'y':
                {
                    VALUE n;

                    /* Two-digit years carry no sign. */
                    READ_DIGITS(n, 2);
                    if (!valid_range_p(n, 0, 99))
                        fail();
                    set_hash("year", n);
                    /* No explicit century yet: 69..99 -> 19, 00..68 -> 20. */
                    if (NIL_P(ref_hash("_cent")))
                        set_hash("_cent",
                                 INT2FIX(RTEST(f_ge_p(n, INT2FIX(69))) ? 19 : 20));
                    goto matched;
                }

              case 'Z':
              case 'z':
                {
                    /*
                     * Alternatives, in order: numeric offset with optional
                     * gmt/utc prefix; "<name> standard|daylight time";
                     * alphabetic zone name with optional " dst".
                     */
                    static const char pat_source[] =
                        "\\A("
                        "(?:gmt|utc?)?[-+]\\d+(?:[,.:]\\d+(?::\\d+)?)?"
                        "|(?-i:[[:alpha:].\\s]+)(?:standard|daylight)\\s+time\\b"
                        "|(?-i:[[:alpha:]]+)(?:\\s+dst)?\\b"
                        ")";
                    static VALUE pat = Qnil;
                    VALUE m, b;

                    /* Compile the pattern once and keep it registered with the GC. */
                    if (NIL_P(pat)) {
                        pat = rb_reg_new(pat_source, sizeof pat_source - 1,
                                         ONIG_OPTION_IGNORECASE);
                        rb_obj_freeze(pat);
                        rb_gc_register_mark_object(pat);
                    }

                    /* Save the current backref; it is restored on both exits below. */
                    b = rb_backref_get();
                    rb_match_busy(b);
                    m = f_match(pat, rb_usascii_str_new(&str[si], slen - si));

                    if (!NIL_P(m)) {
                        VALUE s, l, o;

                        s = rb_reg_nth_match(1, m);
                        l = f_end(m, INT2FIX(0));
                        o = date_zone_to_diff(s);
                        si += NUM2LONG(l);
                        set_hash("zone", s);
                        set_hash("offset", o);
                        rb_backref_set(b);
                        goto matched;
                    }
                    rb_backref_set(b);
                    fail();
                }

              case '%':
                if (str[si] != '%')
                    fail();
                si++;
                goto matched;

              case '+':
                recur("%a %b %e %H:%M:%S %Z %Y");
                goto matched;

              default:
                /* Unknown directive: match "%" and the directive character literally. */
                if (str[si] != '%')
                    fail();
                si++;
                if (fi < flen) {
                    if (si >= slen || str[si] != fmt[fi])
                        fail();
                    si++;
                }
                goto matched;
            }
          default:
          ordinal:
            /* Ordinary format character: must match the input byte exactly. */
            if (str[si] != fmt[fi])
                fail();
            si++;
            fi++;
            break;
          matched:
            fi++;
            break;
        }
    }

    return si;
}

/*
 * Parses +str+ according to +fmt+ and fills +hash+ with the fields found.
 *
 * Any unconsumed tail of +str+ is stored under "leftover".  Returns Qnil
 * when parsing failed.  Otherwise the temporary "_cent" and "_merid"
 * entries are removed and folded into the result before +hash+ is
 * returned:
 *   - "_cent":  cwyear/year become value + cent * 100;
 *   - "_merid": hour becomes (hour % 12) + merid.
 */
VALUE
date__strptime(const char *str, size_t slen,
               const char *fmt, size_t flen, VALUE hash)
{
    size_t si;
    VALUE cent, merid;

    si = date__strptime_internal(str, slen, fmt, flen, hash);

    if (slen > si) {
        VALUE s;

        s = rb_usascii_str_new(&str[si], slen - si);
        set_hash("leftover", s);
    }

    if (fail_p())
        return Qnil;

    cent = del_hash("_cent");
    if (!NIL_P(cent)) {
        VALUE year;

        year = ref_hash("cwyear");
        if (!NIL_P(year))
            set_hash("cwyear", f_add(year, f_mul(cent, INT2FIX(100))));
        year = ref_hash("year");
        if (!NIL_P(year))
            set_hash("year", f_add(year, f_mul(cent, INT2FIX(100))));
    }

    merid = del_hash("_merid");
    if (!NIL_P(merid)) {
        VALUE hour;

        hour = ref_hash("hour");
        if (!NIL_P(hour)) {
            hour = f_mod(hour, INT2FIX(12));
            set_hash("hour", f_add(hour, merid));
        }
    }

    return hash;
}

/*
Local variables:
c-file-style: "ruby"
End:
*/