platform/shared/ruby/sprintf.c in rhodes-3.1.1 vs platform/shared/ruby/sprintf.c in rhodes-3.2.0.beta.1

- old
+ new

@@ -29,11 +29,11 @@ static char* remove_sign_bits(char *str, int base) { char *s, *t; - + s = t = str; if (base == 16) { while (*t == 'f') { t++; @@ -81,14 +81,16 @@ #define FWIDTH 32 #define FPREC 64 #define FPREC0 128 #define CHECK(l) do {\ + int cr = ENC_CODERANGE(result);\ while (blen + (l) >= bsiz) {\ bsiz*=2;\ }\ rb_str_resize(result, bsiz);\ + ENC_CODERANGE_SET(result, cr);\ buf = RSTRING_PTR(result);\ } while (0) #define PUSH(s, l) do { \ CHECK(l);\ @@ -117,15 +119,16 @@ (posarg = -1, GETNTHARG(n)))) #define GETNTHARG(nth) \ ((nth >= argc) ? (rb_raise(rb_eArgError, "too few arguments"), 0) : argv[nth]) -#define GETNAMEARG(id) (posarg > 0 ? \ - (rb_raise(rb_eArgError, "named after unnumbered(%d)", posarg), 0) : \ +#define GETNAMEARG(id, name, len) ( \ + posarg > 0 ? \ + (rb_raise(rb_eArgError, "named%.*s after unnumbered(%d)", (len), (name), posarg), 0) : \ posarg == -1 ? \ - (rb_raise(rb_eArgError, "named after numbered"), 0) : \ - rb_hash_fetch(get_hash(&hash, argc, argv), id)) + (rb_raise(rb_eArgError, "named%.*s after numbered", (len), (name)), 0) : \ + (posarg = -2, rb_hash_lookup2(get_hash(&hash, argc, argv), id, Qundef))) #define GETNUM(n, val) \ for (; p < end && rb_enc_isdigit(*p, enc); p++) { \ int next_n = 10 * n + (*p - '0'); \ if (next_n / 10 != n) {\ @@ -167,16 +170,16 @@ return (*hash = tmp); } /* * call-seq: - * format(format_string [, arguments...] ) => string - * sprintf(format_string [, arguments...] ) => string - * + * format(format_string [, arguments...] ) -> string + * sprintf(format_string [, arguments...] ) -> string + * * Returns the string resulting from applying <i>format_string</i> to * any additional arguments. Within the format string, any characters - * other than format sequences are copied to the result. + * other than format sequences are copied to the result. * * The syntax of a format sequence is follows. * * %[flags][width][.precision]type * @@ -208,79 +211,83 @@ * | leading 'ff's). 
* X | Equivalent to `x', but uses uppercase letters. * * Field | Float Format * ------+-------------------------------------------------------------- - * e | Convert floating point argument into exponential notation + * e | Convert floating point argument into exponential notation * | with one digit before the decimal point as [-]d.dddddde[+-]dd. * | The precision specifies the number of digits after the decimal * | point (defaulting to six). * E | Equivalent to `e', but uses an uppercase E to indicate * | the exponent. - * f | Convert floating point argument as [-]ddd.dddddd, + * f | Convert floating point argument as [-]ddd.dddddd, * | where the precision specifies the number of digits after * | the decimal point. * g | Convert a floating point number using exponential form * | if the exponent is less than -4 or greater than or * | equal to the precision, or in dd.dddd form otherwise. * | The precision specifies the number of significant digits. * G | Equivalent to `g', but use an uppercase `E' in exponent form. + * a | Convert floating point argument as [-]0xh.hhhhp[+-]dd, + * | which is consisted from optional sign, "0x", fraction part + * | as hexadecimal, "p", and exponential part as decimal. + * A | Equivalent to `a', but use uppercase `X' and `P'. * * Field | Other Format * ------+-------------------------------------------------------------- * c | Argument is the numeric code for a single character or * | a single character string itself. * p | The valuing of argument.inspect. * s | Argument is a string to be substituted. If the format * | sequence contains a precision, at most that many characters * | will be copied. * % | A percent sign itself will be displayed. No argument taken. - * + * * The flags modifies the behavior of the formats. * The flag characters are: * * Flag | Applies to | Meaning * ---------+---------------+----------------------------------------- - * space | bBdiouxX | Leave a space at the start of - * | eEfgG | non-negative numbers. 
+ * space | bBdiouxX | Leave a space at the start of + * | aAeEfgG | non-negative numbers. * | (numeric fmt) | For `o', `x', `X', `b' and `B', use * | | a minus sign with absolute value for * | | negative values. * ---------+---------------+----------------------------------------- * (digit)$ | all | Specifies the absolute argument number * | | for this field. Absolute and relative * | | argument numbers cannot be mixed in a * | | sprintf string. * ---------+---------------+----------------------------------------- * # | bBoxX | Use an alternative format. - * | eEfgG | For the conversions `o', increase the precision + * | aAeEfgG | For the conversions `o', increase the precision * | | until the first digit will be `0' if * | | it is not formatted as complements. * | | For the conversions `x', `X', `b' and `B' * | | on non-zero, prefix the result with ``0x'', * | | ``0X'', ``0b'' and ``0B'', respectively. - * | | For `e', `E', `f', `g', and 'G', + * | | For `a', `A', `e', `E', `f', `g', and 'G', * | | force a decimal point to be added, * | | even if no digits follow. * | | For `g' and 'G', do not remove trailing zeros. * ---------+---------------+----------------------------------------- * + | bBdiouxX | Add a leading plus sign to non-negative - * | eEfgG | numbers. + * | aAeEfgG | numbers. * | (numeric fmt) | For `o', `x', `X', `b' and `B', use * | | a minus sign with absolute value for * | | negative values. * ---------+---------------+----------------------------------------- * - | all | Left-justify the result of this conversion. * ---------+---------------+----------------------------------------- * 0 (zero) | bBdiouxX | Pad with zeros, not spaces. - * | eEfgG | For `o', `x', `X', `b' and `B', radix-1 + * | aAeEfgG | For `o', `x', `X', `b' and `B', radix-1 * | (numeric fmt) | is used for negative numbers formatted as * | | complements. * ---------+---------------+----------------------------------------- - * * | all | Use the next argument as the field width. 
+ * * | all | Use the next argument as the field width. * | | If negative, left-justify the result. If the - * | | asterisk is followed by a number and a dollar + * | | asterisk is followed by a number and a dollar * | | sign, use the indicated argument as the width. * * Examples of flags: * * # `+' and space flag specifies the sign of non-negative numbers. @@ -334,11 +341,11 @@ * # It also disables stripping lowest zeros. * sprintf("%g", 123.4) #=> "123.4" * sprintf("%#g", 123.4) #=> "123.400" * sprintf("%g", 123456) #=> "123456" * sprintf("%#g", 123456) #=> "123456." - * + * * The field width is an optional integer, followed optionally by a * period and a precision. The width specifies the minimum number of * characters that will be written to the result for this field. * * Examples of width: @@ -387,11 +394,11 @@ * sprintf("%#20.8b", -11) #=> " 0b..110101" * * # precision for `e' is number of * # digits after the decimal point <------> * sprintf("%20.8e", 1234.56789) #=> " 1.23456789e+03" - * + * * # precision for `f' is number of * # digits after the decimal point <------> * sprintf("%20.8f", 1234.56789) #=> " 1234.56789000" * * # precision for `g' is number of @@ -411,10 +418,19 @@ * sprintf("%08b '%4s'", 123, 123) #=> "01111011 ' 123'" * sprintf("%1$*2$s %2$d %1$s", "hello", 8) #=> " hello 8 hello" * sprintf("%1$*2$s %2$d", "hello", -8) #=> "hello -8" * sprintf("%+g:% g:%-g", 1.23, 1.23, 1.23) #=> "+1.23: 1.23:1.23" * sprintf("%u", -123) #=> "-123" + * + * For more complex formatting, Ruby supports a reference by name. + * %<name>s style uses format style, but %{name} style doesn't. 
+ * + * Examples: + * sprintf("%<foo>d : %<bar>f", { :foo => 1, :bar => 2 }) + * #=> 1 : 2.000000 + * sprintf("%{foo}f", { :foo => 1 }) + * # => "1f" */ VALUE rb_f_sprintf(int argc, const VALUE *argv) { @@ -425,13 +441,15 @@ rb_str_format(int argc, const VALUE *argv, VALUE fmt) { rb_encoding *enc; const char *p, *end; char *buf; - int blen, bsiz; + long blen, bsiz; VALUE result; + long scanned = 0; + int coderange = ENC_CODERANGE_7BIT; int width, prec, flags = FNONE; int nextarg = 1; int posarg = 0; int tainted = 0; VALUE nextvalue; @@ -466,17 +484,23 @@ bsiz = 120; result = rb_str_buf_new(bsiz); rb_enc_copy(result, fmt); buf = RSTRING_PTR(result); memset(buf, 0, bsiz); + ENC_CODERANGE_SET(result, coderange); for (; p < end; p++) { const char *t; int n; + ID id = 0; for (t = p; t < end && *t != '%'; t++) ; PUSH(p, t - p); + if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) { + scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &coderange); + ENC_CODERANGE_SET(result, coderange); + } if (t >= end) { /* end of fmt string */ goto sprint_exit; } p = t + 1; /* skip `%' */ @@ -542,20 +566,26 @@ case '<': case '{': { const char *start = p; char term = (*p == '<') ? 
'>' : '}'; - ID id; for (; p < end && *p != term; ) { p += rb_enc_mbclen(p, end, enc); } if (p >= end) { rb_raise(rb_eArgError, "malformed name - unmatched parenthesis"); } + if (id) { + rb_raise(rb_eArgError, "name%.*s after <%s>", + (int)(p - start + 1), start, rb_id2name(id)); + } id = rb_intern3(start + 1, p - start - 1, enc); - nextvalue = GETNAMEARG(ID2SYM(id)); + nextvalue = GETNAMEARG(ID2SYM(id), start, (int)(p - start + 1)); + if (nextvalue == Qundef) { + rb_raise(rb_eKeyError, "key%.*s not found", (int)(p - start + 1), start); + } if (term == '}') goto format_s; p++; goto retry; } @@ -610,16 +640,16 @@ tmp = rb_check_string_type(val); if (!NIL_P(tmp)) { if (rb_enc_strlen(RSTRING_PTR(tmp),RSTRING_END(tmp),enc) != 1) { rb_raise(rb_eArgError, "%%c requires a character"); } - c = rb_enc_codepoint(RSTRING_PTR(tmp), RSTRING_END(tmp), enc); + c = rb_enc_codepoint_len(RSTRING_PTR(tmp), RSTRING_END(tmp), &n, enc); } else { c = NUM2INT(val); + n = rb_enc_codelen(c, enc); } - n = rb_enc_codelen(c, enc); if (n <= 0) { rb_raise(rb_eArgError, "invalid character"); } if (!(flags & FWIDTH)) { CHECK(n); @@ -651,10 +681,17 @@ if (*p == 'p') arg = rb_inspect(arg); str = rb_obj_as_string(arg); if (OBJ_TAINTED(str)) tainted = 1; len = RSTRING_LEN(str); rb_str_set_len(result, blen); + if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) { + int cr = coderange; + scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &cr); + ENC_CODERANGE_SET(result, + (cr == ENC_CODERANGE_UNKNOWN ? 
+ ENC_CODERANGE_BROKEN : (coderange = cr))); + } enc = rb_enc_check(result, str); if (flags&(FPREC|FWIDTH)) { slen = rb_enc_strlen(RSTRING_PTR(str),RSTRING_END(str),enc); if (slen < 0) { rb_raise(rb_eArgError, "invalid mbstring sequence"); @@ -665,11 +702,11 @@ slen = prec; len = p - RSTRING_PTR(str); } /* need to adjust multi-byte string pos */ if ((flags&FWIDTH) && (width > slen)) { - width -= slen; + width -= (int)slen; if (!(flags&FMINUS)) { CHECK(width); while (width--) { buf[blen++] = ' '; } @@ -749,11 +786,11 @@ val = rb_dbl2big(RFLOAT_VALUE(val)); if (FIXNUM_P(val)) goto bin_retry; bignum = 1; break; case T_STRING: - val = rb_str_to_inum(val, 0, Qtrue); + val = rb_str_to_inum(val, 0, TRUE); goto bin_retry; case T_BIGNUM: bignum = 1; break; case T_FIXNUM: @@ -825,10 +862,11 @@ if (d && *s != d) { *--s = d; } } } + len = (int)strlen(s); } else { if (sign) { tmp = rb_big2str(val, base); s = RSTRING_PTR(tmp); @@ -867,14 +905,14 @@ case 2: if (s[0] != '1') *--s = '1'; break; } } } + len = rb_long2int(RSTRING_END(tmp) - s); } pos = -1; - len = strlen(s); if (dots) { prec -= 2; width -= 2; } @@ -900,11 +938,11 @@ } else if (len == 1 && *s == '0') { prefix = 0; } if (prefix) { - width -= strlen(prefix); + width -= (int)strlen(prefix); } if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) { prec = width; width = 0; } @@ -921,11 +959,11 @@ buf[blen++] = ' '; } } if (sc) PUSH(&sc, 1); if (prefix) { - int plen = strlen(prefix); + int plen = (int)strlen(prefix); PUSH(prefix, plen); } CHECK(prec - len); if (dots) PUSH("..", 2); if (!bignum && v < 0) { @@ -956,10 +994,12 @@ case 'f': case 'g': case 'G': case 'e': case 'E': + case 'a': + case 'A': { VALUE val = GETARG(); double fval; int i, need = 6; char fbuf[32]; @@ -972,11 +1012,11 @@ expr = "NaN"; } else { expr = "Inf"; } - need = strlen(expr); + need = (int)strlen(expr); if ((!isnan(fval) && fval < 0.0) || (flags & FPLUS)) need++; if ((flags & FWIDTH) && need < width) need = width; @@ -987,21 +1027,21 @@ buf[blen++] = '-'; 
else if (flags & FPLUS) buf[blen++] = '+'; else if (flags & FSPACE) blen++; - strncpy(&buf[blen], expr, strlen(expr)); + memcpy(&buf[blen], expr, strlen(expr)); } else { if (!isnan(fval) && fval < 0.0) buf[blen + need - strlen(expr) - 1] = '-'; else if (flags & FPLUS) buf[blen + need - strlen(expr) - 1] = '+'; else if ((flags & FSPACE) && need > width) blen++; - strncpy(&buf[blen + need - strlen(expr)], expr, - strlen(expr)); + memcpy(&buf[blen + need - strlen(expr)], expr, + strlen(expr)); } blen += strlen(&buf[blen]); break; } @@ -1080,13 +1120,12 @@ # define _HAVE_LLP64_ # define quad_t LONG_LONG # define u_quad_t unsigned LONG_LONG # endif #endif -#undef snprintf #define FLOATING_POINT 1 #define BSD__dtoa ruby_dtoa -#include "missing/vsnprintf.c" +#include "vsnprintf.c" static int ruby__sfvwrite(register rb_printf_buffer *fp, register struct __suio *uio) { struct __siov *iov;