sprintf.c in rhodes-3.2.0.beta.1

- old
+ new

@@ -29,11 +29,11 @@
 
 static char*
 remove_sign_bits(char *str, int base)
 {
     char *s, *t;
-    
+
     s = t = str;
 
     if (base == 16) {
 	while (*t == 'f') {
 	    t++;
@@ -81,14 +81,16 @@
 #define FWIDTH 32
 #define FPREC  64
 #define FPREC0 128
 
 #define CHECK(l) do {\
+    int cr = ENC_CODERANGE(result);\
     while (blen + (l) >= bsiz) {\
 	bsiz*=2;\
     }\
     rb_str_resize(result, bsiz);\
+    ENC_CODERANGE_SET(result, cr);\
     buf = RSTRING_PTR(result);\
 } while (0)
 
 #define PUSH(s, l) do { \
     CHECK(l);\
@@ -117,15 +119,16 @@
 	       (posarg = -1, GETNTHARG(n))))
 
 #define GETNTHARG(nth) \
     ((nth >= argc) ? (rb_raise(rb_eArgError, "too few arguments"), 0) : argv[nth])
 
-#define GETNAMEARG(id) (posarg > 0 ? \
-    (rb_raise(rb_eArgError, "named after unnumbered(%d)", posarg), 0) : \
+#define GETNAMEARG(id, name, len) ( \
+    posarg > 0 ? \
+    (rb_raise(rb_eArgError, "named%.*s after unnumbered(%d)", (len), (name), posarg), 0) : \
     posarg == -1 ? \
-    (rb_raise(rb_eArgError, "named after numbered"), 0) : \
-    rb_hash_fetch(get_hash(&hash, argc, argv), id))
+    (rb_raise(rb_eArgError, "named%.*s after numbered", (len), (name)), 0) :	\
+    (posarg = -2, rb_hash_lookup2(get_hash(&hash, argc, argv), id, Qundef)))
 
 #define GETNUM(n, val) \
     for (; p < end && rb_enc_isdigit(*p, enc); p++) {	\
 	int next_n = 10 * n + (*p - '0'); \
         if (next_n / 10 != n) {\
@@ -167,16 +170,16 @@
     return (*hash = tmp);
 }
 
 /*
  *  call-seq:
- *     format(format_string [, arguments...] )   => string
- *     sprintf(format_string [, arguments...] )  => string
- *  
+ *     format(format_string [, arguments...] )   -> string
+ *     sprintf(format_string [, arguments...] )  -> string
+ *
  *  Returns the string resulting from applying <i>format_string</i> to
  *  any additional arguments.  Within the format string, any characters
- *  other than format sequences are copied to the result. 
+ *  other than format sequences are copied to the result.
  *
  *  The syntax of a format sequence is as follows.
  *
  *    %[flags][width][.precision]type
  *
@@ -208,79 +211,83 @@
  *            | leading 'ff's).
  *        X   | Equivalent to `x', but uses uppercase letters.
  *
  *      Field |  Float Format
  *      ------+--------------------------------------------------------------
- *        e   | Convert floating point argument into exponential notation 
+ *        e   | Convert floating point argument into exponential notation
  *            | with one digit before the decimal point as [-]d.dddddde[+-]dd.
  *            | The precision specifies the number of digits after the decimal
  *            | point (defaulting to six).
  *        E   | Equivalent to `e', but uses an uppercase E to indicate
  *            | the exponent.
- *        f   | Convert floating point argument as [-]ddd.dddddd, 
+ *        f   | Convert floating point argument as [-]ddd.dddddd,
  *            | where the precision specifies the number of digits after
  *            | the decimal point.
  *        g   | Convert a floating point number using exponential form
  *            | if the exponent is less than -4 or greater than or
  *            | equal to the precision, or in dd.dddd form otherwise.
  *            | The precision specifies the number of significant digits.
  *        G   | Equivalent to `g', but uses an uppercase `E' in exponent form.
+ *        a   | Convert floating point argument as [-]0xh.hhhhp[+-]dd,
+ *            | which consists of an optional sign, "0x", the fraction part
+ *            | in hexadecimal, "p", and the exponent part in decimal.
+ *        A   | Equivalent to `a', but uses uppercase `X' and `P'.
  *
  *      Field |  Other Format
  *      ------+--------------------------------------------------------------
  *        c   | Argument is the numeric code for a single character or
  *            | a single character string itself.
  *        p   | The value of argument.inspect.
  *        s   | Argument is a string to be substituted.  If the format
  *            | sequence contains a precision, at most that many characters
  *            | will be copied.
  *        %   | A percent sign itself will be displayed.  No argument taken.
- *     
+ *
  *  The flags modify the behavior of the formats.
  *  The flag characters are:
  *
  *    Flag     | Applies to    | Meaning
  *    ---------+---------------+-----------------------------------------
- *    space    | bBdiouxX      | Leave a space at the start of 
- *             | eEfgG         | non-negative numbers.
+ *    space    | bBdiouxX      | Leave a space at the start of
+ *             | aAeEfgG       | non-negative numbers.
  *             | (numeric fmt) | For `o', `x', `X', `b' and `B', use
  *             |               | a minus sign with absolute value for
  *             |               | negative values.
  *    ---------+---------------+-----------------------------------------
  *    (digit)$ | all           | Specifies the absolute argument number
  *             |               | for this field.  Absolute and relative
  *             |               | argument numbers cannot be mixed in a
  *             |               | sprintf string.
  *    ---------+---------------+-----------------------------------------
  *     #       | bBoxX         | Use an alternative format.
- *             | eEfgG         | For the conversions `o', increase the precision
+ *             | aAeEfgG       | For the conversions `o', increase the precision
  *             |               | until the first digit will be `0' if
  *             |               | it is not formatted as complements.
  *             |               | For the conversions `x', `X', `b' and `B'
  *             |               | on non-zero, prefix the result with ``0x'',
  *             |               | ``0X'', ``0b'' and ``0B'', respectively.
- *             |               | For `e', `E', `f', `g', and 'G',
+ *             |               | For `a', `A', `e', `E', `f', `g', and 'G',
  *             |               | force a decimal point to be added,
  *             |               | even if no digits follow.
  *             |               | For `g' and 'G', do not remove trailing zeros.
  *    ---------+---------------+-----------------------------------------
  *    +        | bBdiouxX      | Add a leading plus sign to non-negative
- *             | eEfgG         | numbers.
+ *             | aAeEfgG       | numbers.
  *             | (numeric fmt) | For `o', `x', `X', `b' and `B', use
  *             |               | a minus sign with absolute value for
  *             |               | negative values.
  *    ---------+---------------+-----------------------------------------
  *    -        | all           | Left-justify the result of this conversion.
  *    ---------+---------------+-----------------------------------------
  *    0 (zero) | bBdiouxX      | Pad with zeros, not spaces.
- *             | eEfgG         | For `o', `x', `X', `b' and `B', radix-1
+ *             | aAeEfgG       | For `o', `x', `X', `b' and `B', radix-1
  *             | (numeric fmt) | is used for negative numbers formatted as
  *             |               | complements.
  *    ---------+---------------+-----------------------------------------
- *    *        | all           | Use the next argument as the field width. 
+ *    *        | all           | Use the next argument as the field width.
  *             |               | If negative, left-justify the result. If the
- *             |               | asterisk is followed by a number and a dollar 
+ *             |               | asterisk is followed by a number and a dollar
  *             |               | sign, use the indicated argument as the width.
  *
  *  Examples of flags:
  *
  *   # `+' and space flag specifies the sign of non-negative numbers.
@@ -334,11 +341,11 @@
  *   # It also disables stripping lowest zeros.
  *   sprintf("%g", 123.4)   #=> "123.4"
  *   sprintf("%#g", 123.4)  #=> "123.400"
  *   sprintf("%g", 123456)  #=> "123456"
  *   sprintf("%#g", 123456) #=> "123456."
- *     
+ *
  *  The field width is an optional integer, followed optionally by a
  *  period and a precision.  The width specifies the minimum number of
  *  characters that will be written to the result for this field.
  *
  *  Examples of width:
@@ -387,11 +394,11 @@
  *   sprintf("%#20.8b", -11)  #=> "          0b..110101"
  *
  *   # precision for `e' is number of
  *   # digits after the decimal point           <------>
  *   sprintf("%20.8e", 1234.56789) #=> "      1.23456789e+03"
- *                                    
+ *
  *   # precision for `f' is number of
  *   # digits after the decimal point               <------>
  *   sprintf("%20.8f", 1234.56789) #=> "       1234.56789000"
  *
  *   # precision for `g' is number of
@@ -411,10 +418,19 @@
  *     sprintf("%08b '%4s'", 123, 123)            #=> "01111011 ' 123'"
  *     sprintf("%1$*2$s %2$d %1$s", "hello", 8)   #=> "   hello 8 hello"
  *     sprintf("%1$*2$s %2$d", "hello", -8)       #=> "hello    -8"
  *     sprintf("%+g:% g:%-g", 1.23, 1.23, 1.23)   #=> "+1.23: 1.23:1.23"
  *     sprintf("%u", -123)                        #=> "-123"
+ *
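+ *  For more complex formatting, Ruby supports a reference by name.
+ *  The %<name>s style formats the named value with the conversion that
+ *  follows it, while the %{name} style substitutes the value as a string
+ *  and takes no conversion.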
+ *
+ *  Examples:
+ *    sprintf("%<foo>d : %<bar>f", { :foo => 1, :bar => 2 })
+ *      #=> "1 : 2.000000"
+ *    sprintf("%{foo}f", { :foo => 1 })
+ *      #=> "1f"
  */
 
 VALUE
 rb_f_sprintf(int argc, const VALUE *argv)
 {
@@ -425,13 +441,15 @@
 rb_str_format(int argc, const VALUE *argv, VALUE fmt)
 {
     rb_encoding *enc;
     const char *p, *end;
     char *buf;
-    int blen, bsiz;
+    long blen, bsiz;
     VALUE result;
 
+    long scanned = 0;
+    int coderange = ENC_CODERANGE_7BIT;
     int width, prec, flags = FNONE;
     int nextarg = 1;
     int posarg = 0;
     int tainted = 0;
     VALUE nextvalue;
@@ -466,17 +484,23 @@
     bsiz = 120;
     result = rb_str_buf_new(bsiz);
     rb_enc_copy(result, fmt);
     buf = RSTRING_PTR(result);
     memset(buf, 0, bsiz);
+    ENC_CODERANGE_SET(result, coderange);
 
     for (; p < end; p++) {
 	const char *t;
 	int n;
+	ID id = 0;
 
 	for (t = p; t < end && *t != '%'; t++) ;
 	PUSH(p, t - p);
+	if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) {
+	    scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &coderange);
+	    ENC_CODERANGE_SET(result, coderange);
+	}
 	if (t >= end) {
 	    /* end of fmt string */
 	    goto sprint_exit;
 	}
 	p = t + 1;		/* skip `%' */
@@ -542,20 +566,26 @@
 	  case '<':
 	  case '{':
 	    {
 		const char *start = p;
 		char term = (*p == '<') ? '>' : '}';
-		ID id;
 
 		for (; p < end && *p != term; ) {
 		    p += rb_enc_mbclen(p, end, enc);
 		}
 		if (p >= end) {
 		    rb_raise(rb_eArgError, "malformed name - unmatched parenthesis");
 		}
+		if (id) {
+		    rb_raise(rb_eArgError, "name%.*s after <%s>",
+			     (int)(p - start + 1), start, rb_id2name(id));
+		}
 		id = rb_intern3(start + 1, p - start - 1, enc);
-		nextvalue = GETNAMEARG(ID2SYM(id));
+		nextvalue = GETNAMEARG(ID2SYM(id), start, (int)(p - start + 1));
+		if (nextvalue == Qundef) {
+		    rb_raise(rb_eKeyError, "key%.*s not found", (int)(p - start + 1), start);
+		}
 		if (term == '}') goto format_s;
 		p++;
 		goto retry;
 	    }
 
@@ -610,16 +640,16 @@
 		tmp = rb_check_string_type(val);
 		if (!NIL_P(tmp)) {
 		    if (rb_enc_strlen(RSTRING_PTR(tmp),RSTRING_END(tmp),enc) != 1) {
 			rb_raise(rb_eArgError, "%%c requires a character");
 		    }
-		    c = rb_enc_codepoint(RSTRING_PTR(tmp), RSTRING_END(tmp), enc);
+		    c = rb_enc_codepoint_len(RSTRING_PTR(tmp), RSTRING_END(tmp), &n, enc);
 		}
 		else {
 		    c = NUM2INT(val);
+		    n = rb_enc_codelen(c, enc);
 		}
-		n = rb_enc_codelen(c, enc);
 		if (n <= 0) {
 		    rb_raise(rb_eArgError, "invalid character");
 		}
 		if (!(flags & FWIDTH)) {
 		    CHECK(n);
@@ -651,10 +681,17 @@
 		if (*p == 'p') arg = rb_inspect(arg);
 		str = rb_obj_as_string(arg);
 		if (OBJ_TAINTED(str)) tainted = 1;
 		len = RSTRING_LEN(str);
 		rb_str_set_len(result, blen);
+		if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) {
+		    int cr = coderange;
+		    scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &cr);
+		    ENC_CODERANGE_SET(result,
+				      (cr == ENC_CODERANGE_UNKNOWN ?
+				       ENC_CODERANGE_BROKEN : (coderange = cr)));
+		}
 		enc = rb_enc_check(result, str);
 		if (flags&(FPREC|FWIDTH)) {
 		    slen = rb_enc_strlen(RSTRING_PTR(str),RSTRING_END(str),enc);
 		    if (slen < 0) {
 			rb_raise(rb_eArgError, "invalid mbstring sequence");
@@ -665,11 +702,11 @@
 			slen = prec;
 			len = p - RSTRING_PTR(str);
 		    }
 		    /* need to adjust multi-byte string pos */
 		    if ((flags&FWIDTH) && (width > slen)) {
-			width -= slen;
+			width -= (int)slen;
 			if (!(flags&FMINUS)) {
 			    CHECK(width);
 			    while (width--) {
 				buf[blen++] = ' ';
 			    }
@@ -749,11 +786,11 @@
 		    val = rb_dbl2big(RFLOAT_VALUE(val));
 		    if (FIXNUM_P(val)) goto bin_retry;
 		    bignum = 1;
 		    break;
 		  case T_STRING:
-		    val = rb_str_to_inum(val, 0, Qtrue);
+		    val = rb_str_to_inum(val, 0, TRUE);
 		    goto bin_retry;
 		  case T_BIGNUM:
 		    bignum = 1;
 		    break;
 		  case T_FIXNUM:
@@ -825,10 +862,11 @@
 			    if (d && *s != d) {
 				*--s = d;
 			    }
 			}
 		    }
+		    len = (int)strlen(s);
 		}
 		else {
 		    if (sign) {
 			tmp = rb_big2str(val, base);
 			s = RSTRING_PTR(tmp);
@@ -867,14 +905,14 @@
 			      case 2:
 				if (s[0] != '1') *--s = '1'; break;
 			    }
 			}
 		    }
+		    len = rb_long2int(RSTRING_END(tmp) - s);
 		}
 
 		pos = -1;
-		len = strlen(s);
 		if (dots) {
 		    prec -= 2;
 		    width -= 2;
 		}
 
@@ -900,11 +938,11 @@
 		}
 		else if (len == 1 && *s == '0') {
 		    prefix = 0;
 		}
 		if (prefix) {
-		    width -= strlen(prefix);
+		    width -= (int)strlen(prefix);
 		}
 		if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) {
 		    prec = width;
 		    width = 0;
 		}
@@ -921,11 +959,11 @@
 			buf[blen++] = ' ';
 		    }
 		}
 		if (sc) PUSH(&sc, 1);
 		if (prefix) {
-		    int plen = strlen(prefix);
+		    int plen = (int)strlen(prefix);
 		    PUSH(prefix, plen);
 		}
 		CHECK(prec - len);
 		if (dots) PUSH("..", 2);
 		if (!bignum && v < 0) {
@@ -956,10 +994,12 @@
 	  case 'f':
 	  case 'g':
 	  case 'G':
 	  case 'e':
 	  case 'E':
+	  case 'a':
+	  case 'A':
 	    {
 		VALUE val = GETARG();
 		double fval;
 		int i, need = 6;
 		char fbuf[32];
@@ -972,11 +1012,11 @@
 			expr = "NaN";
 		    }
 		    else {
 			expr = "Inf";
 		    }
-		    need = strlen(expr);
+		    need = (int)strlen(expr);
 		    if ((!isnan(fval) && fval < 0.0) || (flags & FPLUS))
 			need++;
 		    if ((flags & FWIDTH) && need < width)
 			need = width;
 
@@ -987,21 +1027,21 @@
 			    buf[blen++] = '-';
 			else if (flags & FPLUS)
 			    buf[blen++] = '+';
 			else if (flags & FSPACE)
 			    blen++;
-			strncpy(&buf[blen], expr, strlen(expr));
+			memcpy(&buf[blen], expr, strlen(expr));
 		    }
 		    else {
 			if (!isnan(fval) && fval < 0.0)
 			    buf[blen + need - strlen(expr) - 1] = '-';
 			else if (flags & FPLUS)
 			    buf[blen + need - strlen(expr) - 1] = '+';
 			else if ((flags & FSPACE) && need > width)
 			    blen++;
-			strncpy(&buf[blen + need - strlen(expr)], expr,
-				strlen(expr));
+			memcpy(&buf[blen + need - strlen(expr)], expr,
+			       strlen(expr));
 		    }
 		    blen += strlen(&buf[blen]);
 		    break;
 		}
 
@@ -1080,13 +1120,12 @@
 #  define _HAVE_LLP64_
 #  define quad_t LONG_LONG
 #  define u_quad_t unsigned LONG_LONG
 # endif
 #endif
-#undef snprintf
 #define FLOATING_POINT 1
 #define BSD__dtoa ruby_dtoa
-#include "missing/vsnprintf.c"
+#include "vsnprintf.c"
 
 static int
 ruby__sfvwrite(register rb_printf_buffer *fp, register struct __suio *uio)
 {
     struct __siov *iov;