ext/pack/pack-26.c in zscan-2.0.5 vs ext/pack/pack-26.c in zscan-2.0.6

- old
+ new

@@ -125,762 +125,10 @@ str_associated(VALUE str) { return rb_ivar_lookup(str, id_associated, Qfalse); } -/* - * call-seq: - * arr.pack( aTemplateString ) -> aBinaryString - * arr.pack( aTemplateString, buffer: aBufferString ) -> aBufferString - * - * Packs the contents of <i>arr</i> into a binary sequence according to - * the directives in <i>aTemplateString</i> (see the table below) - * Directives ``A,'' ``a,'' and ``Z'' may be followed by a count, - * which gives the width of the resulting field. The remaining - * directives also may take a count, indicating the number of array - * elements to convert. If the count is an asterisk - * (``<code>*</code>''), all remaining array elements will be - * converted. Any of the directives ``<code>sSiIlL</code>'' may be - * followed by an underscore (``<code>_</code>'') or - * exclamation mark (``<code>!</code>'') to use the underlying - * platform's native size for the specified type; otherwise, they use a - * platform-independent size. Spaces are ignored in the template - * string. See also <code>String#unpack</code>. - * - * a = [ "a", "b", "c" ] - * n = [ 65, 66, 67 ] - * a.pack("A3A3A3") #=> "a b c " - * a.pack("a3a3a3") #=> "a\000\000b\000\000c\000\000" - * n.pack("ccc") #=> "ABC" - * - * If <i>aBufferString</i> is specified and its capacity is enough, - * +pack+ uses it as the buffer and returns it. - * When the offset is specified by the beginning of <i>aTemplateString</i>, - * the result is filled after the offset. - * If original contents of <i>aBufferString</i> exists and it's longer than - * the offset, the rest of <i>offsetOfBuffer</i> are overwritten by the result. - * If it's shorter, the gap is filled with ``<code>\0</code>''. - * - * Note that ``buffer:'' option does not guarantee not to allocate memory - * in +pack+. If the capacity of <i>aBufferString</i> is not enough, - * +pack+ allocates memory. - * - * Directives for +pack+. - * - * Integer | Array | - * Directive | Element | Meaning - * ---------------------------------------------------------------------------- - * C | Integer | 8-bit unsigned (unsigned char) - * S | Integer | 16-bit unsigned, native endian (uint16_t) - * L | Integer | 32-bit unsigned, native endian (uint32_t) - * Q | Integer | 64-bit unsigned, native endian (uint64_t) - * J | Integer | pointer width unsigned, native endian (uintptr_t) - * | | (J is available since Ruby 2.3.) - * | | - * c | Integer | 8-bit signed (signed char) - * s | Integer | 16-bit signed, native endian (int16_t) - * l | Integer | 32-bit signed, native endian (int32_t) - * q | Integer | 64-bit signed, native endian (int64_t) - * j | Integer | pointer width signed, native endian (intptr_t) - * | | (j is available since Ruby 2.3.) - * | | - * S_ S! | Integer | unsigned short, native endian - * I I_ I! | Integer | unsigned int, native endian - * L_ L! | Integer | unsigned long, native endian - * Q_ Q! | Integer | unsigned long long, native endian (ArgumentError - * | | if the platform has no long long type.) - * | | (Q_ and Q! is available since Ruby 2.1.) - * J! | Integer | uintptr_t, native endian (same with J) - * | | (J! is available since Ruby 2.3.) - * | | - * s_ s! | Integer | signed short, native endian - * i i_ i! | Integer | signed int, native endian - * l_ l! | Integer | signed long, native endian - * q_ q! | Integer | signed long long, native endian (ArgumentError - * | | if the platform has no long long type.) - * | | (q_ and q! is available since Ruby 2.1.) - * j! | Integer | intptr_t, native endian (same with j) - * | | (j! is available since Ruby 2.3.) - * | | - * S> s> S!> s!> | Integer | same as the directives without ">" except - * L> l> L!> l!> | | big endian - * I!> i!> | | (available since Ruby 1.9.3) - * Q> q> Q!> q!> | | "S>" is same as "n" - * J> j> J!> j!> | | "L>" is same as "N" - * | | - * S< s< S!< s!< | Integer | same as the directives without "<" except - * L< l< L!< l!< | | little endian - * I!< i!< | | (available since Ruby 1.9.3) - * Q< q< Q!< q!< | | "S<" is same as "v" - * J< j< J!< j!< | | "L<" is same as "V" - * | | - * n | Integer | 16-bit unsigned, network (big-endian) byte order - * N | Integer | 32-bit unsigned, network (big-endian) byte order - * v | Integer | 16-bit unsigned, VAX (little-endian) byte order - * V | Integer | 32-bit unsigned, VAX (little-endian) byte order - * | | - * U | Integer | UTF-8 character - * w | Integer | BER-compressed integer - * - * Float | Array | - * Directive | Element | Meaning - * --------------------------------------------------------------------------- - * D d | Float | double-precision, native format - * F f | Float | single-precision, native format - * E | Float | double-precision, little-endian byte order - * e | Float | single-precision, little-endian byte order - * G | Float | double-precision, network (big-endian) byte order - * g | Float | single-precision, network (big-endian) byte order - * - * String | Array | - * Directive | Element | Meaning - * --------------------------------------------------------------------------- - * A | String | arbitrary binary string (space padded, count is width) - * a | String | arbitrary binary string (null padded, count is width) - * Z | String | same as ``a'', except that null is added with * - * B | String | bit string (MSB first) - * b | String | bit string (LSB first) - * H | String | hex string (high nibble first) - * h | String | hex string (low nibble first) - * u | String | UU-encoded string - * M | String | quoted printable, MIME encoding (see also RFC2045) - * | | (text mode but input must use LF and output LF) - * m | String | base64 encoded string (see RFC 2045, count is width) - * | | (if count is 0, no line feed are added, see RFC 4648) - * P | String | pointer to a structure (fixed-length string) - * p | String | pointer to a null-terminated string - * - * Misc. | Array | - * Directive | Element | Meaning - * --------------------------------------------------------------------------- - * @ | --- | moves to absolute position - * X | --- | back up a byte - * x | --- | null byte - */ - -static VALUE -pack_pack(int argc, VALUE *argv, VALUE ary) -{ - static const char nul10[] = "\0\0\0\0\0\0\0\0\0\0"; - static const char spc10[] = " "; - const char *p, *pend; - VALUE fmt, opt = Qnil, res, from, associates = 0, buffer = 0; - char type; - long len, idx, plen; - const char *ptr; - int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */ -#ifdef NATINT_PACK - int natint; /* native integer */ -#endif - int integer_size, bigendian_p; - - rb_scan_args(argc, argv, "10:", &fmt, &opt); - - StringValue(fmt); - p = RSTRING_PTR(fmt); - pend = p + RSTRING_LEN(fmt); - if (!NIL_P(opt)) { - static ID keyword_ids[1]; - if (!keyword_ids[0]) - CONST_ID(keyword_ids[0], "buffer"); - - rb_get_kwargs(opt, keyword_ids, 0, 1, &buffer); - - if (buffer != Qundef && !RB_TYPE_P(buffer, T_STRING)) - rb_raise(rb_eTypeError, "buffer must be String, not %s", rb_obj_classname(buffer)); - } - if (buffer) - res = buffer; - else - res = rb_str_buf_new(0); - - idx = 0; - -#define TOO_FEW (rb_raise(rb_eArgError, toofew), 0) -#define MORE_ITEM (idx < RARRAY_LEN(ary)) -#define THISFROM (MORE_ITEM ? RARRAY_AREF(ary, idx) : TOO_FEW) -#define NEXTFROM (MORE_ITEM ? RARRAY_AREF(ary, idx++) : TOO_FEW) - - while (p < pend) { - int explicit_endian = 0; - if (RSTRING_PTR(fmt) + RSTRING_LEN(fmt) != pend) { - rb_raise(rb_eRuntimeError, "format string modified"); - } - type = *p++; /* get data type */ -#ifdef NATINT_PACK - natint = 0; -#endif - - if (ISSPACE(type)) continue; - if (type == '#') { - while ((p < pend) && (*p != '\n')) { - p++; - } - continue; - } - - { - modifiers: - switch (*p) { - case '_': - case '!': - if (strchr(natstr, type)) { -#ifdef NATINT_PACK - natint = 1; -#endif - p++; - } - else { - rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr); - } - goto modifiers; - - case '<': - case '>': - if (!strchr(endstr, type)) { - rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr); - } - if (explicit_endian) { - rb_raise(rb_eRangeError, "Can't use both '<' and '>'"); - } - explicit_endian = *p++; - goto modifiers; - } - } - - if (*p == '*') { /* set data length */ - len = strchr("@Xxu", type) ? 0 - : strchr("PMm", type) ? 1 - : RARRAY_LEN(ary) - idx; - p++; - } - else if (ISDIGIT(*p)) { - errno = 0; - len = STRTOUL(p, (char**)&p, 10); - if (errno) { - rb_raise(rb_eRangeError, "pack length too big"); - } - } - else { - len = 1; - } - - switch (type) { - case 'U': - /* if encoding is US-ASCII, upgrade to UTF-8 */ - if (enc_info == 1) enc_info = 2; - break; - case 'm': case 'M': case 'u': - /* keep US-ASCII (do nothing) */ - break; - default: - /* fall back to BINARY */ - enc_info = 0; - break; - } - switch (type) { - case 'A': case 'a': case 'Z': - case 'B': case 'b': - case 'H': case 'h': - from = NEXTFROM; - if (NIL_P(from)) { - ptr = ""; - plen = 0; - } - else { - StringValue(from); - ptr = RSTRING_PTR(from); - plen = RSTRING_LEN(from); - OBJ_INFECT(res, from); - } - - if (p[-1] == '*') - len = plen; - - switch (type) { - case 'a': /* arbitrary binary string (null padded) */ - case 'A': /* arbitrary binary string (ASCII space padded) */ - case 'Z': /* null terminated string */ - if (plen >= len) { - rb_str_buf_cat(res, ptr, len); - if (p[-1] == '*' && type == 'Z') - rb_str_buf_cat(res, nul10, 1); - } - else { - rb_str_buf_cat(res, ptr, plen); - len -= plen; - while (len >= 10) { - rb_str_buf_cat(res, (type == 'A')?spc10:nul10, 10); - len -= 10; - } - rb_str_buf_cat(res, (type == 'A')?spc10:nul10, len); - } - break; - -#define castchar(from) (char)((from) & 0xff) - - case 'b': /* bit string (ascending) */ - { - int byte = 0; - long i, j = 0; - - if (len > plen) { - j = (len - plen + 1)/2; - len = plen; - } - for (i=0; i++ < len; ptr++) { - if (*ptr & 1) - byte |= 128; - if (i & 7) - byte >>= 1; - else { - char c = castchar(byte); - rb_str_buf_cat(res, &c, 1); - byte = 0; - } - } - if (len & 7) { - char c; - byte >>= 7 - (len & 7); - c = castchar(byte); - rb_str_buf_cat(res, &c, 1); - } - len = j; - goto grow; - } - break; - - case 'B': /* bit string (descending) */ - { - int byte = 0; - long i, j = 0; - - if (len > plen) { - j = (len - plen + 1)/2; - len = plen; - } - for (i=0; i++ < len; ptr++) { - byte |= *ptr & 1; - if (i & 7) - byte <<= 1; - else { - char c = castchar(byte); - rb_str_buf_cat(res, &c, 1); - byte = 0; - } - } - if (len & 7) { - char c; - byte <<= 7 - (len & 7); - c = castchar(byte); - rb_str_buf_cat(res, &c, 1); - } - len = j; - goto grow; - } - break; - - case 'h': /* hex string (low nibble first) */ - { - int byte = 0; - long i, j = 0; - - if (len > plen) { - j = (len + 1) / 2 - (plen + 1) / 2; - len = plen; - } - for (i=0; i++ < len; ptr++) { - if (ISALPHA(*ptr)) - byte |= (((*ptr & 15) + 9) & 15) << 4; - else - byte |= (*ptr & 15) << 4; - if (i & 1) - byte >>= 4; - else { - char c = castchar(byte); - rb_str_buf_cat(res, &c, 1); - byte = 0; - } - } - if (len & 1) { - char c = castchar(byte); - rb_str_buf_cat(res, &c, 1); - } - len = j; - goto grow; - } - break; - - case 'H': /* hex string (high nibble first) */ - { - int byte = 0; - long i, j = 0; - - if (len > plen) { - j = (len + 1) / 2 - (plen + 1) / 2; - len = plen; - } - for (i=0; i++ < len; ptr++) { - if (ISALPHA(*ptr)) - byte |= ((*ptr & 15) + 9) & 15; - else - byte |= *ptr & 15; - if (i & 1) - byte <<= 4; - else { - char c = castchar(byte); - rb_str_buf_cat(res, &c, 1); - byte = 0; - } - } - if (len & 1) { - char c = castchar(byte); - rb_str_buf_cat(res, &c, 1); - } - len = j; - goto grow; - } - break; - } - break; - - case 'c': /* signed char */ - case 'C': /* unsigned char */ - integer_size = 1; - bigendian_p = BIGENDIAN_P(); /* not effective */ - goto pack_integer; - - case 's': /* s for int16_t, s! for signed short */ - integer_size = NATINT_LEN(short, 2); - bigendian_p = BIGENDIAN_P(); - goto pack_integer; - - case 'S': /* S for uint16_t, S! for unsigned short */ - integer_size = NATINT_LEN(short, 2); - bigendian_p = BIGENDIAN_P(); - goto pack_integer; - - case 'i': /* i and i! for signed int */ - integer_size = (int)sizeof(int); - bigendian_p = BIGENDIAN_P(); - goto pack_integer; - - case 'I': /* I and I! for unsigned int */ - integer_size = (int)sizeof(int); - bigendian_p = BIGENDIAN_P(); - goto pack_integer; - - case 'l': /* l for int32_t, l! for signed long */ - integer_size = NATINT_LEN(long, 4); - bigendian_p = BIGENDIAN_P(); - goto pack_integer; - - case 'L': /* L for uint32_t, L! for unsigned long */ - integer_size = NATINT_LEN(long, 4); - bigendian_p = BIGENDIAN_P(); - goto pack_integer; - - case 'q': /* q for int64_t, q! for signed long long */ - integer_size = NATINT_LEN_Q; - bigendian_p = BIGENDIAN_P(); - goto pack_integer; - - case 'Q': /* Q for uint64_t, Q! for unsigned long long */ - integer_size = NATINT_LEN_Q; - bigendian_p = BIGENDIAN_P(); - goto pack_integer; - - case 'j': /* j for intptr_t */ - integer_size = sizeof(intptr_t); - bigendian_p = BIGENDIAN_P(); - goto pack_integer; - - case 'J': /* J for uintptr_t */ - integer_size = sizeof(uintptr_t); - bigendian_p = BIGENDIAN_P(); - goto pack_integer; - - case 'n': /* 16 bit (2 bytes) integer (network byte-order) */ - integer_size = 2; - bigendian_p = 1; - goto pack_integer; - - case 'N': /* 32 bit (4 bytes) integer (network byte-order) */ - integer_size = 4; - bigendian_p = 1; - goto pack_integer; - - case 'v': /* 16 bit (2 bytes) integer (VAX byte-order) */ - integer_size = 2; - bigendian_p = 0; - goto pack_integer; - - case 'V': /* 32 bit (4 bytes) integer (VAX byte-order) */ - integer_size = 4; - bigendian_p = 0; - goto pack_integer; - - pack_integer: - if (explicit_endian) { - bigendian_p = explicit_endian == '>'; - } - if (integer_size > MAX_INTEGER_PACK_SIZE) - rb_bug("unexpected intger size for pack: %d", integer_size); - while (len-- > 0) { - char intbuf[MAX_INTEGER_PACK_SIZE]; - - from = NEXTFROM; - rb_integer_pack(from, intbuf, integer_size, 1, 0, - INTEGER_PACK_2COMP | - (bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN)); - rb_str_buf_cat(res, intbuf, integer_size); - } - break; - - case 'f': /* single precision float in native format */ - case 'F': /* ditto */ - while (len-- > 0) { - float f; - - from = NEXTFROM; - f = (float)RFLOAT_VALUE(rb_to_float(from)); - rb_str_buf_cat(res, (char*)&f, sizeof(float)); - } - break; - - case 'e': /* single precision float in VAX byte-order */ - while (len-- > 0) { - FLOAT_CONVWITH(tmp); - - from = NEXTFROM; - tmp.f = (float)RFLOAT_VALUE(rb_to_float(from)); - HTOVF(tmp); - rb_str_buf_cat(res, tmp.buf, sizeof(float)); - } - break; - - case 'E': /* double precision float in VAX byte-order */ - while (len-- > 0) { - DOUBLE_CONVWITH(tmp); - from = NEXTFROM; - tmp.d = RFLOAT_VALUE(rb_to_float(from)); - HTOVD(tmp); - rb_str_buf_cat(res, tmp.buf, sizeof(double)); - } - break; - - case 'd': /* double precision float in native format */ - case 'D': /* ditto */ - while (len-- > 0) { - double d; - - from = NEXTFROM; - d = RFLOAT_VALUE(rb_to_float(from)); - rb_str_buf_cat(res, (char*)&d, sizeof(double)); - } - break; - - case 'g': /* single precision float in network byte-order */ - while (len-- > 0) { - FLOAT_CONVWITH(tmp); - from = NEXTFROM; - tmp.f = (float)RFLOAT_VALUE(rb_to_float(from)); - HTONF(tmp); - rb_str_buf_cat(res, tmp.buf, sizeof(float)); - } - break; - - case 'G': /* double precision float in network byte-order */ - while (len-- > 0) { - DOUBLE_CONVWITH(tmp); - - from = NEXTFROM; - tmp.d = RFLOAT_VALUE(rb_to_float(from)); - HTOND(tmp); - rb_str_buf_cat(res, tmp.buf, sizeof(double)); - } - break; - - case 'x': /* null byte */ - grow: - while (len >= 10) { - rb_str_buf_cat(res, nul10, 10); - len -= 10; - } - rb_str_buf_cat(res, nul10, len); - break; - - case 'X': /* back up byte */ - shrink: - plen = RSTRING_LEN(res); - if (plen < len) - rb_raise(rb_eArgError, "X outside of string"); - rb_str_set_len(res, plen - len); - break; - - case '@': /* null fill to absolute position */ - len -= RSTRING_LEN(res); - if (len > 0) goto grow; - len = -len; - if (len > 0) goto shrink; - break; - - case '%': - rb_raise(rb_eArgError, "%% is not supported"); - break; - - case 'U': /* Unicode character */ - while (len-- > 0) { - SIGNED_VALUE l; - char buf[8]; - int le; - - from = NEXTFROM; - from = rb_to_int(from); - l = NUM2LONG(from); - if (l < 0) { - rb_raise(rb_eRangeError, "pack(U): value out of range"); - } - le = rb_uv_to_utf8(buf, l); - rb_str_buf_cat(res, (char*)buf, le); - } - break; - - case 'u': /* uuencoded string */ - case 'm': /* base64 encoded string */ - from = NEXTFROM; - StringValue(from); - ptr = RSTRING_PTR(from); - plen = RSTRING_LEN(from); - - if (len == 0 && type == 'm') { - encodes(res, ptr, plen, type, 0); - ptr += plen; - break; - } - if (len <= 2) - len = 45; - else if (len > 63 && type == 'u') - len = 63; - else - len = len / 3 * 3; - while (plen > 0) { - long todo; - - if (plen > len) - todo = len; - else - todo = plen; - encodes(res, ptr, todo, type, 1); - plen -= todo; - ptr += todo; - } - break; - - case 'M': /* quoted-printable encoded string */ - from = rb_obj_as_string(NEXTFROM); - if (len <= 1) - len = 72; - qpencode(res, from, len); - break; - - case 'P': /* pointer to packed byte string */ - from = THISFROM; - if (!NIL_P(from)) { - StringValue(from); - if (RSTRING_LEN(from) < len) { - rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)", - RSTRING_LEN(from), len); - } - } - len = 1; - /* FALL THROUGH */ - case 'p': /* pointer to string */ - while (len-- > 0) { - char *t; - from = NEXTFROM; - if (NIL_P(from)) { - t = 0; - } - else { - t = StringValuePtr(from); - rb_obj_taint(from); - } - if (!associates) { - associates = rb_ary_new(); - } - rb_ary_push(associates, from); - rb_str_buf_cat(res, (char*)&t, sizeof(char*)); - } - break; - - case 'w': /* BER compressed integer */ - while (len-- > 0) { - VALUE buf = rb_str_new(0, 0); - size_t numbytes; - int sign; - char *cp; - - from = NEXTFROM; - from = rb_to_int(from); - numbytes = rb_absint_numwords(from, 7, NULL); - if (numbytes == 0) - numbytes = 1; - buf = rb_str_new(NULL, numbytes); - - sign = rb_integer_pack(from, RSTRING_PTR(buf), RSTRING_LEN(buf), 1, 1, INTEGER_PACK_BIG_ENDIAN); - - if (sign < 0) - rb_raise(rb_eArgError, "can't compress negative numbers"); - if (sign == 2) - rb_bug("buffer size problem?"); - - cp = RSTRING_PTR(buf); - while (1 < numbytes) { - *cp |= 0x80; - cp++; - numbytes--; - } - - rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf)); - } - break; - - default: { - char unknown[5]; - if (ISPRINT(type)) { - unknown[0] = type; - unknown[1] = '\0'; - } - else { - snprintf(unknown, sizeof(unknown), "\\x%.2x", type & 0xff); - } - rb_warning("unknown pack directive '%s' in '% "PRIsVALUE"'", - unknown, fmt); - break; - } - } - } - - if (associates) { - str_associate(res, associates); - } - OBJ_INFECT(res, fmt); - switch (enc_info) { - case 1: - ENCODING_CODERANGE_SET(res, rb_usascii_encindex(), ENC_CODERANGE_7BIT); - break; - case 2: - rb_enc_set_index(res, rb_utf8_encindex()); - break; - default: - /* do nothing, keep ASCII-8BIT */ - break; - } - return res; -} - static const char uu_table[] = "`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_"; static const char b64_table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; @@ -1035,13 +283,14 @@ /* unpack mode */ #define UNPACK_ARRAY 0 #define UNPACK_BLOCK 1 #define UNPACK_1 2 -static VALUE -pack_unpack_internal(VALUE str, VALUE fmt, int mode) +VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len) { + char* init_s; + int mode = UNPACK_ARRAY; #define hexdigits ruby_hexdigits char *s, *send; char *p, *pend; VALUE ary; char type; @@ -1065,11 +314,11 @@ }\ } while (0) StringValue(str); StringValue(fmt); - s = RSTRING_PTR(str); + init_s = s = RSTRING_PTR(str); send = s + RSTRING_LEN(str); p = RSTRING_PTR(fmt); pend = p + RSTRING_LEN(fmt); ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil; @@ -1745,155 +994,14 @@ type, RSTRING_PTR(fmt)); break; } } + *parsed_len = s - init_s; return ary; } -/* - * call-seq: - * str.unpack(format) -> anArray - * - * Decodes <i>str</i> (which may contain binary data) according to the - * format string, returning an array of each value extracted. The - * format string consists of a sequence of single-character directives, - * summarized in the table at the end of this entry. - * Each directive may be followed - * by a number, indicating the number of times to repeat with this - * directive. An asterisk (``<code>*</code>'') will use up all - * remaining elements. The directives <code>sSiIlL</code> may each be - * followed by an underscore (``<code>_</code>'') or - * exclamation mark (``<code>!</code>'') to use the underlying - * platform's native size for the specified type; otherwise, it uses a - * platform-independent consistent size. Spaces are ignored in the - * format string. See also <code>String#unpack1</code>, <code>Array#pack</code>. - * - * "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "] - * "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"] - * "abc \0abc \0".unpack('Z*Z*') #=> ["abc ", "abc "] - * "aa".unpack('b8B8') #=> ["10000110", "01100001"] - * "aaa".unpack('h2H2c') #=> ["16", "61", 97] - * "\xfe\xff\xfe\xff".unpack('sS') #=> [-2, 65534] - * "now=20is".unpack('M*') #=> ["now is"] - * "whole".unpack('xax2aX2aX1aX2a') #=> ["h", "e", "l", "l", "o"] - * - * This table summarizes the various formats and the Ruby classes - * returned by each. - * - * Integer | | - * Directive | Returns | Meaning - * ------------------------------------------------------------------ - * C | Integer | 8-bit unsigned (unsigned char) - * S | Integer | 16-bit unsigned, native endian (uint16_t) - * L | Integer | 32-bit unsigned, native endian (uint32_t) - * Q | Integer | 64-bit unsigned, native endian (uint64_t) - * J | Integer | pointer width unsigned, native endian (uintptr_t) - * | | - * c | Integer | 8-bit signed (signed char) - * s | Integer | 16-bit signed, native endian (int16_t) - * l | Integer | 32-bit signed, native endian (int32_t) - * q | Integer | 64-bit signed, native endian (int64_t) - * j | Integer | pointer width signed, native endian (intptr_t) - * | | - * S_ S! | Integer | unsigned short, native endian - * I I_ I! | Integer | unsigned int, native endian - * L_ L! | Integer | unsigned long, native endian - * Q_ Q! | Integer | unsigned long long, native endian (ArgumentError - * | | if the platform has no long long type.) - * J! | Integer | uintptr_t, native endian (same with J) - * | | - * s_ s! | Integer | signed short, native endian - * i i_ i! | Integer | signed int, native endian - * l_ l! | Integer | signed long, native endian - * q_ q! | Integer | signed long long, native endian (ArgumentError - * | | if the platform has no long long type.) - * j! | Integer | intptr_t, native endian (same with j) - * | | - * S> s> S!> s!> | Integer | same as the directives without ">" except - * L> l> L!> l!> | | big endian - * I!> i!> | | - * Q> q> Q!> q!> | | "S>" is same as "n" - * J> j> J!> j!> | | "L>" is same as "N" - * | | - * S< s< S!< s!< | Integer | same as the directives without "<" except - * L< l< L!< l!< | | little endian - * I!< i!< | | - * Q< q< Q!< q!< | | "S<" is same as "v" - * J< j< J!< j!< | | "L<" is same as "V" - * | | - * n | Integer | 16-bit unsigned, network (big-endian) byte order - * N | Integer | 32-bit unsigned, network (big-endian) byte order - * v | Integer | 16-bit unsigned, VAX (little-endian) byte order - * V | Integer | 32-bit unsigned, VAX (little-endian) byte order - * | | - * U | Integer | UTF-8 character - * w | Integer | BER-compressed integer (see Array.pack) - * - * Float | | - * Directive | Returns | Meaning - * ----------------------------------------------------------------- - * D d | Float | double-precision, native format - * F f | Float | single-precision, native format - * E | Float | double-precision, little-endian byte order - * e | Float | single-precision, little-endian byte order - * G | Float | double-precision, network (big-endian) byte order - * g | Float | single-precision, network (big-endian) byte order - * - * String | | - * Directive | Returns | Meaning - * ----------------------------------------------------------------- - * A | String | arbitrary binary string (remove trailing nulls and ASCII spaces) - * a | String | arbitrary binary string - * Z | String | null-terminated string - * B | String | bit string (MSB first) - * b | String | bit string (LSB first) - * H | String | hex string (high nibble first) - * h | String | hex string (low nibble first) - * u | String | UU-encoded string - * M | String | quoted-printable, MIME encoding (see RFC2045) - * m | String | base64 encoded string (RFC 2045) (default) - * | | base64 encoded string (RFC 4648) if followed by 0 - * P | String | pointer to a structure (fixed-length string) - * p | String | pointer to a null-terminated string - * - * Misc. | | - * Directive | Returns | Meaning - * ----------------------------------------------------------------- - * @ | --- | skip to the offset given by the length argument - * X | --- | skip backward one byte - * x | --- | skip forward one byte - * - * HISTORY - * - * * J, J! j, and j! are available since Ruby 2.3. - * * Q_, Q!, q_, and q! are available since Ruby 2.1. - * * I!<, i!<, I!>, and i!> are available since Ruby 1.9.3. - */ - -static VALUE -pack_unpack(VALUE str, VALUE fmt) -{ - int mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY; - return pack_unpack_internal(str, fmt, mode); -} - -/* - * call-seq: - * str.unpack1(format) -> obj - * - * Decodes <i>str</i> (which may contain binary data) according to the - * format string, returning the first value extracted. - * See also <code>String#unpack</code>, <code>Array#pack</code>. - */ - -static VALUE -pack_unpack1(VALUE str, VALUE fmt) -{ - return pack_unpack_internal(str, fmt, UNPACK_1); -} - int rb_uv_to_utf8(char buf[6], unsigned long uv) { if (uv <= 0x7f) { buf[0] = (char)uv; @@ -1995,16 +1103,6 @@ n = *lenp - 1; if (uv < utf8_limits[n]) { rb_raise(rb_eArgError, "redundant UTF-8 sequence"); } return uv; -} - -void -Init_pack(void) -{ - rb_define_method(rb_cArray, "pack", pack_pack, -1); - rb_define_method(rb_cString, "unpack", pack_unpack, 1); - rb_define_method(rb_cString, "unpack1", pack_unpack1, 1); - - id_associated = rb_make_internal_id(); }