#ifndef MLIB_INT128_H_INCLUDED #define MLIB_INT128_H_INCLUDED #include "./macros.h" #include "./str.h" #include #include #include #include MLIB_C_LINKAGE_BEGIN /** * @brief A 128-bit binary integer */ typedef union { struct { uint64_t lo; uint64_t hi; } r; #if defined(__SIZEOF_INT128__) // These union members are only for the purpose of debugging visualization // and testing, and will only appear correctly on little-endian platforms. __int128_t signed_; __uint128_t unsigned_; #endif } mlib_int128; /// Define an int128 from a literal within [INT64_MIN, INT64_MAX] #define MLIB_INT128(N) MLIB_INIT(mlib_int128) MLIB_INT128_C(N) /// Define an int128 from a literal within [INT64_MIN, INT64_MAX] (usable as a /// constant init) #define MLIB_INT128_C(N) MLIB_INT128_FROM_PARTS((uint64_t)INT64_C(N), (INT64_C(N) < 0 ? UINT64_MAX : 0)) /** * @brief Cast an integral value to an mlib_int128 * * If the argument is signed and less-than zero, it will be sign-extended */ #define MLIB_INT128_CAST(N) \ MLIB_INIT(mlib_int128) \ MLIB_INT128_FROM_PARTS((uint64_t)(N), ((N) < 0 ? UINT64_MAX : 0)) /** * @brief Create an mlib_int128 from the low and high parts of the integer * * @param LowWord_u64 The low-value 64 bits of the number * @param HighWord_u64 The high-value 64 bits of the number */ #define MLIB_INT128_FROM_PARTS(LowWord_u64, HighWord_u64) \ { {LowWord_u64, HighWord_u64}, } /// Maximum value of int128 when treated as a signed integer #define MLIB_INT128_SMAX MLIB_INT128_FROM_PARTS(UINT64_MAX, UINT64_MAX & ~(UINT64_C(1) << 63)) /// Minimum value of int128, when treated as a signed integer #define MLIB_INT128_SMIN MLIB_INT128_FROM_PARTS(0, UINT64_C(1) << 63) /// Maximum value of int128, when treated as an unsigned integer #define MLIB_INT128_UMAX MLIB_INT128_FROM_PARTS(UINT64_MAX, UINT64_MAX) /** * @brief Compare two 128-bit integers as unsigned integers * * @return (R < 0) if (left < right) * @return (R > 0) if (left > right) * @return (R = 0) if (left == right) */ static mlib_constexpr_fn int mlib_int128_ucmp(mlib_int128 left, mlib_int128 right) { if (left.r.hi > right.r.hi) { return 1; } else if (left.r.hi < right.r.hi) { return -1; } else if (left.r.lo > right.r.lo) { return 1; } else if (left.r.lo < right.r.lo) { return -1; } else { return 0; } } /** * @brief Compare two 128-bit integers as signed integers * * @return (R < 0) if (left < right) * @return (R > 0) if (left > right) * @return (R = 0) if (left == right) */ static mlib_constexpr_fn int mlib_int128_scmp(mlib_int128 left, mlib_int128 right) { if ((left.r.hi & (1ull << 63)) == (right.r.hi & (1ull << 63))) { // Same signed-ness, so they are as comparable as unsigned return mlib_int128_ucmp(left, right); } else if (left.r.hi & (1ull << 63)) { // The left is negative return -1; } else { // The right is negative return 1; } } /** * @brief Determine whether the two 128-bit integers are equal * * @retval true If left == right * @retval false Otherwise */ static mlib_constexpr_fn bool mlib_int128_eq(mlib_int128 left, mlib_int128 right) { return mlib_int128_ucmp(left, right) == 0; } /** * @brief Add two 128-bit integers together * * @return mlib_int128 The sum of the two addends. Overflow will wrap. */ static mlib_constexpr_fn mlib_int128 mlib_int128_add(mlib_int128 left, mlib_int128 right) { uint64_t losum = left.r.lo + right.r.lo; // Overflow check unsigned carry = (losum < left.r.lo || losum < right.r.lo); uint64_t hisum = left.r.hi + right.r.hi + carry; return MLIB_INIT(mlib_int128) MLIB_INT128_FROM_PARTS(losum, hisum); } /** * @brief Treat the given 128-bit integer as signed, and return its * negated value */ static mlib_constexpr_fn mlib_int128 mlib_int128_negate(mlib_int128 v) { mlib_int128 r = MLIB_INT128_FROM_PARTS(~v.r.lo, ~v.r.hi); r = mlib_int128_add(r, MLIB_INT128(1)); return r; } /** * @brief Subtract two 128-bit integers * * @return mlib_int128 The difference between `from` and `less` */ static mlib_constexpr_fn mlib_int128 mlib_int128_sub(mlib_int128 from, mlib_int128 less) { unsigned borrow = from.r.lo < less.r.lo; uint64_t low = from.r.lo - less.r.lo; uint64_t high = from.r.hi - less.r.hi; high -= borrow; return MLIB_INIT(mlib_int128) MLIB_INT128_FROM_PARTS(low, high); } /** * @brief Bitwise left-shift a 128-bit integer * * @param val The value to modify * @param off The offset to shift left. If negative, shifts right * @return The result of the shift operation */ static mlib_constexpr_fn mlib_int128 mlib_int128_lshift(mlib_int128 val, int off) { if (off > 0) { if (off >= 64) { off -= 64; uint64_t high = val.r.lo << off; return MLIB_INIT(mlib_int128) MLIB_INT128_FROM_PARTS(0, high); } else { uint64_t low = val.r.lo << off; uint64_t high = val.r.hi << off; high |= val.r.lo >> (64 - off); return MLIB_INIT(mlib_int128) MLIB_INT128_FROM_PARTS(low, high); } } else if (off < 0) { off = -off; if (off >= 64) { off -= 64; uint64_t low = val.r.hi >> off; return MLIB_INIT(mlib_int128) MLIB_INT128_FROM_PARTS(low, 0); } else { uint64_t high = val.r.hi >> off; uint64_t low = val.r.lo >> off; low |= val.r.hi << (64 - off); return MLIB_INIT(mlib_int128) MLIB_INT128_FROM_PARTS(low, high); } } else { return val; } } /** * @brief Bitwise logical right-shift a 128-bit integer * * @param val The value to modify. No "sign bit" is respected. * @param off The offset to shift right. If negative, shifts left * @return The result of the shift operation */ static mlib_constexpr_fn mlib_int128 mlib_int128_rshift(mlib_int128 val, int off) { return mlib_int128_lshift(val, -off); } /** * @brief Bitwise-or two 128-bit integers */ static mlib_constexpr_fn mlib_int128 mlib_int128_bitor(mlib_int128 l, mlib_int128 r) { return MLIB_INIT(mlib_int128) MLIB_INT128_FROM_PARTS(l.r.lo | r.r.lo, l.r.hi | r.r.hi); } /** * @brief Bitwise-and two 128-bit integers */ static mlib_constexpr_fn mlib_int128 mlib_int128_bitand(mlib_int128 l, mlib_int128 r) { return MLIB_INIT(mlib_int128) MLIB_INT128_FROM_PARTS(l.r.lo & r.r.lo, l.r.hi & r.r.hi); } // Multiply two 64bit integers to get a 128-bit result without overflow static mlib_constexpr_fn mlib_int128 _mlibUnsignedMult128(uint64_t left, uint64_t right) { // Perform a Knuth 4.3.1M multiplication uint32_t u[2] = {(uint32_t)left, (uint32_t)(left >> 32)}; uint32_t v[2] = {(uint32_t)right, (uint32_t)(right >> 32)}; uint32_t w[4] = {0}; for (int j = 0; j < 2; ++j) { uint64_t t = 0; for (int i = 0; i < 2; ++i) { t += (uint64_t)(u[i]) * v[j] + w[i + j]; w[i + j] = (uint32_t)t; t >>= 32; } w[j + 2] = (uint32_t)t; } return MLIB_INIT(mlib_int128) MLIB_INT128_FROM_PARTS(((uint64_t)w[1] << 32) | w[0], ((uint64_t)w[3] << 32) | w[2]); } /** * @brief Multiply two mlib_int128s together. Overflow will wrap. */ static mlib_constexpr_fn mlib_int128 mlib_int128_mul(mlib_int128 l, mlib_int128 r) { // Multiply the low-order word mlib_int128 ret = _mlibUnsignedMult128(l.r.lo, r.r.lo); // Accumulate the high-order parts: ret.r.hi += l.r.lo * r.r.hi; ret.r.hi += l.r.hi * r.r.lo; return ret; } /// Get the number of leading zeros in a 64bit number. static mlib_constexpr_fn int _mlibCountLeadingZeros_u64(uint64_t bits) { int n = 0; if (bits == 0) { return 64; } while (!(1ull << 63 & bits)) { ++n; bits <<= 1; } return n; } static mlib_constexpr_fn int _mlibCountLeadingZeros_u128(mlib_int128 r) { int clz_l = _mlibCountLeadingZeros_u64(r.r.hi); if (clz_l != 64) { return clz_l; } int clz_r = _mlibCountLeadingZeros_u64(r.r.lo); return clz_l + clz_r; } /// Implementation of Knuth's algorithm 4.3.1 D for unsigned integer division static mlib_constexpr_fn void _mlibKnuth431D(uint32_t *const u, const int ulen, const uint32_t *const v, const int vlen, uint32_t *quotient) { // Part D1 (normalization) is done by caller, // normalized in u and v (radix b is 2^32) typedef uint64_t u64; typedef int64_t i64; typedef uint32_t u32; const int m = ulen - vlen - 1; const int n = vlen; // 'd' is 2^32. Shifting left and right is equivalent to mult and division by // d, respectively. // D2 int j = m; for (;;) { // D3: Select two u32 as a u64: u64 two = ((u64)(u[j + n]) << 32) | u[j + n - 1]; // D3: Partial quotient: q̂ u64 q = two / v[n - 1]; // D3: Partial remainder: r̂ u64 r = two % v[n - 1]; // D3: Compute q̂ and r̂ while (q >> 32 || q * (u64)v[n - 2] > (r << 32 | u[j + n - 2])) { q--; r += v[n - 1]; if (r >> 32) { break; } } // D4: Multiply and subtract i64 k = 0; i64 t = 0; for (int i = 0; i < n; ++i) { u64 prod = (u32)q * (u64)(v[i]); t = u[i + j] - k - (u32)prod; u[i + j] = (u32)t; k = (i64)(prod >> 32) - (t >> 32); } t = u[j + n] - k; u[j + n] = (u32)t; quotient[j] = (u32)q; // D5: Test remainder if (t < 0) { // D6: Add back --quotient[j]; k = 0; for (int i = 0; i < n; ++i) { t = u[i + j] + k + v[i]; u[i + j] = (u32)(t); k = t >> 32; } u[j + n] += (u32)k; } // D7: --j; if (j < 0) { break; } } // Denormalization (D8) is done by caller. } /// The result of 128-bit division typedef struct mlib_int128_divmod_result { /// The quotient of the division operation (rounds to zero) mlib_int128 quotient; /// The remainder of the division operation mlib_int128 remainder; } mlib_int128_divmod_result; /// Divide a 128-bit number by a 64bit number. static mlib_constexpr_fn struct mlib_int128_divmod_result _mlibDivide_u128_by_u64(const mlib_int128 numer, const uint64_t denom) { mlib_int128 adjusted = numer; adjusted.r.hi %= denom; int d = _mlibCountLeadingZeros_u64(denom); typedef uint32_t u32; typedef uint64_t u64; if (d >= 32) { // jk: We're dividing by less than UINT32_MAX: We can do a simple short // division of two base32 numbers. // Treat the denominator as a single base32 digit: const u32 d0 = (u32)denom; // And the numerator as four base32 digits: const u64 n0 = (u32)(numer.r.lo); const u64 n1 = (u32)(numer.r.lo >> 32); // We don't need to split n2 and n3. (n3,n2) will be the first partial // dividend const u64 n3_n2 = numer.r.hi; // First partial remainder: (n3,n2) % d0 const u64 r1 = n3_n2 % d0; // Second partial dividend: (r1,n1) const u64 r1_n1 = (r1 << 32) + n1; // Second partial remainder: (r1,n1) % d0 const u64 r0 = r1_n1 % d0; // Final partial dividend: (r0,n0) const u64 r0_n0 = (r0 << 32) + n0; // Final remainder: (r0,n0) % d0 const u64 rem = r0_n0 % d0; // Form the quotient as four base32 digits: // Least quotient digit: (r0,n0) / d0 const u64 q0 = r0_n0 / d0; // Second quotient digit: (r1,n1) / d0 const u64 q1 = r1_n1 / d0; // Third and fourth quotient digit: (n3,n2) / d0 const u64 q3_q2 = n3_n2 / d0; // Low word of the quotient: (q1,q0) const u64 q1_q0 = (q1 << 32) + q0; return MLIB_INIT(mlib_int128_divmod_result){ MLIB_INIT(mlib_int128) MLIB_INT128_FROM_PARTS(q1_q0, q3_q2), MLIB_INIT(mlib_int128) MLIB_INT128_FROM_PARTS(rem, 0), }; } // Normalize for a Knuth 4.3.1D division. Convert the integers into two // base-32 numbers, with u and v being arrays of digits: u32 u[5] = { (u32)(adjusted.r.lo << d), (u32)(adjusted.r.lo >> (32 - d)), (u32)(adjusted.r.hi << d), (u32)(adjusted.r.hi >> (32 - d)), 0, }; if (d != 0) { // Extra bits from overlap: u[2] |= (u32)(adjusted.r.lo >> (64 - d)); u[4] |= (u32)(adjusted.r.hi >> (64 - d)); } u32 v[2] = { (u32)(denom << d), (u32)(denom >> (32 - d)), }; u32 qparts[3] = {0}; _mlibKnuth431D(u, 5, v, 2, qparts); u64 rem = ((u64)u[1] << (32 - d)) | (u[0] >> d); u64 quo = ((u64)qparts[1] << 32) | qparts[0]; return MLIB_INIT(mlib_int128_divmod_result){ MLIB_INIT(mlib_int128) MLIB_INT128_FROM_PARTS(quo, numer.r.hi / denom), MLIB_INIT(mlib_int128) MLIB_INT128_FROM_PARTS(rem, 0), }; } /** * @brief Perform a combined division+remainder of two 128-bit numbers * * @param numer The dividend * @param denom The divisor * @return A struct with .quotient and .remainder results */ static mlib_constexpr_fn mlib_int128_divmod_result mlib_int128_divmod(mlib_int128 numer, mlib_int128 denom) { const uint64_t nhi = numer.r.hi; const uint64_t nlo = numer.r.lo; const uint64_t dhi = denom.r.hi; const uint64_t dlo = denom.r.lo; if (dhi > nhi) { // Denominator is definitely larger than numerator. Quotient is zero, // remainder is full numerator. return MLIB_INIT(mlib_int128_divmod_result){MLIB_INT128(0), numer}; } else if (dhi == nhi) { // High words are equal if (nhi == 0) { // Both high words are zero, so this is just a division of two 64bit // numbers return MLIB_INIT(mlib_int128_divmod_result){ MLIB_INT128_CAST(nlo / dlo), MLIB_INT128_CAST(nlo % dlo), }; } else if (nlo > dlo) { // The numerator is larger than the denom and the high word on the // denom is non-zero, so this cannot divide to anything greater than 1. return MLIB_INIT(mlib_int128_divmod_result){ MLIB_INT128(1), mlib_int128_sub(numer, denom), }; } else if (nlo < dlo) { // numer.r.lo < denom.r.lo and denom.r.hi > denom.r.lo, so the // integer division becomes zero return MLIB_INIT(mlib_int128_divmod_result){ MLIB_INT128(0), numer, }; } else { // N / N is one return MLIB_INIT(mlib_int128_divmod_result){MLIB_INT128(1), MLIB_INT128(0)}; } } else if (dhi == 0) { // No high in denominator. We can use a u128/u64 return _mlibDivide_u128_by_u64(numer, denom.r.lo); } else { // We'll need to do a full u128/u128 division // Normalize for Knuth 4.3.1D int d = _mlibCountLeadingZeros_u64(denom.r.hi); // Does the denom have only three base32 digits? const bool has_three = d >= 32; d &= 31; uint32_t u[5] = { (uint32_t)(numer.r.lo << d), (uint32_t)(numer.r.lo >> (32 - d)), (uint32_t)(numer.r.hi << d), (uint32_t)(numer.r.hi >> (32 - d)), 0, }; uint32_t v[4] = { (uint32_t)(denom.r.lo << d), (uint32_t)(denom.r.lo >> (32 - d)), (uint32_t)(denom.r.hi << d), (uint32_t)(denom.r.hi >> (32 - d)), }; if (d != 0) { u[2] |= (uint32_t)(numer.r.lo >> (64 - d)); u[4] |= (uint32_t)(numer.r.hi >> (64 - d)); v[2] |= (uint32_t)(denom.r.lo >> (64 - d)); }; uint32_t q[2] = {0}; if (has_three) { _mlibKnuth431D(u, 5, v, 3, q); } else { _mlibKnuth431D(u, 5, v, 4, q); } mlib_int128 remainder = MLIB_INT128_FROM_PARTS(((uint64_t)u[1] << 32) | u[0], ((uint64_t)u[3] << 32) | u[2]); remainder = mlib_int128_rshift(remainder, d); return MLIB_INIT(mlib_int128_divmod_result){ MLIB_INT128_CAST(q[0] | (uint64_t)q[1] << 32), remainder, }; } } /** * @brief Perform a division of two 128-bit numbers */ static mlib_constexpr_fn mlib_int128 mlib_int128_div(mlib_int128 numer, mlib_int128 denom) { return mlib_int128_divmod(numer, denom).quotient; } /** * @brief Perform a modulus of two 128-bit numbers */ static mlib_constexpr_fn mlib_int128 mlib_int128_mod(mlib_int128 numer, mlib_int128 denom) { return mlib_int128_divmod(numer, denom).remainder; } /** * @brief Get the nth power of ten as a 128-bit number */ static mlib_constexpr_fn mlib_int128 mlib_int128_pow10(uint8_t nth) { mlib_int128 r = MLIB_INT128(1); while (nth-- > 0) { r = mlib_int128_mul(r, MLIB_INT128(10)); } return r; } /** * @brief Get the Nth power of two as a 128-bit number */ static mlib_constexpr_fn mlib_int128 mlib_int128_pow2(uint8_t nth) { return mlib_int128_lshift(MLIB_INT128(1), (int)nth); } /** * @brief Read a 128-bit unsigned integer from a base-10 string */ static mlib_constexpr_fn mlib_int128 mlib_int128_from_string(const char *s, const char **end) { int radix = 10; if (mlib_strlen(s) > 2 && s[0] == '0') { // Check for a different radix char b = s[1]; if (b == 'b' || b == 'B') { radix = 2; s += 2; } else if (b == 'c' || b == 'C') { radix = 8; s += 2; } else if (b == 'x' || b == 'X') { radix = 16; s += 2; } else { radix = 8; s += 1; } } mlib_int128 ret = MLIB_INT128(0); for (; *s; ++s) { char c = *s; if (c == '\'') { // Digit separator. Skip it; continue; } if (c >= 'a') { // Uppercase (if a letter, otherwise some other punct): c = (char)(c - ('a' - 'A')); } int digit = c - '0'; if (c >= 'A') { // It's actually a letter (or garbage, which we'll catch later) digit = (c - 'A') + 10; } if (digit > radix || digit < 0) { // The digit is outside of our radix, or garbage break; } ret = mlib_int128_mul(ret, MLIB_INT128_CAST(radix)); ret = mlib_int128_add(ret, MLIB_INT128_CAST(digit)); } if (end) { *end = s; } return ret; } /** * @brief Truncate a 128-bit number to a 64-bit number */ static mlib_constexpr_fn uint64_t mlib_int128_to_u64(mlib_int128 v) { return v.r.lo; } /// The result type of formatting a 128-bit number typedef struct { /// The character array of the number as a base10 string. Null-terminated. char str[40]; } mlib_int128_charbuf; /** * @brief Format a 128-bit integer into a string of base10 digits. * * @return mlib_int128_charbuf a struct containing a .str character array */ static mlib_constexpr_fn mlib_int128_charbuf mlib_int128_format(mlib_int128 i) { mlib_int128_charbuf into = {{0}}; char *out = into.str + (sizeof into) - 1; int len = 0; if (mlib_int128_eq(i, MLIB_INT128(0))) { *out-- = '0'; len = 1; } while (!mlib_int128_eq(i, MLIB_INT128(0))) { mlib_int128_divmod_result dm = mlib_int128_divmod(i, MLIB_INT128(10)); uint64_t v = mlib_int128_to_u64(dm.remainder); char digits[] = "0123456789"; char d = digits[v]; *out = d; --out; i = dm.quotient; ++len; } for (int j = 0; j < len; ++j) { into.str[j] = out[j + 1]; } into.str[len] = 0; return into; } MLIB_C_LINKAGE_END #endif // MLIB_INT128_H_INCLUDED