struct Mutable [src]
An arbitrary-precision big integer, with a fixed set of mutable limbs.
Fields
limbs: []Limb
Raw digits. These are:
* Little-endian ordered
* limbs.len >= 1
* Zero is represented as limbs.len == 1 with limbs[0] == 0.
Accessing limbs directly should be avoided.
These are allocated limbs; the `len` field tells the valid range.
len: usize
positive: bool
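A minimal usage sketch (illustrative; `bigint` is a local alias for `std.math.big.int`, and `calcLimbLen` is this module's helper for sizing the buffer):

    const std = @import("std");
    const bigint = std.math.big.int;

    var limbs: [bigint.calcLimbLen(12345)]std.math.big.Limb = undefined;
    var x = bigint.Mutable.init(&limbs, 12345);
    x.addScalar(x.toConst(), 1); // x is now 12346; no heap allocation involved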
Members
- abs (Function)
- add (Function)
- addSat (Function)
- addScalar (Function)
- addWrap (Function)
- bitAnd (Function)
- bitNotWrap (Function)
- bitOr (Function)
- bitReverse (Function)
- bitXor (Function)
- byteSwap (Function)
- clone (Function)
- copy (Function)
- divFloor (Function)
- divTrunc (Function)
- dump (Function)
- eqlZero (Function)
- gcd (Function)
- gcdNoAlias (Function)
- init (Function)
- mul (Function)
- mulNoAlias (Function)
- mulWrap (Function)
- mulWrapNoAlias (Function)
- negate (Function)
- normalize (Function)
- popCount (Function)
- pow (Function)
- readPackedTwosComplement (Function)
- readTwosComplement (Function)
- saturate (Function)
- set (Function)
- setString (Function)
- setTwosCompIntLimit (Function)
- shiftLeft (Function)
- shiftLeftSat (Function)
- shiftRight (Function)
- sqrNoAlias (Function)
- sqrt (Function)
- sub (Function)
- subSat (Function)
- subWrap (Function)
- swap (Function)
- toConst (Function)
- toManaged (Function)
- truncate (Function)
Source
pub const Mutable = struct {
/// Raw digits. These are:
///
/// * Little-endian ordered
/// * limbs.len >= 1
/// * Zero is represented as limbs.len == 1 with limbs[0] == 0.
///
/// Accessing limbs directly should be avoided.
/// These are allocated limbs; the `len` field tells the valid range.
limbs: []Limb,
len: usize,
positive: bool,
pub fn toConst(self: Mutable) Const {
return .{
.limbs = self.limbs[0..self.len],
.positive = self.positive,
};
}
/// Returns true if `a == 0`.
pub fn eqlZero(self: Mutable) bool {
return self.toConst().eqlZero();
}
/// Asserts that the allocator owns the limbs memory. If this is not the case,
/// use `toConst().toManaged()`.
pub fn toManaged(self: Mutable, allocator: Allocator) Managed {
return .{
.allocator = allocator,
.limbs = self.limbs,
.metadata = if (self.positive)
self.len & ~Managed.sign_bit
else
self.len | Managed.sign_bit,
};
}
/// `value` is a primitive integer type.
/// Asserts the value fits within the provided `limbs_buffer`.
/// Note: `calcLimbLen` can be used to figure out how big an array to allocate for `limbs_buffer`.
pub fn init(limbs_buffer: []Limb, value: anytype) Mutable {
limbs_buffer[0] = 0;
var self: Mutable = .{
.limbs = limbs_buffer,
.len = 1,
.positive = true,
};
self.set(value);
return self;
}
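// Illustrative sketch: `calcLimbLen` (a file-level helper in this module) sizes
// a stack buffer for a comptime-known value.
test "Mutable.init from a comptime scalar (illustrative)" {
    var limbs: [calcLimbLen(-12345)]Limb = undefined;
    const x = Mutable.init(&limbs, -12345);
    try std.testing.expectEqual(@as(i32, -12345), try x.toConst().to(i32));
}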
/// Copies the value of a Const to an existing Mutable so that they both have the same value.
/// Asserts the value fits in the limbs buffer.
pub fn copy(self: *Mutable, other: Const) void {
if (self.limbs.ptr != other.limbs.ptr) {
@memcpy(self.limbs[0..other.limbs.len], other.limbs[0..other.limbs.len]);
}
// Normalize before setting `positive` so the `eqlZero` doesn't need to iterate
// over the extra zero limbs.
self.normalize(other.limbs.len);
self.positive = other.positive or other.eqlZero();
}
/// Efficiently swap a Mutable with another. This swaps the limb pointers; a full copy is not
/// performed. The address of the limbs field will not be the same after this function.
pub fn swap(self: *Mutable, other: *Mutable) void {
mem.swap(Mutable, self, other);
}
pub fn dump(self: Mutable) void {
for (self.limbs[0..self.len]) |limb| {
std.debug.print("{x} ", .{limb});
}
std.debug.print("capacity={} positive={}\n", .{ self.limbs.len, self.positive });
}
/// Clones a Mutable and returns a new Mutable with the same value. The new Mutable is a deep copy and
/// can be modified separately from the original.
/// Asserts that limbs is big enough to store the value.
pub fn clone(other: Mutable, limbs: []Limb) Mutable {
@memcpy(limbs[0..other.len], other.limbs[0..other.len]);
return .{
.limbs = limbs,
.len = other.len,
.positive = other.positive,
};
}
pub fn negate(self: *Mutable) void {
self.positive = !self.positive;
}
/// Modify to become the absolute value
pub fn abs(self: *Mutable) void {
self.positive = true;
}
/// Sets the Mutable to value. Value must be a primitive integer type.
/// Asserts the value fits within the limbs buffer.
/// Note: `calcLimbLen` can be used to figure out how big the limbs buffer
/// needs to be to store a specific value.
pub fn set(self: *Mutable, value: anytype) void {
const T = @TypeOf(value);
const needed_limbs = calcLimbLen(value);
assert(needed_limbs <= self.limbs.len); // value too big
self.len = needed_limbs;
self.positive = value >= 0;
switch (@typeInfo(T)) {
.int => |info| {
var w_value = @abs(value);
if (info.bits <= limb_bits) {
self.limbs[0] = w_value;
} else {
var i: usize = 0;
while (true) : (i += 1) {
self.limbs[i] = @as(Limb, @truncate(w_value));
w_value >>= limb_bits;
if (w_value == 0) break;
}
}
},
.comptime_int => {
comptime var w_value = @abs(value);
if (w_value <= maxInt(Limb)) {
self.limbs[0] = w_value;
} else {
const mask = (1 << limb_bits) - 1;
comptime var i = 0;
inline while (true) : (i += 1) {
self.limbs[i] = w_value & mask;
w_value >>= limb_bits;
if (w_value == 0) break;
}
}
},
else => @compileError("cannot set Mutable using type " ++ @typeName(T)),
}
}
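// Illustrative sketch: one Mutable can be re-set to different values as long as
// the buffer is sized for the largest of them (here via `maxInt(u64)`).
test "Mutable.set reuses the limbs buffer (illustrative)" {
    var limbs: [calcLimbLen(maxInt(u64))]Limb = undefined;
    var m = Mutable.init(&limbs, 0);
    m.set(@as(u64, 0xdead_beef));
    try std.testing.expectEqual(@as(u64, 0xdead_beef), try m.toConst().to(u64));
    m.set(-1);
    try std.testing.expect(!m.positive);
}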
/// Set self from the string representation `value`.
///
/// `value` must contain only digits valid for `base` and is case insensitive. Base prefixes are
/// not allowed (e.g. 0x43 should simply be 43). Underscores in the input string are
/// ignored and can be used as digit separators.
///
/// Asserts there is enough memory for the value in `self.limbs`. An upper bound on number of limbs can
/// be determined with `calcSetStringLimbCount`.
/// Asserts the base is in the range [2, 36].
///
/// Returns an error if the value has invalid digits for the requested base.
///
/// `limbs_buffer` is used for temporary storage. The size required can be found with
/// `calcSetStringLimbsBufferLen`.
///
/// If `allocator` is provided, it will be used for temporary storage to improve
/// multiplication performance. `error.OutOfMemory` is handled with a fallback algorithm.
pub fn setString(
self: *Mutable,
base: u8,
value: []const u8,
limbs_buffer: []Limb,
allocator: ?Allocator,
) error{InvalidCharacter}!void {
assert(base >= 2);
assert(base <= 36);
var i: usize = 0;
var positive = true;
if (value.len > 0 and value[0] == '-') {
positive = false;
i += 1;
}
const ap_base: Const = .{ .limbs = &[_]Limb{base}, .positive = true };
self.set(0);
for (value[i..]) |ch| {
if (ch == '_') {
continue;
}
const d = try std.fmt.charToDigit(ch, base);
const ap_d: Const = .{ .limbs = &[_]Limb{d}, .positive = true };
self.mul(self.toConst(), ap_base, limbs_buffer, allocator);
self.add(self.toConst(), ap_d);
}
self.positive = positive;
}
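// Illustrative sketch: parsing a decimal literal with stack buffers sized by the
// `calcSetString*` helpers referenced in the doc comment above.
test "Mutable.setString parses base 10 with separators (illustrative)" {
    const str = "-1_000_000";
    var limbs: [calcSetStringLimbCount(10, str.len)]Limb = undefined;
    var tmp: [calcSetStringLimbsBufferLen(10, str.len)]Limb = undefined;
    var m = Mutable.init(&limbs, 0);
    try m.setString(10, str, &tmp, null);
    try std.testing.expectEqual(@as(i64, -1_000_000), try m.toConst().to(i64));
}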
/// Set self to either bound of a 2s-complement integer.
/// Note: The result is still sign-magnitude, not twos complement! In order to convert the
/// result to twos complement, it is sufficient to take the absolute value.
///
/// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
/// r is `calcTwosCompLimbCount(bit_count)`.
pub fn setTwosCompIntLimit(
r: *Mutable,
limit: TwosCompIntLimit,
signedness: Signedness,
bit_count: usize,
) void {
// Handle zero-bit types.
if (bit_count == 0) {
r.set(0);
return;
}
const req_limbs = calcTwosCompLimbCount(bit_count);
const bit: Log2Limb = @truncate(bit_count - 1);
const signmask = @as(Limb, 1) << bit; // 0b0..010..0 where 1 is the sign bit.
const mask = (signmask << 1) -% 1; // 0b0..011..1 where the leftmost 1 is the sign bit.
r.positive = true;
switch (signedness) {
.signed => switch (limit) {
.min => {
// Negative bound, signed = -0x80.
r.len = req_limbs;
@memset(r.limbs[0 .. r.len - 1], 0);
r.limbs[r.len - 1] = signmask;
r.positive = false;
},
.max => {
// Positive bound, signed = 0x7F
// Note, in this branch we need to normalize because the first bit is
// supposed to be 0.
// Special case for 1-bit integers.
if (bit_count == 1) {
r.set(0);
} else {
const new_req_limbs = calcTwosCompLimbCount(bit_count - 1);
const msb = @as(Log2Limb, @truncate(bit_count - 2));
const new_signmask = @as(Limb, 1) << msb; // 0b0..010..0 where 1 is the sign bit.
const new_mask = (new_signmask << 1) -% 1; // 0b0..001..1 where the rightmost 0 is the sign bit.
r.len = new_req_limbs;
@memset(r.limbs[0 .. r.len - 1], maxInt(Limb));
r.limbs[r.len - 1] = new_mask;
}
},
},
.unsigned => switch (limit) {
.min => {
// Min bound, unsigned = 0x00
r.set(0);
},
.max => {
// Max bound, unsigned = 0xFF
r.len = req_limbs;
@memset(r.limbs[0 .. r.len - 1], maxInt(Limb));
r.limbs[r.len - 1] = mask;
},
},
}
}
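// Illustrative sketch: the i8 bounds, kept in sign-magnitude form as the doc
// comment above notes (take the absolute value for the twos-complement pattern).
test "Mutable.setTwosCompIntLimit i8 bounds (illustrative)" {
    var limbs: [calcTwosCompLimbCount(8)]Limb = undefined;
    var m = Mutable.init(&limbs, 0);
    m.setTwosCompIntLimit(.max, .signed, 8);
    try std.testing.expectEqual(@as(i8, 127), try m.toConst().to(i8));
    m.setTwosCompIntLimit(.min, .signed, 8);
    try std.testing.expectEqual(@as(i8, -128), try m.toConst().to(i8));
}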
/// r = a + scalar
///
/// r and a may be aliases.
/// scalar is a primitive integer type.
///
/// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
/// r is `@max(a.limbs.len, calcLimbLen(scalar)) + 1`.
pub fn addScalar(r: *Mutable, a: Const, scalar: anytype) void {
// Normally we could just determine the number of limbs needed with calcLimbLen,
// but that is not comptime-known when scalar is not a comptime_int. Instead, we
// use calcTwosCompLimbCount for a non-comptime_int scalar, which can be pessimistic
// in the case that scalar happens to be small in magnitude within its type, but it
// is well worth being able to use the stack and not needing an allocator passed in.
// Note that Mutable.init still sets len to calcLimbLen(scalar) in any case.
const limb_len = comptime switch (@typeInfo(@TypeOf(scalar))) {
.comptime_int => calcLimbLen(scalar),
.int => |info| calcTwosCompLimbCount(info.bits),
else => @compileError("expected scalar to be an int"),
};
var limbs: [limb_len]Limb = undefined;
const operand = init(&limbs, scalar).toConst();
return add(r, a, operand);
}
/// Base implementation for addition. Adds `@max(a.limbs.len, b.limbs.len)` elements from a and b,
/// and returns whether any overflow occurred.
/// r, a and b may be aliases.
///
/// Asserts r has enough elements to hold the result. The upper bound is `@max(a.limbs.len, b.limbs.len)`.
fn addCarry(r: *Mutable, a: Const, b: Const) bool {
if (a.eqlZero()) {
r.copy(b);
return false;
} else if (b.eqlZero()) {
r.copy(a);
return false;
} else if (a.positive != b.positive) {
if (a.positive) {
// (a) + (-b) => a - b
return r.subCarry(a, b.abs());
} else {
// (-a) + (b) => b - a
return r.subCarry(b, a.abs());
}
} else {
r.positive = a.positive;
if (a.limbs.len >= b.limbs.len) {
const c = lladdcarry(r.limbs, a.limbs, b.limbs);
r.normalize(a.limbs.len);
return c != 0;
} else {
const c = lladdcarry(r.limbs, b.limbs, a.limbs);
r.normalize(b.limbs.len);
return c != 0;
}
}
}
/// r = a + b
///
/// r, a and b may be aliases.
///
/// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
/// r is `@max(a.limbs.len, b.limbs.len) + 1`.
pub fn add(r: *Mutable, a: Const, b: Const) void {
if (r.addCarry(a, b)) {
// Fix up the result. Note that addCarry normalizes by a.limbs.len or b.limbs.len,
// so we need to set the length here.
const msl = @max(a.limbs.len, b.limbs.len);
// `[add|sub]Carry` normalizes by `msl`, so we need to fix up the result manually here.
// Note, the fact that it normalized means that the intermediary limbs are zero here.
r.len = msl + 1;
r.limbs[msl] = 1; // If this panics, there wasn't enough space in `r`.
}
}
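// Illustrative sketch: the documented bound @max(a.limbs.len, b.limbs.len) + 1
// is reached when the top limbs carry.
test "Mutable.add carries into an extra limb (illustrative)" {
    const a_limbs = [_]Limb{maxInt(Limb)};
    const a: Const = .{ .limbs = &a_limbs, .positive = true };
    var r_limbs: [2]Limb = undefined;
    var r = Mutable.init(&r_limbs, 0);
    r.add(a, a); // (2^limb_bits - 1) * 2 needs two limbs
    try std.testing.expectEqual(@as(usize, 2), r.len);
}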
/// r = a + b with 2s-complement wrapping semantics. Returns whether overflow occurred.
/// r, a and b may be aliases
///
/// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
/// r is `calcTwosCompLimbCount(bit_count)`.
pub fn addWrap(r: *Mutable, a: Const, b: Const, signedness: Signedness, bit_count: usize) bool {
const req_limbs = calcTwosCompLimbCount(bit_count);
// Slice off the upper bits if they exist; these will be ignored, allowing us to
// use addCarry to determine whether an overflow occurred.
const x = Const{
.positive = a.positive,
.limbs = a.limbs[0..@min(req_limbs, a.limbs.len)],
};
const y = Const{
.positive = b.positive,
.limbs = b.limbs[0..@min(req_limbs, b.limbs.len)],
};
var carry_truncated = false;
if (r.addCarry(x, y)) {
// There are two possibilities here:
// - We overflowed req_limbs. In this case, the carry is ignored, as it would be removed by
// truncate anyway.
// - a and b had less elements than req_limbs, and those were overflowed. This case needs to be handled.
// Note: after this we still might need to wrap.
const msl = @max(a.limbs.len, b.limbs.len);
if (msl < req_limbs) {
r.limbs[msl] = 1;
r.len = req_limbs;
@memset(r.limbs[msl + 1 .. req_limbs], 0);
} else {
carry_truncated = true;
}
}
if (!r.toConst().fitsInTwosComp(signedness, bit_count)) {
r.truncate(r.toConst(), signedness, bit_count);
return true;
}
return carry_truncated;
}
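// Illustrative sketch: u8 wrapping semantics, mirroring `250 +% 10 == 4`.
test "Mutable.addWrap wraps at the bit count (illustrative)" {
    var a_buf: [calcLimbLen(250)]Limb = undefined;
    var b_buf: [calcLimbLen(10)]Limb = undefined;
    var r_buf: [calcTwosCompLimbCount(8)]Limb = undefined;
    const a = Mutable.init(&a_buf, 250).toConst();
    const b = Mutable.init(&b_buf, 10).toConst();
    var r = Mutable.init(&r_buf, 0);
    try std.testing.expect(r.addWrap(a, b, .unsigned, 8)); // overflowed
    try std.testing.expectEqual(@as(u8, 4), try r.toConst().to(u8));
}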
/// r = a + b with 2s-complement saturating semantics.
/// r, a and b may be aliases.
///
/// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
/// r is `calcTwosCompLimbCount(bit_count)`.
pub fn addSat(r: *Mutable, a: Const, b: Const, signedness: Signedness, bit_count: usize) void {
const req_limbs = calcTwosCompLimbCount(bit_count);
// Slice off the upper bits if they exist; these will be ignored, allowing us to
// use addCarry to determine whether an overflow occurred.
const x = Const{
.positive = a.positive,
.limbs = a.limbs[0..@min(req_limbs, a.limbs.len)],
};
const y = Const{
.positive = b.positive,
.limbs = b.limbs[0..@min(req_limbs, b.limbs.len)],
};
if (r.addCarry(x, y)) {
// There are two possibilities here:
// - We overflowed req_limbs, in which case we need to saturate.
// - a and b had less elements than req_limbs, and those were overflowed.
// Note: In this case, might _also_ need to saturate.
const msl = @max(a.limbs.len, b.limbs.len);
if (msl < req_limbs) {
r.limbs[msl] = 1;
r.len = req_limbs;
// Note: Saturation may still be required if msl == req_limbs - 1
} else {
// Overflowed req_limbs, definitely saturate.
r.setTwosCompIntLimit(if (r.positive) .max else .min, signedness, bit_count);
}
}
// Saturate if the result didn't fit.
r.saturate(r.toConst(), signedness, bit_count);
}
/// Base implementation for subtraction. Subtracts `@max(a.limbs.len, b.limbs.len)` elements from a and b,
/// and returns whether any overflow occurred.
/// r, a and b may be aliases.
///
/// Asserts r has enough elements to hold the result. The upper bound is `@max(a.limbs.len, b.limbs.len)`.
fn subCarry(r: *Mutable, a: Const, b: Const) bool {
if (a.eqlZero()) {
r.copy(b);
r.positive = !b.positive;
return false;
} else if (b.eqlZero()) {
r.copy(a);
return false;
} else if (a.positive != b.positive) {
if (a.positive) {
// (a) - (-b) => a + b
return r.addCarry(a, b.abs());
} else {
// (-a) - (b) => -a + -b
return r.addCarry(a, b.negate());
}
} else if (a.positive) {
if (a.order(b) != .lt) {
// (a) - (b) => a - b
const c = llsubcarry(r.limbs, a.limbs, b.limbs);
r.normalize(a.limbs.len);
r.positive = true;
return c != 0;
} else {
// (a) - (b) => -b + a => -(b - a)
const c = llsubcarry(r.limbs, b.limbs, a.limbs);
r.normalize(b.limbs.len);
r.positive = false;
return c != 0;
}
} else {
if (a.order(b) == .lt) {
// (-a) - (-b) => -(a - b)
const c = llsubcarry(r.limbs, a.limbs, b.limbs);
r.normalize(a.limbs.len);
r.positive = false;
return c != 0;
} else {
// (-a) - (-b) => --b + -a => b - a
const c = llsubcarry(r.limbs, b.limbs, a.limbs);
r.normalize(b.limbs.len);
r.positive = true;
return c != 0;
}
}
}
/// r = a - b
///
/// r, a and b may be aliases.
///
/// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
/// r is `@max(a.limbs.len, b.limbs.len) + 1`. The +1 is not needed if both operands are positive.
pub fn sub(r: *Mutable, a: Const, b: Const) void {
r.add(a, b.negate());
}
/// r = a - b with 2s-complement wrapping semantics. Returns whether any overflow occurred.
///
/// r, a and b may be aliases
/// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
/// r is `calcTwosCompLimbCount(bit_count)`.
pub fn subWrap(r: *Mutable, a: Const, b: Const, signedness: Signedness, bit_count: usize) bool {
return r.addWrap(a, b.negate(), signedness, bit_count);
}
/// r = a - b with 2s-complement saturating semantics.
/// r, a and b may be aliases.
///
/// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
/// r is `calcTwosCompLimbCount(bit_count)`.
pub fn subSat(r: *Mutable, a: Const, b: Const, signedness: Signedness, bit_count: usize) void {
r.addSat(a, b.negate(), signedness, bit_count);
}
/// rma = a * b
///
/// `rma` may alias with `a` or `b`.
/// `a` and `b` may alias with each other.
///
/// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by
/// rma is given by `a.limbs.len + b.limbs.len`.
///
/// `limbs_buffer` is used for temporary storage. The amount required is given by `calcMulLimbsBufferLen`.
pub fn mul(rma: *Mutable, a: Const, b: Const, limbs_buffer: []Limb, allocator: ?Allocator) void {
var buf_index: usize = 0;
const a_copy = if (rma.limbs.ptr == a.limbs.ptr) blk: {
const start = buf_index;
@memcpy(limbs_buffer[buf_index..][0..a.limbs.len], a.limbs);
buf_index += a.limbs.len;
break :blk a.toMutable(limbs_buffer[start..buf_index]).toConst();
} else a;
const b_copy = if (rma.limbs.ptr == b.limbs.ptr) blk: {
const start = buf_index;
@memcpy(limbs_buffer[buf_index..][0..b.limbs.len], b.limbs);
buf_index += b.limbs.len;
break :blk b.toMutable(limbs_buffer[start..buf_index]).toConst();
} else b;
return rma.mulNoAlias(a_copy, b_copy, allocator);
}
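// Illustrative sketch: squaring in place; `aliases` is 2 in the
// `calcMulLimbsBufferLen` call because both operands alias the result.
test "Mutable.mul with aliased operands (illustrative)" {
    var buf: [2]Limb = undefined;
    var r = Mutable.init(&buf, 65535);
    var tmp: [calcMulLimbsBufferLen(1, 1, 2)]Limb = undefined;
    r.mul(r.toConst(), r.toConst(), &tmp, null);
    try std.testing.expectEqual(@as(u64, 65535 * 65535), try r.toConst().to(u64));
}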
/// rma = a * b
///
/// `rma` may not alias with `a` or `b`.
/// `a` and `b` may alias with each other.
///
/// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by
/// rma is given by `a.limbs.len + b.limbs.len`.
///
/// If `allocator` is provided, it will be used for temporary storage to improve
/// multiplication performance. `error.OutOfMemory` is handled with a fallback algorithm.
pub fn mulNoAlias(rma: *Mutable, a: Const, b: Const, allocator: ?Allocator) void {
assert(rma.limbs.ptr != a.limbs.ptr); // illegal aliasing
assert(rma.limbs.ptr != b.limbs.ptr); // illegal aliasing
if (a.limbs.len == 1 and b.limbs.len == 1) {
const ov = @mulWithOverflow(a.limbs[0], b.limbs[0]);
rma.limbs[0] = ov[0];
if (ov[1] == 0) {
rma.len = 1;
rma.positive = (a.positive == b.positive);
return;
}
}
@memset(rma.limbs[0 .. a.limbs.len + b.limbs.len], 0);
llmulacc(.add, allocator, rma.limbs, a.limbs, b.limbs);
rma.normalize(a.limbs.len + b.limbs.len);
rma.positive = (a.positive == b.positive);
}
/// rma = a * b with 2s-complement wrapping semantics.
///
/// `rma` may alias with `a` or `b`.
/// `a` and `b` may alias with each other.
///
/// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by
/// rma is given by `a.limbs.len + b.limbs.len`.
///
/// `limbs_buffer` is used for temporary storage. The amount required is given by `calcMulWrapLimbsBufferLen`.
pub fn mulWrap(
rma: *Mutable,
a: Const,
b: Const,
signedness: Signedness,
bit_count: usize,
limbs_buffer: []Limb,
allocator: ?Allocator,
) void {
var buf_index: usize = 0;
const req_limbs = calcTwosCompLimbCount(bit_count);
const a_copy = if (rma.limbs.ptr == a.limbs.ptr) blk: {
const start = buf_index;
const a_len = @min(req_limbs, a.limbs.len);
@memcpy(limbs_buffer[buf_index..][0..a_len], a.limbs[0..a_len]);
buf_index += a_len;
break :blk a.toMutable(limbs_buffer[start..buf_index]).toConst();
} else a;
const b_copy = if (rma.limbs.ptr == b.limbs.ptr) blk: {
const start = buf_index;
const b_len = @min(req_limbs, b.limbs.len);
@memcpy(limbs_buffer[buf_index..][0..b_len], b.limbs[0..b_len]);
buf_index += b_len;
break :blk b.toMutable(limbs_buffer[start..buf_index]).toConst();
} else b;
return rma.mulWrapNoAlias(a_copy, b_copy, signedness, bit_count, allocator);
}
/// rma = a * b with 2s-complement wrapping semantics.
///
/// `rma` may not alias with `a` or `b`.
/// `a` and `b` may alias with each other.
///
/// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by
/// rma is given by `a.limbs.len + b.limbs.len`.
///
/// If `allocator` is provided, it will be used for temporary storage to improve
/// multiplication performance. `error.OutOfMemory` is handled with a fallback algorithm.
pub fn mulWrapNoAlias(
rma: *Mutable,
a: Const,
b: Const,
signedness: Signedness,
bit_count: usize,
allocator: ?Allocator,
) void {
assert(rma.limbs.ptr != a.limbs.ptr); // illegal aliasing
assert(rma.limbs.ptr != b.limbs.ptr); // illegal aliasing
const req_limbs = calcTwosCompLimbCount(bit_count);
// We can ignore the upper bits here, those results will be discarded anyway.
const a_limbs = a.limbs[0..@min(req_limbs, a.limbs.len)];
const b_limbs = b.limbs[0..@min(req_limbs, b.limbs.len)];
@memset(rma.limbs[0..req_limbs], 0);
llmulacc(.add, allocator, rma.limbs, a_limbs, b_limbs);
rma.normalize(@min(req_limbs, a.limbs.len + b.limbs.len));
rma.positive = (a.positive == b.positive);
rma.truncate(rma.toConst(), signedness, bit_count);
}
/// r = @bitReverse(a) with 2s-complement semantics.
/// r and a may be aliases.
///
/// Asserts the result fits in `r`. Upper bound on the number of limbs needed by
/// r is `calcTwosCompLimbCount(bit_count)`.
pub fn bitReverse(r: *Mutable, a: Const, signedness: Signedness, bit_count: usize) void {
if (bit_count == 0) return;
r.copy(a);
const limbs_required = calcTwosCompLimbCount(bit_count);
if (!a.positive) {
r.positive = true; // Negate.
r.bitNotWrap(r.toConst(), .unsigned, bit_count); // Bitwise NOT.
r.addScalar(r.toConst(), 1); // Add one.
} else if (limbs_required > a.limbs.len) {
// Zero-extend to our output length
for (r.limbs[a.limbs.len..limbs_required]) |*limb| {
limb.* = 0;
}
r.len = limbs_required;
}
// 0b0..01..1000 with @log2(@sizeOf(Limb)) consecutive ones
const endian_mask: usize = (@sizeOf(Limb) - 1) << 3;
const bytes = std.mem.sliceAsBytes(r.limbs);
var k: usize = 0;
while (k < ((bit_count + 1) / 2)) : (k += 1) {
var i = k;
var rev_i = bit_count - i - 1;
// This "endian mask" remaps a low (LE) byte to the corresponding high
// (BE) byte in the Limb, without changing which limbs we are indexing
if (native_endian == .big) {
i ^= endian_mask;
rev_i ^= endian_mask;
}
const bit_i = std.mem.readPackedInt(u1, bytes, i, .little);
const bit_rev_i = std.mem.readPackedInt(u1, bytes, rev_i, .little);
std.mem.writePackedInt(u1, bytes, i, bit_rev_i, .little);
std.mem.writePackedInt(u1, bytes, rev_i, bit_i, .little);
}
// Calculate signed-magnitude representation for output
if (signedness == .signed) {
const last_bit = switch (native_endian) {
.little => std.mem.readPackedInt(u1, bytes, bit_count - 1, .little),
.big => std.mem.readPackedInt(u1, bytes, (bit_count - 1) ^ endian_mask, .little),
};
if (last_bit == 1) {
r.bitNotWrap(r.toConst(), .unsigned, bit_count); // Bitwise NOT.
r.addScalar(r.toConst(), 1); // Add one.
r.positive = false; // Negate.
}
}
r.normalize(r.len);
}
/// r = @byteSwap(a) with 2s-complement semantics.
/// r and a may be aliases.
///
/// Asserts the result fits in `r`. Upper bound on the number of limbs needed by
/// r is `calcTwosCompLimbCount(8*byte_count)`.
pub fn byteSwap(r: *Mutable, a: Const, signedness: Signedness, byte_count: usize) void {
if (byte_count == 0) return;
r.copy(a);
const limbs_required = calcTwosCompLimbCount(8 * byte_count);
if (!a.positive) {
r.positive = true; // Negate.
r.bitNotWrap(r.toConst(), .unsigned, 8 * byte_count); // Bitwise NOT.
r.addScalar(r.toConst(), 1); // Add one.
} else if (limbs_required > a.limbs.len) {
// Zero-extend to our output length
for (r.limbs[a.limbs.len..limbs_required]) |*limb| {
limb.* = 0;
}
r.len = limbs_required;
}
// 0b0..01..1 with @log2(@sizeOf(Limb)) trailing ones
const endian_mask: usize = @sizeOf(Limb) - 1;
var bytes = std.mem.sliceAsBytes(r.limbs);
assert(bytes.len >= byte_count);
var k: usize = 0;
while (k < (byte_count + 1) / 2) : (k += 1) {
var i = k;
var rev_i = byte_count - k - 1;
// This "endian mask" remaps a low (LE) byte to the corresponding high
// (BE) byte in the Limb, without changing which limbs we are indexing
if (native_endian == .big) {
i ^= endian_mask;
rev_i ^= endian_mask;
}
const byte_i = bytes[i];
const byte_rev_i = bytes[rev_i];
bytes[rev_i] = byte_i;
bytes[i] = byte_rev_i;
}
// Calculate signed-magnitude representation for output
if (signedness == .signed) {
const last_byte = switch (native_endian) {
.little => bytes[byte_count - 1],
.big => bytes[(byte_count - 1) ^ endian_mask],
};
if (last_byte & (1 << 7) != 0) { // Check sign bit of last byte
r.bitNotWrap(r.toConst(), .unsigned, 8 * byte_count); // Bitwise NOT.
r.addScalar(r.toConst(), 1); // Add one.
r.positive = false; // Negate.
}
}
r.normalize(r.len);
}
/// r = @popCount(a) with 2s-complement semantics.
/// r and a may be aliases.
///
/// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
/// r is `calcTwosCompLimbCount(bit_count)`.
pub fn popCount(r: *Mutable, a: Const, bit_count: usize) void {
r.copy(a);
if (!a.positive) {
r.positive = true; // Negate.
r.bitNotWrap(r.toConst(), .unsigned, bit_count); // Bitwise NOT.
r.addScalar(r.toConst(), 1); // Add one.
}
var sum: Limb = 0;
for (r.limbs[0..r.len]) |limb| {
sum += @popCount(limb);
}
r.set(sum);
}
/// rma = a * a
///
/// `rma` may not alias with `a`.
///
/// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by
/// rma is given by `2 * a.limbs.len + 1`.
///
/// If `allocator` is provided, it will be used for temporary storage to improve
/// multiplication performance. `error.OutOfMemory` is handled with a fallback algorithm.
pub fn sqrNoAlias(rma: *Mutable, a: Const, opt_allocator: ?Allocator) void {
_ = opt_allocator;
assert(rma.limbs.ptr != a.limbs.ptr); // illegal aliasing
@memset(rma.limbs, 0);
llsquareBasecase(rma.limbs, a.limbs);
rma.normalize(2 * a.limbs.len + 1);
rma.positive = true;
}
/// q = a / b (rem r)
///
/// a / b is floored (rounded towards negative infinity).
/// q may alias with a or b.
///
/// Asserts there is enough memory to store q and r.
/// The upper bound for r limb count is `b.limbs.len`.
/// The upper bound for q limb count is given by `a.limbs.len`.
///
/// `limbs_buffer` is used for temporary storage. The amount required is given by `calcDivLimbsBufferLen`.
pub fn divFloor(
q: *Mutable,
r: *Mutable,
a: Const,
b: Const,
limbs_buffer: []Limb,
) void {
const sep = a.limbs.len + 2;
var x = a.toMutable(limbs_buffer[0..sep]);
var y = b.toMutable(limbs_buffer[sep..]);
div(q, r, &x, &y);
// Note, `div` performs truncating division, which satisfies
// @divTrunc(a, b) * b + @rem(a, b) = a
// so r = a - @divTrunc(a, b) * b
// Note, @rem(a, -b) = @rem(a, b) and @rem(-a, b) = @rem(-a, -b) = -@rem(a, b)
// For divFloor, we want to satisfy
// @divFloor(a, b) * b + @mod(a, b) = a
// Note:
// @divFloor(-a, b)
// = @divFloor(a, -b)
// = -@divCeil(a, b)
// = -@divFloor(a + b - 1, b)
// = -@divTrunc(a + b - 1, b)
// Note (1):
// @divTrunc(a + b - 1, b) * b + @rem(a + b - 1, b) = a + b - 1
// = @divTrunc(a + b - 1, b) * b + @rem(a - 1, b) = a + b - 1
// = @divTrunc(a + b - 1, b) * b + @rem(a - 1, b) - b + 1 = a
if (a.positive and b.positive) {
// Positive-positive case, don't need to do anything.
} else if (a.positive and !b.positive) {
// a/-b -> q is negative, and so we need to fix flooring.
// Subtract one to make the division flooring.
// @divFloor(a, -b) * -b + @mod(a, -b) = a
// If b divides a exactly, we have @divFloor(a, -b) * -b = a
// Else, we have @divFloor(a, -b) * -b > a, so @mod(a, -b) becomes negative
// We have:
// @divFloor(a, -b) * -b + @mod(a, -b) = a
// = -@divTrunc(a + b - 1, b) * -b + @mod(a, -b) = a
// = @divTrunc(a + b - 1, b) * b + @mod(a, -b) = a
// Substitute a for (1):
// @divTrunc(a + b - 1, b) * b + @rem(a - 1, b) - b + 1 = @divTrunc(a + b - 1, b) * b + @mod(a, -b)
// Yields:
// @mod(a, -b) = @rem(a - 1, b) - b + 1
// Note that `r` holds @rem(a, b) at this point.
//
// If @rem(a, b) is not 0:
// @rem(a - 1, b) = @rem(a, b) - 1
// => @mod(a, -b) = @rem(a, b) - 1 - b + 1 = @rem(a, b) - b
// Else:
// @rem(a - 1, b) = @rem(a + b - 1, b) = @rem(b - 1, b) = b - 1
// => @mod(a, -b) = b - 1 - b + 1 = 0
if (!r.eqlZero()) {
q.addScalar(q.toConst(), -1);
r.positive = true;
r.sub(r.toConst(), y.toConst().abs());
}
} else if (!a.positive and b.positive) {
// -a/b -> q is negative, and so we need to fix flooring.
// Subtract one to make the division flooring.
// @divFloor(-a, b) * b + @mod(-a, b) = -a
// If b divides a exactly, we have @divFloor(-a, b) * b = -a
// Else, we have @divFloor(-a, b) * b < -a, so @mod(-a, b) becomes positive
// We have:
// @divFloor(-a, b) * b + @mod(-a, b) = -a
// = -@divTrunc(a + b - 1, b) * b + @mod(-a, b) = -a
// = @divTrunc(a + b - 1, b) * b - @mod(-a, b) = a
// Substitute a for (1):
// @divTrunc(a + b - 1, b) * b + @rem(a - 1, b) - b + 1 = @divTrunc(a + b - 1, b) * b - @mod(-a, b)
// Yields:
// @rem(a - 1, b) - b + 1 = -@mod(-a, b)
// => -@mod(-a, b) = @rem(a - 1, b) - b + 1
// => @mod(-a, b) = -(@rem(a - 1, b) - b + 1) = -@rem(a - 1, b) + b - 1
//
// If @rem(a, b) is not 0:
// @rem(a - 1, b) = @rem(a, b) - 1
// => @mod(-a, b) = -(@rem(a, b) - 1) + b - 1 = -@rem(a, b) + 1 + b - 1 = -@rem(a, b) + b
// Else :
// @rem(a - 1, b) = b - 1
// => @mod(-a, b) = -(b - 1) + b - 1 = 0
if (!r.eqlZero()) {
q.addScalar(q.toConst(), -1);
r.positive = false;
r.add(r.toConst(), y.toConst().abs());
}
} else if (!a.positive and !b.positive) {
// -a/-b -> q is positive, don't need to do anything to fix flooring.
// @divFloor(-a, -b) * -b + @mod(-a, -b) = -a
// If b divides a exactly, we have @divFloor(-a, -b) * -b = -a
// Else, we have @divFloor(-a, -b) * -b > -a, so @mod(-a, -b) becomes negative
// We have:
// @divFloor(-a, -b) * -b + @mod(-a, -b) = -a
// = @divTrunc(a, b) * -b + @mod(-a, -b) = -a
// = @divTrunc(a, b) * b - @mod(-a, -b) = a
// We also have:
// @divTrunc(a, b) * b + @rem(a, b) = a
// Substitute a:
// @divTrunc(a, b) * b + @rem(a, b) = @divTrunc(a, b) * b - @mod(-a, -b)
// => @rem(a, b) = -@mod(-a, -b)
// => @mod(-a, -b) = -@rem(a, b)
r.positive = false;
}
}
/// q = a / b (rem r)
///
/// a / b is truncated (rounded towards zero).
/// q may alias with a or b.
///
/// Asserts there is enough memory to store q and r.
/// The upper bound for r limb count is `b.limbs.len`.
/// The upper bound for q limb count is given by `a.limbs.len`.
///
/// `limbs_buffer` is used for temporary storage. The amount required is given by `calcDivLimbsBufferLen`.
pub fn divTrunc(
q: *Mutable,
r: *Mutable,
a: Const,
b: Const,
limbs_buffer: []Limb,
) void {
const sep = a.limbs.len + 2;
var x = a.toMutable(limbs_buffer[0..sep]);
var y = b.toMutable(limbs_buffer[sep..]);
div(q, r, &x, &y);
}
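// Illustrative sketch: contrasting the two division flavors for -7 / 2, with
// the shared temporary sized by `calcDivLimbsBufferLen`.
test "Mutable.divFloor vs divTrunc for -7 / 2 (illustrative)" {
    var a_buf: [calcLimbLen(-7)]Limb = undefined;
    var b_buf: [calcLimbLen(2)]Limb = undefined;
    const a = Mutable.init(&a_buf, -7).toConst();
    const b = Mutable.init(&b_buf, 2).toConst();
    var q_buf: [4]Limb = undefined;
    var r_buf: [4]Limb = undefined;
    var tmp: [calcDivLimbsBufferLen(1, 1)]Limb = undefined;
    var q = Mutable.init(&q_buf, 0);
    var r = Mutable.init(&r_buf, 0);
    q.divFloor(&r, a, b, &tmp);
    try std.testing.expectEqual(@as(i32, -4), try q.toConst().to(i32));
    try std.testing.expectEqual(@as(i32, 1), try r.toConst().to(i32));
    q.divTrunc(&r, a, b, &tmp);
    try std.testing.expectEqual(@as(i32, -3), try q.toConst().to(i32));
    try std.testing.expectEqual(@as(i32, -1), try r.toConst().to(i32));
}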
/// r = a << shift, in other words, r = a * 2^shift
///
/// r and a may alias.
///
/// Asserts there is enough memory to fit the result. The upper bound Limb count is
/// `a.limbs.len + (shift / (@sizeOf(Limb) * 8)) + 1`.
pub fn shiftLeft(r: *Mutable, a: Const, shift: usize) void {
llshl(r.limbs, a.limbs, shift);
r.normalize(a.limbs.len + (shift / limb_bits) + 1);
r.positive = a.positive;
}
/// r = a <<| shift with 2s-complement saturating semantics.
///
/// r and a may alias.
///
/// Asserts there is enough memory to fit the result. The upper bound on the Limb count
/// is `calcTwosCompLimbCount(bit_count)`.
pub fn shiftLeftSat(r: *Mutable, a: Const, shift: usize, signedness: Signedness, bit_count: usize) void {
// Special case: When the argument is negative, but the result is supposed to be unsigned,
// return 0 in all cases.
if (!a.positive and signedness == .unsigned) {
r.set(0);
return;
}
// Check whether the shift is going to overflow. This is the case
// when (in 2s complement) any bit above `bit_count - shift` is set in the unshifted value.
// Note, the sign bit is not counted here.
// Handle shifts larger than the target type. This also deals with
// 0-bit integers.
if (bit_count <= shift) {
// In this case, overflow is avoided only if `a` is zero.
if (a.eqlZero()) {
r.set(0);
} else {
r.setTwosCompIntLimit(if (a.positive) .max else .min, signedness, bit_count);
}
return;
}
const checkbit = bit_count - shift - @intFromBool(signedness == .signed);
// If `checkbit` and more significant bits are zero, no overflow will take place.
if (checkbit >= a.limbs.len * limb_bits) {
// `checkbit` is outside the range of a, so definitely no overflow will take place. We
// can defer to a normal shift.
// Note that if `a` is normalized (which we assume), this checks for set bits in the upper limbs.
// Note, in this case r should already have enough limbs required to perform the normal shift.
// In this case the shift of the most significant limb may still overflow.
r.shiftLeft(a, shift);
return;
} else if (checkbit < (a.limbs.len - 1) * limb_bits) {
// `checkbit` is not in the most significant limb. If `a` is normalized the most significant
// limb will not be zero, so in this case we need to saturate. Note that `a.limbs.len` must be
// at least one according to normalization rules.
r.setTwosCompIntLimit(if (a.positive) .max else .min, signedness, bit_count);
return;
}
// Generate a mask with the bits to check in the most significant limb. We'll need to check
// all bits with equal or more significance than checkbit.
// const msb = @truncate(Log2Limb, checkbit);
// const checkmask = (@as(Limb, 1) << msb) -% 1;
if (a.limbs[a.limbs.len - 1] >> @as(Log2Limb, @truncate(checkbit)) != 0) {
// Need to saturate.
r.setTwosCompIntLimit(if (a.positive) .max else .min, signedness, bit_count);
return;
}
// This shift should not be able to overflow, so invoke llshl and normalize manually
// to avoid the extra required limb.
llshl(r.limbs, a.limbs, shift);
r.normalize(a.limbs.len + (shift / limb_bits));
r.positive = a.positive;
}
/// r = a >> shift
/// r and a may alias.
///
/// Asserts there is enough memory to fit the result. The upper bound Limb count is
/// `a.limbs.len - (shift / (@sizeOf(Limb) * 8))`.
pub fn shiftRight(r: *Mutable, a: Const, shift: usize) void {
const full_limbs_shifted_out = shift / limb_bits;
const remaining_bits_shifted_out = shift % limb_bits;
if (a.limbs.len <= full_limbs_shifted_out) {
// Shifting negative numbers converges to -1 instead of 0
if (a.positive) {
r.len = 1;
r.positive = true;
r.limbs[0] = 0;
} else {
r.len = 1;
r.positive = false;
r.limbs[0] = 1;
}
return;
}
const nonzero_negative_shiftout = if (a.positive) false else nonzero: {
for (a.limbs[0..full_limbs_shifted_out]) |x| {
if (x != 0)
break :nonzero true;
}
if (remaining_bits_shifted_out == 0)
break :nonzero false;
const not_covered: Log2Limb = @intCast(limb_bits - remaining_bits_shifted_out);
break :nonzero a.limbs[full_limbs_shifted_out] << not_covered != 0;
};
llshr(r.limbs, a.limbs, shift);
r.len = a.limbs.len - full_limbs_shifted_out;
r.positive = a.positive;
if (nonzero_negative_shiftout) r.addScalar(r.toConst(), -1);
r.normalize(r.len);
}
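// Illustrative sketch: an arithmetic right shift of a negative value rounds
// toward negative infinity, matching `@divFloor(-5, 2) == -3`.
test "Mutable.shiftRight on a negative value (illustrative)" {
    var buf: [calcLimbLen(-5)]Limb = undefined;
    var m = Mutable.init(&buf, -5);
    m.shiftRight(m.toConst(), 1);
    try std.testing.expectEqual(@as(i32, -3), try m.toConst().to(i32));
}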
/// r = ~a under 2s complement wrapping semantics.
/// r may alias with a.
///
/// Asserts that r has enough limbs to store the result. The upper bound on the Limb count
/// is `calcTwosCompLimbCount(bit_count)`.
pub fn bitNotWrap(r: *Mutable, a: Const, signedness: Signedness, bit_count: usize) void {
r.copy(a.negate());
const negative_one = Const{ .limbs = &.{1}, .positive = false };
_ = r.addWrap(r.toConst(), negative_one, signedness, bit_count);
}
/// r = a | b under 2s complement semantics.
/// r may alias with a or b.
///
/// a and b are zero-extended to the longer of a or b.
///
/// Asserts that r has enough limbs to store the result. Upper bound is `@max(a.limbs.len, b.limbs.len)`.
pub fn bitOr(r: *Mutable, a: Const, b: Const) void {
// Trivial cases, llsignedor does not support zero.
if (a.eqlZero()) {
r.copy(b);
return;
} else if (b.eqlZero()) {
r.copy(a);
return;
}
if (a.limbs.len >= b.limbs.len) {
r.positive = llsignedor(r.limbs, a.limbs, a.positive, b.limbs, b.positive);
r.normalize(if (b.positive) a.limbs.len else b.limbs.len);
} else {
r.positive = llsignedor(r.limbs, b.limbs, b.positive, a.limbs, a.positive);
r.normalize(if (a.positive) b.limbs.len else a.limbs.len);
}
}
/// r = a & b under 2s complement semantics.
/// r may alias with a or b.
///
/// Asserts that r has enough limbs to store the result.
/// If only a is positive, the upper bound is `a.limbs.len`.
/// If only b is positive, the upper bound is `b.limbs.len`.
/// If a and b are positive, the upper bound is `@min(a.limbs.len, b.limbs.len)`.
/// If a and b are negative, the upper bound is `@max(a.limbs.len, b.limbs.len) + 1`.
pub fn bitAnd(r: *Mutable, a: Const, b: Const) void {
// Trivial cases, llsignedand does not support zero.
if (a.eqlZero()) {
r.copy(a);
return;
} else if (b.eqlZero()) {
r.copy(b);
return;
}
if (a.limbs.len >= b.limbs.len) {
r.positive = llsignedand(r.limbs, a.limbs, a.positive, b.limbs, b.positive);
r.normalize(if (b.positive) b.limbs.len else if (a.positive) a.limbs.len else a.limbs.len + 1);
} else {
r.positive = llsignedand(r.limbs, b.limbs, b.positive, a.limbs, a.positive);
r.normalize(if (a.positive) a.limbs.len else if (b.positive) b.limbs.len else b.limbs.len + 1);
}
}
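// Illustrative sketch: twos-complement semantics on negative operands, mirroring
// `-2 & -3 == -4`; two negative operands need @max of the lengths plus one limb.
test "Mutable.bitAnd with negative operands (illustrative)" {
    var a_buf: [calcLimbLen(-2)]Limb = undefined;
    var b_buf: [calcLimbLen(-3)]Limb = undefined;
    const a = Mutable.init(&a_buf, -2).toConst();
    const b = Mutable.init(&b_buf, -3).toConst();
    var r_buf: [2]Limb = undefined;
    var r = Mutable.init(&r_buf, 0);
    r.bitAnd(a, b);
    try std.testing.expectEqual(@as(i32, -4), try r.toConst().to(i32));
}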
/// r = a ^ b under 2s complement semantics.
/// r may alias with a or b.
///
/// Asserts that r has enough limbs to store the result. If a and b share the same signedness, the
/// upper bound is `@max(a.limbs.len, b.limbs.len)`. Otherwise, if either a or b is negative
/// but not both, the upper bound is `@max(a.limbs.len, b.limbs.len) + 1`.
pub fn bitXor(r: *Mutable, a: Const, b: Const) void {
// Trivial cases, because llsignedxor does not support negative zero.
if (a.eqlZero()) {
r.copy(b);
return;
} else if (b.eqlZero()) {
r.copy(a);
return;
}
if (a.limbs.len > b.limbs.len) {
r.positive = llsignedxor(r.limbs, a.limbs, a.positive, b.limbs, b.positive);
r.normalize(a.limbs.len + @intFromBool(a.positive != b.positive));
} else {
r.positive = llsignedxor(r.limbs, b.limbs, b.positive, a.limbs, a.positive);
r.normalize(b.limbs.len + @intFromBool(a.positive != b.positive));
}
}
/// rma may alias x or y.
/// x and y may alias each other.
/// Asserts that `rma` has enough limbs to store the result. Upper bound is
/// `@min(x.limbs.len, y.limbs.len)`.
///
/// `limbs_buffer` is used for temporary storage during the operation. When this function returns,
/// it will have the same length as it had when the function was called.
pub fn gcd(rma: *Mutable, x: Const, y: Const, limbs_buffer: *std.ArrayList(Limb)) !void {
const prev_len = limbs_buffer.items.len;
defer limbs_buffer.shrinkRetainingCapacity(prev_len);
const x_copy = if (rma.limbs.ptr == x.limbs.ptr) blk: {
const start = limbs_buffer.items.len;
try limbs_buffer.appendSlice(x.limbs);
break :blk x.toMutable(limbs_buffer.items[start..]).toConst();
} else x;
const y_copy = if (rma.limbs.ptr == y.limbs.ptr) blk: {
const start = limbs_buffer.items.len;
try limbs_buffer.appendSlice(y.limbs);
break :blk y.toMutable(limbs_buffer.items[start..]).toConst();
} else y;
return gcdLehmer(rma, x_copy, y_copy, limbs_buffer);
}
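// Illustrative sketch: the ArrayList-backed gcd; the list's allocator also backs
// the temporaries inside gcdLehmer.
test "Mutable.gcd of 48 and 36 (illustrative)" {
    var x_buf: [calcLimbLen(48)]Limb = undefined;
    var y_buf: [calcLimbLen(36)]Limb = undefined;
    const x = Mutable.init(&x_buf, 48).toConst();
    const y = Mutable.init(&y_buf, 36).toConst();
    var r_buf: [1]Limb = undefined;
    var r = Mutable.init(&r_buf, 0);
    var limbs_buffer = std.ArrayList(Limb).init(std.testing.allocator);
    defer limbs_buffer.deinit();
    try r.gcd(x, y, &limbs_buffer);
    try std.testing.expectEqual(@as(u32, 12), try r.toConst().to(u32));
}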
/// r = a ^ b
///
/// r may not alias a.
///
/// Asserts that `r` has enough limbs to store the result. Upper bound is
/// `calcPowLimbsBufferLen(a.bitCountAbs(), b)`.
///
/// `limbs_buffer` is used for temporary storage.
/// The amount required is given by `calcPowLimbsBufferLen`.
pub fn pow(r: *Mutable, a: Const, b: u32, limbs_buffer: []Limb) void {
assert(r.limbs.ptr != a.limbs.ptr); // illegal aliasing
// Handle all the trivial cases first
switch (b) {
0 => {
// a^0 = 1
return r.set(1);
},
1 => {
// a^1 = a
return r.copy(a);
},
else => {},
}
if (a.eqlZero()) {
// 0^b = 0
return r.set(0);
} else if (a.limbs.len == 1 and a.limbs[0] == 1) {
// 1^b = 1 and (-1)^b = ±1
r.set(1);
r.positive = a.positive or (b & 1) == 0;
return;
}
// Here a>1 and b>1
const needed_limbs = calcPowLimbsBufferLen(a.bitCountAbs(), b);
assert(r.limbs.len >= needed_limbs);
assert(limbs_buffer.len >= needed_limbs);
llpow(r.limbs, a.limbs, b, limbs_buffer);
r.normalize(needed_limbs);
r.positive = a.positive or (b & 1) == 0;
}
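// Illustrative sketch: 3^7 == 2187, with both buffers sized by
// `calcPowLimbsBufferLen` (3 has a bit count of 2).
test "Mutable.pow with a small exponent (illustrative)" {
    var a_buf: [calcLimbLen(3)]Limb = undefined;
    const a = Mutable.init(&a_buf, 3).toConst();
    const needed = comptime calcPowLimbsBufferLen(2, 7);
    var r_buf: [needed]Limb = undefined;
    var tmp: [needed]Limb = undefined;
    var r = Mutable.init(&r_buf, 0);
    r.pow(a, 7, &tmp);
    try std.testing.expectEqual(@as(u32, 2187), try r.toConst().to(u32));
}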
/// r = ⌊√a⌋
///
/// r may alias a.
///
/// Asserts that `r` has enough limbs to store the result. Upper bound is
/// `(a.limbs.len - 1) / 2 + 1`.
///
/// `limbs_buffer` is used for temporary storage.
/// The amount required is given by `calcSqrtLimbsBufferLen`.
pub fn sqrt(
r: *Mutable,
a: Const,
limbs_buffer: []Limb,
) void {
// Brent and Zimmermann, Modern Computer Arithmetic, Algorithm 1.13 SqrtInt
// https://members.loria.fr/PZimmermann/mca/pub226.html
var buf_index: usize = 0;
var t = b: {
const start = buf_index;
buf_index += a.limbs.len;
break :b Mutable.init(limbs_buffer[start..buf_index], 0);
};
var u = b: {
const start = buf_index;
const shift = (a.bitCountAbs() + 1) / 2;
buf_index += 1 + ((shift / limb_bits) + 1);
var m = Mutable.init(limbs_buffer[start..buf_index], 1);
m.shiftLeft(m.toConst(), shift); // u must be >= ⌊√a⌋, and should be as small as possible for efficiency
break :b m;
};
var s = b: {
const start = buf_index;
buf_index += u.limbs.len;
break :b u.toConst().toMutable(limbs_buffer[start..buf_index]);
};
var rem = b: {
const start = buf_index;
buf_index += s.limbs.len;
break :b Mutable.init(limbs_buffer[start..buf_index], 0);
};
while (true) {
t.divFloor(&rem, a, s.toConst(), limbs_buffer[buf_index..]);
t.add(t.toConst(), s.toConst());
u.shiftRight(t.toConst(), 1);
if (u.toConst().order(s.toConst()).compare(.gte)) {
r.copy(s.toConst());
return;
}
// Avoid copying u to s by swapping u and s
const tmp_s = s;
s = u;
u = tmp_s;
}
}
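// Illustrative sketch: ⌊√99⌋ == 9; the scratch space comes from
// `calcSqrtLimbsBufferLen` (99 has a bit count of 7).
test "Mutable.sqrt is the floored root (illustrative)" {
    var a_buf: [calcLimbLen(99)]Limb = undefined;
    const a = Mutable.init(&a_buf, 99).toConst();
    var r_buf: [(calcLimbLen(99) - 1) / 2 + 1]Limb = undefined;
    var tmp: [calcSqrtLimbsBufferLen(7)]Limb = undefined;
    var r = Mutable.init(&r_buf, 0);
    r.sqrt(a, &tmp);
    try std.testing.expectEqual(@as(u32, 9), try r.toConst().to(u32));
}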
/// rma may not alias x or y.
/// x and y may alias each other.
/// Asserts that `rma` has enough limbs to store the result. Upper bound is given by `calcGcdNoAliasLimbLen`.
///
/// `limbs_buffer` is used for temporary storage during the operation.
pub fn gcdNoAlias(rma: *Mutable, x: Const, y: Const, limbs_buffer: *std.ArrayList(Limb)) !void {
assert(rma.limbs.ptr != x.limbs.ptr); // illegal aliasing
assert(rma.limbs.ptr != y.limbs.ptr); // illegal aliasing
return gcdLehmer(rma, x, y, limbs_buffer);
}
fn gcdLehmer(result: *Mutable, xa: Const, ya: Const, limbs_buffer: *std.ArrayList(Limb)) !void {
var x = try xa.toManaged(limbs_buffer.allocator);
defer x.deinit();
x.abs();
var y = try ya.toManaged(limbs_buffer.allocator);
defer y.deinit();
y.abs();
if (x.toConst().order(y.toConst()) == .lt) {
x.swap(&y);
}
var t_big = try Managed.init(limbs_buffer.allocator);
defer t_big.deinit();
var r = try Managed.init(limbs_buffer.allocator);
defer r.deinit();
var tmp_x = try Managed.init(limbs_buffer.allocator);
defer tmp_x.deinit();
while (y.len() > 1 and !y.eqlZero()) {
assert(x.isPositive() and y.isPositive());
assert(x.len() >= y.len());
var xh: SignedDoubleLimb = x.limbs[x.len() - 1];
var yh: SignedDoubleLimb = if (x.len() > y.len()) 0 else y.limbs[x.len() - 1];
var A: SignedDoubleLimb = 1;
var B: SignedDoubleLimb = 0;
var C: SignedDoubleLimb = 0;
var D: SignedDoubleLimb = 1;
while (yh + C != 0 and yh + D != 0) {
const q = @divFloor(xh + A, yh + C);
const qp = @divFloor(xh + B, yh + D);
if (q != qp) {
break;
}
var t = A - q * C;
A = C;
C = t;
t = B - q * D;
B = D;
D = t;
t = xh - q * yh;
xh = yh;
yh = t;
}
if (B == 0) {
// t_big = x % y, r is unused
try r.divTrunc(&t_big, &x, &y);
assert(t_big.isPositive());
x.swap(&y);
y.swap(&t_big);
} else {
var storage: [8]Limb = undefined;
const Ap = fixedIntFromSignedDoubleLimb(A, storage[0..2]).toManaged(limbs_buffer.allocator);
const Bp = fixedIntFromSignedDoubleLimb(B, storage[2..4]).toManaged(limbs_buffer.allocator);
const Cp = fixedIntFromSignedDoubleLimb(C, storage[4..6]).toManaged(limbs_buffer.allocator);
const Dp = fixedIntFromSignedDoubleLimb(D, storage[6..8]).toManaged(limbs_buffer.allocator);
// t_big = Ax + By
try r.mul(&x, &Ap);
try t_big.mul(&y, &Bp);
try t_big.add(&r, &t_big);
// u = Cx + Dy, r as u
try tmp_x.copy(x.toConst());
try x.mul(&tmp_x, &Cp);
try r.mul(&y, &Dp);
try r.add(&x, &r);
x.swap(&t_big);
y.swap(&r);
}
}
// euclidean algorithm
assert(x.toConst().order(y.toConst()) != .lt);
while (!y.toConst().eqlZero()) {
try t_big.divTrunc(&r, &x, &y);
x.swap(&y);
y.swap(&r);
}
result.copy(x.toConst());
}
// Truncates by default.
fn div(q: *Mutable, r: *Mutable, x: *Mutable, y: *Mutable) void {
assert(!y.eqlZero()); // division by zero
assert(q != r); // illegal aliasing
const q_positive = (x.positive == y.positive);
const r_positive = x.positive;
if (x.toConst().orderAbs(y.toConst()) == .lt) {
// q may alias x so handle r first.
r.copy(x.toConst());
r.positive = r_positive;
q.set(0);
return;
}
// Handle trailing zero-words of divisor/dividend. These are not handled in the following
// algorithms.
// Note, there must be a non-zero limb for either.
// const x_trailing = std.mem.indexOfScalar(Limb, x.limbs[0..x.len], 0).?;
// const y_trailing = std.mem.indexOfScalar(Limb, y.limbs[0..y.len], 0).?;
const x_trailing = for (x.limbs[0..x.len], 0..) |xi, i| {
if (xi != 0) break i;
} else unreachable;
const y_trailing = for (y.limbs[0..y.len], 0..) |yi, i| {
if (yi != 0) break i;
} else unreachable;
const xy_trailing = @min(x_trailing, y_trailing);
if (y.len - xy_trailing == 1) {
const divisor = y.limbs[y.len - 1];
// Optimization for small divisor. By using a half limb we can avoid requiring DoubleLimb
// divisions in the hot code path. This may often require compiler_rt software-emulation.
if (divisor < maxInt(HalfLimb)) {
lldiv0p5(q.limbs, &r.limbs[0], x.limbs[xy_trailing..x.len], @as(HalfLimb, @intCast(divisor)));
} else {
lldiv1(q.limbs, &r.limbs[0], x.limbs[xy_trailing..x.len], divisor);
}
q.normalize(x.len - xy_trailing);
q.positive = q_positive;
r.len = 1;
r.positive = r_positive;
} else {
// Shrink x, y such that the trailing zero limbs shared between are removed.
var x0 = Mutable{
.limbs = x.limbs[xy_trailing..],
.len = x.len - xy_trailing,
.positive = true,
};
var y0 = Mutable{
.limbs = y.limbs[xy_trailing..],
.len = y.len - xy_trailing,
.positive = true,
};
divmod(q, r, &x0, &y0);
q.positive = q_positive;
r.positive = r_positive;
}
if (xy_trailing != 0 and r.limbs[r.len - 1] != 0) {
// Manually shift here since we know it's limb-aligned.
mem.copyBackwards(Limb, r.limbs[xy_trailing..], r.limbs[0..r.len]);
@memset(r.limbs[0..xy_trailing], 0);
r.len += xy_trailing;
}
}
/// Handbook of Applied Cryptography, 14.20
///
/// x = qy + r where 0 <= r < y
/// y is modified but returned intact.
fn divmod(
q: *Mutable,
r: *Mutable,
x: *Mutable,
y: *Mutable,
) void {
// 0.
// Normalize so that y[t] > b/2
const lz = @clz(y.limbs[y.len - 1]);
const norm_shift = if (lz == 0 and y.toConst().isOdd())
limb_bits // Force an extra limb so that y is even.
else
lz;
x.shiftLeft(x.toConst(), norm_shift);
y.shiftLeft(y.toConst(), norm_shift);
const n = x.len - 1;
const t = y.len - 1;
const shift = n - t;
// 1.
// for 0 <= j <= n - t, set q[j] to 0
q.len = shift + 1;
q.positive = true;
@memset(q.limbs[0..q.len], 0);
// 2.
// while x >= y * b^(n - t):
// x -= y * b^(n - t)
// q[n - t] += 1
// Note, this algorithm is performed only once if y[t] > base/2 and y is even, which we
// enforced in step 0. This means we can replace the while with an if.
// Note, multiplication by b^(n - t) comes down to shifting to the right by n - t limbs.
// We can also replace x >= y * b^(n - t) by x/b^(n - t) >= y, and use shifts for that.
{
// x >= y * b^(n - t) can be replaced by x/b^(n - t) >= y.
// 'divide' x by b^(n - t)
var tmp = Mutable{
.limbs = x.limbs[shift..],
.len = x.len - shift,
.positive = true,
};
if (tmp.toConst().order(y.toConst()) != .lt) {
// Perform x -= y * b^(n - t)
// Note, we can subtract y from x[n - t..] and get the result without shifting.
// We can also re-use tmp which already contains the relevant part of x. Note that
// this also edits x.
// Due to the check above, this cannot underflow.
tmp.sub(tmp.toConst(), y.toConst());
// tmp.sub normalized tmp, but we need to normalize x now.
x.limbs.len = tmp.limbs.len + shift;
q.limbs[shift] += 1;
}
}
// 3.
// for i from n down to t + 1, do
var i = n;
while (i >= t + 1) : (i -= 1) {
const k = i - t - 1;
// 3.1.
// if x_i == y_t:
// q[i - t - 1] = b - 1
// else:
// q[i - t - 1] = (x[i] * b + x[i - 1]) / y[t]
if (x.limbs[i] == y.limbs[t]) {
q.limbs[k] = maxInt(Limb);
} else {
const q0 = (@as(DoubleLimb, x.limbs[i]) << limb_bits) | @as(DoubleLimb, x.limbs[i - 1]);
const n0 = @as(DoubleLimb, y.limbs[t]);
q.limbs[k] = @as(Limb, @intCast(q0 / n0));
}
// 3.2
// while q[i - t - 1] * (y[t] * b + y[t - 1]) > x[i] * b * b + x[i - 1] * b + x[i - 2]:
// q[i - t - 1] -= 1
// Note, if y[t] > b / 2 this part is repeated no more than twice.
// Extract from y.
const y0 = if (t > 0) y.limbs[t - 1] else 0;
const y1 = y.limbs[t];
// Extract from x.
// Note, big endian.
const tmp0 = [_]Limb{
x.limbs[i],
if (i >= 1) x.limbs[i - 1] else 0,
if (i >= 2) x.limbs[i - 2] else 0,
};
while (true) {
// Ad-hoc 2x1 multiplication with q[i - t - 1].
// Note, big endian.
var tmp1 = [_]Limb{ 0, undefined, undefined };
tmp1[2] = addMulLimbWithCarry(0, y0, q.limbs[k], &tmp1[0]);
tmp1[1] = addMulLimbWithCarry(0, y1, q.limbs[k], &tmp1[0]);
// Big-endian compare
if (mem.order(Limb, &tmp1, &tmp0) != .gt)
break;
q.limbs[k] -= 1;
}
// 3.3.
// x -= q[i - t - 1] * y * b^(i - t - 1)
// Note, we multiply by a single limb here.
// The shift doesn't need to be performed if we add the result of the first multiplication
// to x[i - t - 1].
const underflow = llmulLimb(.sub, x.limbs[k..x.len], y.limbs[0..y.len], q.limbs[k]);
// 3.4.
// if x < 0:
// x += y * b^(i - t - 1)
// q[i - t - 1] -= 1
// Note, we check for x < 0 using the underflow flag from the previous operation.
if (underflow) {
// While we didn't properly set the signedness of x, this operation should 'flow' it back to positive.
llaccum(.add, x.limbs[k..x.len], y.limbs[0..y.len]);
q.limbs[k] -= 1;
}
}
x.normalize(x.len);
q.normalize(q.len);
// De-normalize r and y.
r.shiftRight(x.toConst(), norm_shift);
y.shiftRight(y.toConst(), norm_shift);
}
/// Truncate an integer to a number of bits, following 2s-complement semantics.
/// `r` may alias `a`.
///
/// Asserts `r` has enough storage to compute the result.
/// The upper bound is `calcTwosCompLimbCount(a.len)`.
pub fn truncate(r: *Mutable, a: Const, signedness: Signedness, bit_count: usize) void {
// Handle 0-bit integers.
if (bit_count == 0) {
@branchHint(.unlikely);
r.set(0);
return;
}
const max_limbs = calcTwosCompLimbCount(bit_count);
const sign_bit = @as(Limb, 1) << @truncate(bit_count - 1);
const mask = @as(Limb, maxInt(Limb)) >> @truncate(-%bit_count);
// Guess whether the result will have the same sign as `a`.
// * If the result will be signed zero, the guess is `true`.
// * If the result will be the minimum signed integer, the guess is `false`.
// * If the result will be unsigned zero, the guess is `a.positive`.
// * Otherwise the guess is correct.
const same_sign_guess = switch (signedness) {
.signed => max_limbs > a.limbs.len or a.limbs[max_limbs - 1] & sign_bit == 0,
.unsigned => a.positive,
};
const abs_trunc_a: Const = .{
.positive = true,
.limbs = a.limbs[0..llnormalize(a.limbs[0..@min(a.limbs.len, max_limbs)])],
};
if (same_sign_guess or abs_trunc_a.eqlZero()) {
// One of the following is true:
// * The result is zero.
// * The result is non-zero and has the same sign as `a`.
r.copy(abs_trunc_a);
if (max_limbs <= r.len) r.limbs[max_limbs - 1] &= mask;
r.normalize(r.len);
r.positive = a.positive or r.eqlZero();
} else {
// One of the following is true:
// * The result is the minimum signed integer.
// * The result is unsigned zero.
// * The result is non-zero and has the opposite sign as `a`.
r.addScalar(abs_trunc_a, -1);
llnot(r.limbs[0..r.len]);
@memset(r.limbs[r.len..max_limbs], maxInt(Limb));
r.limbs[max_limbs - 1] &= mask;
r.normalize(max_limbs);
r.positive = switch (signedness) {
// The only value with the sign bit still set is the minimum signed integer.
.signed => !a.positive and r.limbs[max_limbs - 1] & sign_bit == 0,
.unsigned => !a.positive or r.eqlZero(),
};
}
}
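// Illustrative sketch: truncating 300 to 8 unsigned bits, mirroring
// `@as(u8, @truncate(@as(u16, 300))) == 44`.
test "Mutable.truncate to u8 (illustrative)" {
    var buf: [calcLimbLen(300)]Limb = undefined;
    var m = Mutable.init(&buf, 300);
    m.truncate(m.toConst(), .unsigned, 8);
    try std.testing.expectEqual(@as(u8, 44), try m.toConst().to(u8));
}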
/// Saturate an integer to a number of bits, following 2s-complement semantics.
/// r may alias a.
///
/// Asserts `r` has enough storage to store the result.
/// The upper bound is `calcTwosCompLimbCount(a.len)`.
pub fn saturate(r: *Mutable, a: Const, signedness: Signedness, bit_count: usize) void {
if (!a.fitsInTwosComp(signedness, bit_count)) {
r.setTwosCompIntLimit(if (r.positive) .max else .min, signedness, bit_count);
}
}
/// Read the value of `x` from `buffer`.
/// Asserts that `buffer` is large enough to contain a value of bit-size `bit_count`.
///
/// The contents of `buffer` are interpreted as if they were the contents of
/// @ptrCast(*[buffer.len]const u8, &x). Byte ordering is determined by `endian`
/// and any required padding bits are expected on the MSB end.
pub fn readTwosComplement(
x: *Mutable,
buffer: []const u8,
bit_count: usize,
endian: Endian,
signedness: Signedness,
) void {
return readPackedTwosComplement(x, buffer, 0, bit_count, endian, signedness);
}
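// Illustrative sketch: decoding a little-endian i16 from raw bytes.
test "Mutable.readTwosComplement of an i16 (illustrative)" {
    const bytes = [_]u8{ 0xfe, 0xff }; // @as(i16, -2), little-endian
    var buf: [calcTwosCompLimbCount(16)]Limb = undefined;
    var m = Mutable.init(&buf, 0);
    m.readTwosComplement(&bytes, 16, .little, .signed);
    try std.testing.expectEqual(@as(i16, -2), try m.toConst().to(i16));
}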
/// Read the value of `x` from a packed memory `buffer`.
/// Asserts that `buffer` is large enough to contain a value of bit-size `bit_count`
/// at offset `bit_offset`.
///
/// This is equivalent to loading the value of an integer with `bit_count` bits as
/// if it were a field in packed memory at the provided bit offset.
pub fn readPackedTwosComplement(
x: *Mutable,
buffer: []const u8,
bit_offset: usize,
bit_count: usize,
endian: Endian,
signedness: Signedness,
) void {
if (bit_count == 0) {
x.limbs[0] = 0;
x.len = 1;
x.positive = true;
return;
}
// Check whether the input is negative
var positive = true;
if (signedness == .signed) {
const total_bits = bit_offset + bit_count;
const last_byte = switch (endian) {
.little => ((total_bits + 7) / 8) - 1,
.big => buffer.len - ((total_bits + 7) / 8),
};
const sign_bit = @as(u8, 1) << @as(u3, @intCast((total_bits - 1) % 8));
positive = ((buffer[last_byte] & sign_bit) == 0);
}
// Copy all complete limbs
var carry: u1 = 1;
var limb_index: usize = 0;
var bit_index: usize = 0;
while (limb_index < bit_count / @bitSizeOf(Limb)) : (limb_index += 1) {
// Read one Limb of bits
var limb = mem.readPackedInt(Limb, buffer, bit_index + bit_offset, endian);
bit_index += @bitSizeOf(Limb);
// 2's complement (bitwise not, then add carry bit)
if (!positive) {
const ov = @addWithOverflow(~limb, carry);
limb = ov[0];
carry = ov[1];
}
x.limbs[limb_index] = limb;
}
// Copy the remaining bits
if (bit_count != bit_index) {
// Read all remaining bits
var limb = switch (signedness) {
.unsigned => mem.readVarPackedInt(Limb, buffer, bit_index + bit_offset, bit_count - bit_index, endian, .unsigned),
.signed => b: {
const SLimb = std.meta.Int(.signed, @bitSizeOf(Limb));
const limb = mem.readVarPackedInt(SLimb, buffer, bit_index + bit_offset, bit_count - bit_index, endian, .signed);
break :b @as(Limb, @bitCast(limb));
},
};
// 2's complement (bitwise not, then add carry bit)
if (!positive) {
const ov = @addWithOverflow(~limb, carry);
assert(ov[1] == 0);
limb = ov[0];
}
x.limbs[limb_index] = limb;
limb_index += 1;
}
x.positive = positive;
x.len = limb_index;
x.normalize(x.len);
}
/// Normalize a possible sequence of leading zeros.
///
/// [1, 2, 3, 4, 0] -> [1, 2, 3, 4]
/// [1, 2, 0, 0, 0] -> [1, 2]
/// [0, 0, 0, 0, 0] -> [0]
pub fn normalize(r: *Mutable, length: usize) void {
r.len = llnormalize(r.limbs[0..length]);
}
};