struct Mutable [src]

An arbitrary-precision big integer, with a fixed set of mutable limbs.

Fields

limbs: []Limb
Raw digits. These are:
* Little-endian ordered
* limbs.len >= 1
* Zero is represented as limbs.len == 1 with limbs[0] == 0.
Accessing limbs directly should be avoided.
These are allocated limbs; the len field tells the valid range.
len: usize
positive: bool
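
For illustration, a minimal sketch (not part of the API reference) of the limb layout described above. It assumes a 64-bit target, where Limb is a u64, and that this struct is reached as std.math.big.int.Mutable:

const std = @import("std");
const big = std.math.big;

test "Mutable limb layout" {
    var limbs: [4]big.Limb = undefined;
    // 2^64 + 5 does not fit in one 64-bit limb; expect two little-endian limbs {5, 1}.
    const x = big.int.Mutable.init(&limbs, (1 << 64) + 5);
    try std.testing.expectEqual(@as(usize, 2), x.len);
    try std.testing.expectEqual(@as(big.Limb, 5), x.limbs[0]);
    try std.testing.expectEqual(@as(big.Limb, 1), x.limbs[1]);

    // Zero is represented as len == 1 with limbs[0] == 0.
    var zero_limb: [1]big.Limb = undefined;
    const zero = big.int.Mutable.init(&zero_limb, 0);
    try std.testing.expect(zero.eqlZero());
    try std.testing.expectEqual(@as(usize, 1), zero.len);
}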

Members

Source

pub const Mutable = struct { /// Raw digits. These are: /// /// * Little-endian ordered /// * limbs.len >= 1 /// * Zero is represented as limbs.len == 1 with limbs[0] == 0. /// /// Accessing limbs directly should be avoided. /// These are allocated limbs; the `len` field tells the valid range. limbs: []Limb, len: usize, positive: bool, pub fn toConst(self: Mutable) Const { return .{ .limbs = self.limbs[0..self.len], .positive = self.positive, }; } /// Returns true if `a == 0`. pub fn eqlZero(self: Mutable) bool { return self.toConst().eqlZero(); } /// Asserts that the allocator owns the limbs memory. If this is not the case, /// use `toConst().toManaged()`. pub fn toManaged(self: Mutable, allocator: Allocator) Managed { return .{ .allocator = allocator, .limbs = self.limbs, .metadata = if (self.positive) self.len & ~Managed.sign_bit else self.len | Managed.sign_bit, }; } /// `value` is a primitive integer type. /// Asserts the value fits within the provided `limbs_buffer`. /// Note: `calcLimbLen` can be used to figure out how big an array to allocate for `limbs_buffer`. pub fn init(limbs_buffer: []Limb, value: anytype) Mutable { limbs_buffer[0] = 0; var self: Mutable = .{ .limbs = limbs_buffer, .len = 1, .positive = true, }; self.set(value); return self; } /// Copies the value of a Const to an existing Mutable so that they both have the same value. /// Asserts the value fits in the limbs buffer. pub fn copy(self: *Mutable, other: Const) void { if (self.limbs.ptr != other.limbs.ptr) { @memcpy(self.limbs[0..other.limbs.len], other.limbs[0..other.limbs.len]); } // Normalize before setting `positive` so the `eqlZero` doesn't need to iterate // over the extra zero limbs. self.normalize(other.limbs.len); self.positive = other.positive or other.eqlZero(); } /// Efficiently swap a Mutable with another. This swaps the limb pointers and a full copy is not /// performed. The address of the limbs field will not be the same after this function. pub fn swap(self: *Mutable, other: *Mutable) void { mem.swap(Mutable, self, other); } pub fn dump(self: Mutable) void { for (self.limbs[0..self.len]) |limb| { std.debug.print("{x} ", .{limb}); } std.debug.print("capacity={} positive={}\n", .{ self.limbs.len, self.positive }); } /// Clones a Mutable and returns a new Mutable with the same value. The new Mutable is a deep copy and /// can be modified separately from the original. /// Asserts that limbs is big enough to store the value. pub fn clone(other: Mutable, limbs: []Limb) Mutable { @memcpy(limbs[0..other.len], other.limbs[0..other.len]); return .{ .limbs = limbs, .len = other.len, .positive = other.positive, }; } pub fn negate(self: *Mutable) void { self.positive = !self.positive; } /// Modify to become the absolute value pub fn abs(self: *Mutable) void { self.positive = true; } /// Sets the Mutable to value. Value must be a primitive integer type. /// Asserts the value fits within the limbs buffer. /// Note: `calcLimbLen` can be used to figure out how big the limbs buffer /// needs to be to store a specific value. 
pub fn set(self: *Mutable, value: anytype) void { const T = @TypeOf(value); const needed_limbs = calcLimbLen(value); assert(needed_limbs <= self.limbs.len); // value too big self.len = needed_limbs; self.positive = value >= 0; switch (@typeInfo(T)) { .int => |info| { var w_value = @abs(value); if (info.bits <= limb_bits) { self.limbs[0] = w_value; } else { var i: usize = 0; while (true) : (i += 1) { self.limbs[i] = @as(Limb, @truncate(w_value)); w_value >>= limb_bits; if (w_value == 0) break; } } }, .comptime_int => { comptime var w_value = @abs(value); if (w_value <= maxInt(Limb)) { self.limbs[0] = w_value; } else { const mask = (1 << limb_bits) - 1; comptime var i = 0; inline while (true) : (i += 1) { self.limbs[i] = w_value & mask; w_value >>= limb_bits; if (w_value == 0) break; } } }, else => @compileError("cannot set Mutable using type " ++ @typeName(T)), } } /// Set self from the string representation `value`. /// /// `value` must contain only digits < `base` and is case insensitive. Base prefixes are /// not allowed (e.g. 0x43 should simply be 43). Underscores in the input string are /// ignored and can be used as digit separators. /// /// Asserts there is enough memory for the value in `self.limbs`. An upper bound on number of limbs can /// be determined with `calcSetStringLimbCount`. /// Asserts the base is in the range [2, 36]. /// /// Returns an error if the value has invalid digits for the requested base. /// /// `limbs_buffer` is used for temporary storage. The size required can be found with /// `calcSetStringLimbsBufferLen`. /// /// If `allocator` is provided, it will be used for temporary storage to improve /// multiplication performance. `error.OutOfMemory` is handled with a fallback algorithm. pub fn setString( self: *Mutable, base: u8, value: []const u8, limbs_buffer: []Limb, allocator: ?Allocator, ) error{InvalidCharacter}!void { assert(base >= 2); assert(base <= 36); var i: usize = 0; var positive = true; if (value.len > 0 and value[0] == '-') { positive = false; i += 1; } const ap_base: Const = .{ .limbs = &[_]Limb{base}, .positive = true }; self.set(0); for (value[i..]) |ch| { if (ch == '_') { continue; } const d = try std.fmt.charToDigit(ch, base); const ap_d: Const = .{ .limbs = &[_]Limb{d}, .positive = true }; self.mul(self.toConst(), ap_base, limbs_buffer, allocator); self.add(self.toConst(), ap_d); } self.positive = positive; } /// Set self to either bound of a 2s-complement integer. /// Note: The result is still sign-magnitude, not twos complement! In order to convert the /// result to twos complement, it is sufficient to take the absolute value. /// /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by /// r is `calcTwosCompLimbCount(bit_count)`. pub fn setTwosCompIntLimit( r: *Mutable, limit: TwosCompIntLimit, signedness: Signedness, bit_count: usize, ) void { // Handle zero-bit types. if (bit_count == 0) { r.set(0); return; } const req_limbs = calcTwosCompLimbCount(bit_count); const bit: Log2Limb = @truncate(bit_count - 1); const signmask = @as(Limb, 1) << bit; // 0b0..010..0 where 1 is the sign bit. const mask = (signmask << 1) -% 1; // 0b0..011..1 where the leftmost 1 is the sign bit. r.positive = true; switch (signedness) { .signed => switch (limit) { .min => { // Negative bound, signed = -0x80. r.len = req_limbs; @memset(r.limbs[0 .. 
r.len - 1], 0); r.limbs[r.len - 1] = signmask; r.positive = false; }, .max => { // Positive bound, signed = 0x7F // Note, in this branch we need to normalize because the first bit is // supposed to be 0. // Special case for 1-bit integers. if (bit_count == 1) { r.set(0); } else { const new_req_limbs = calcTwosCompLimbCount(bit_count - 1); const msb = @as(Log2Limb, @truncate(bit_count - 2)); const new_signmask = @as(Limb, 1) << msb; // 0b0..010..0 where 1 is the sign bit. const new_mask = (new_signmask << 1) -% 1; // 0b0..001..1 where the rightmost 0 is the sign bit. r.len = new_req_limbs; @memset(r.limbs[0 .. r.len - 1], maxInt(Limb)); r.limbs[r.len - 1] = new_mask; } }, }, .unsigned => switch (limit) { .min => { // Min bound, unsigned = 0x00 r.set(0); }, .max => { // Max bound, unsigned = 0xFF r.len = req_limbs; @memset(r.limbs[0 .. r.len - 1], maxInt(Limb)); r.limbs[r.len - 1] = mask; }, }, } } /// r = a + scalar /// /// r and a may be aliases. /// scalar is a primitive integer type. /// /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by /// r is `@max(a.limbs.len, calcLimbLen(scalar)) + 1`. pub fn addScalar(r: *Mutable, a: Const, scalar: anytype) void { // Normally we could just determine the number of limbs needed with calcLimbLen, // but that is not comptime-known when scalar is not a comptime_int. Instead, we // use calcTwosCompLimbCount for a non-comptime_int scalar, which can be pessimistic // in the case that scalar happens to be small in magnitude within its type, but it // is well worth being able to use the stack and not needing an allocator passed in. // Note that Mutable.init still sets len to calcLimbLen(scalar) in any case. const limb_len = comptime switch (@typeInfo(@TypeOf(scalar))) { .comptime_int => calcLimbLen(scalar), .int => |info| calcTwosCompLimbCount(info.bits), else => @compileError("expected scalar to be an int"), }; var limbs: [limb_len]Limb = undefined; const operand = init(&limbs, scalar).toConst(); return add(r, a, operand); } /// Base implementation for addition. Adds `@max(a.limbs.len, b.limbs.len)` elements from a and b, /// and returns whether any overflow occurred. /// r, a and b may be aliases. /// /// Asserts r has enough elements to hold the result. The upper bound is `@max(a.limbs.len, b.limbs.len)`. fn addCarry(r: *Mutable, a: Const, b: Const) bool { if (a.eqlZero()) { r.copy(b); return false; } else if (b.eqlZero()) { r.copy(a); return false; } else if (a.positive != b.positive) { if (a.positive) { // (a) + (-b) => a - b return r.subCarry(a, b.abs()); } else { // (-a) + (b) => b - a return r.subCarry(b, a.abs()); } } else { r.positive = a.positive; if (a.limbs.len >= b.limbs.len) { const c = lladdcarry(r.limbs, a.limbs, b.limbs); r.normalize(a.limbs.len); return c != 0; } else { const c = lladdcarry(r.limbs, b.limbs, a.limbs); r.normalize(b.limbs.len); return c != 0; } } } /// r = a + b /// /// r, a and b may be aliases. /// /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by /// r is `@max(a.limbs.len, b.limbs.len) + 1`. pub fn add(r: *Mutable, a: Const, b: Const) void { if (r.addCarry(a, b)) { // Fix up the result. Note that addCarry normalizes by a.limbs.len or b.limbs.len, // so we need to set the length here. const msl = @max(a.limbs.len, b.limbs.len); // `[add|sub]Carry` normalizes by `msl`, so we need to fix up the result manually here. // Note, the fact that it normalized means that the intermediary limbs are zero here. 
r.len = msl + 1; r.limbs[msl] = 1; // If this panics, there wasn't enough space in `r`. } } /// r = a + b with 2s-complement wrapping semantics. Returns whether overflow occurred. /// r, a and b may be aliases /// /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by /// r is `calcTwosCompLimbCount(bit_count)`. pub fn addWrap(r: *Mutable, a: Const, b: Const, signedness: Signedness, bit_count: usize) bool { const req_limbs = calcTwosCompLimbCount(bit_count); // Slice of the upper bits if they exist, these will be ignored, which allows us to use addCarry to determine // if an overflow occurred. const x = Const{ .positive = a.positive, .limbs = a.limbs[0..@min(req_limbs, a.limbs.len)], }; const y = Const{ .positive = b.positive, .limbs = b.limbs[0..@min(req_limbs, b.limbs.len)], }; var carry_truncated = false; if (r.addCarry(x, y)) { // There are two possibilities here: // - We overflowed req_limbs. In this case, the carry is ignored, as it would be removed by // truncate anyway. // - a and b had fewer elements than req_limbs, and those were overflowed. This case needs to be handled. // Note: after this we still might need to wrap. const msl = @max(a.limbs.len, b.limbs.len); if (msl < req_limbs) { r.limbs[msl] = 1; r.len = req_limbs; @memset(r.limbs[msl + 1 .. req_limbs], 0); } else { carry_truncated = true; } } if (!r.toConst().fitsInTwosComp(signedness, bit_count)) { r.truncate(r.toConst(), signedness, bit_count); return true; } return carry_truncated; } /// r = a + b with 2s-complement saturating semantics. /// r, a and b may be aliases. /// /// Asserts the result fits in `r`. Upper bound on the number of limbs needed by /// r is `calcTwosCompLimbCount(bit_count)`. pub fn addSat(r: *Mutable, a: Const, b: Const, signedness: Signedness, bit_count: usize) void { const req_limbs = calcTwosCompLimbCount(bit_count); // Slice of the upper bits if they exist, these will be ignored, which allows us to use addCarry to determine // if an overflow occurred. const x = Const{ .positive = a.positive, .limbs = a.limbs[0..@min(req_limbs, a.limbs.len)], }; const y = Const{ .positive = b.positive, .limbs = b.limbs[0..@min(req_limbs, b.limbs.len)], }; if (r.addCarry(x, y)) { // There are two possibilities here: // - We overflowed req_limbs, in which case we need to saturate. // - a and b had fewer elements than req_limbs, and those were overflowed. // Note: In this case, might _also_ need to saturate. const msl = @max(a.limbs.len, b.limbs.len); if (msl < req_limbs) { r.limbs[msl] = 1; r.len = req_limbs; // Note: Saturation may still be required if msl == req_limbs - 1 } else { // Overflowed req_limbs, definitely saturate. r.setTwosCompIntLimit(if (r.positive) .max else .min, signedness, bit_count); } } // Saturate if the result didn't fit. r.saturate(r.toConst(), signedness, bit_count); } /// Base implementation for subtraction. Subtracts `@max(a.limbs.len, b.limbs.len)` elements from a and b, /// and returns whether any overflow occurred. /// r, a and b may be aliases. /// /// Asserts r has enough elements to hold the result. The upper bound is `@max(a.limbs.len, b.limbs.len)`. 
fn subCarry(r: *Mutable, a: Const, b: Const) bool { if (a.eqlZero()) { r.copy(b); r.positive = !b.positive; return false; } else if (b.eqlZero()) { r.copy(a); return false; } else if (a.positive != b.positive) { if (a.positive) { // (a) - (-b) => a + b return r.addCarry(a, b.abs()); } else { // (-a) - (b) => -a + -b return r.addCarry(a, b.negate()); } } else if (a.positive) { if (a.order(b) != .lt) { // (a) - (b) => a - b const c = llsubcarry(r.limbs, a.limbs, b.limbs); r.normalize(a.limbs.len); r.positive = true; return c != 0; } else { // (a) - (b) => -b + a => -(b - a) const c = llsubcarry(r.limbs, b.limbs, a.limbs); r.normalize(b.limbs.len); r.positive = false; return c != 0; } } else { if (a.order(b) == .lt) { // (-a) - (-b) => -(a - b) const c = llsubcarry(r.limbs, a.limbs, b.limbs); r.normalize(a.limbs.len); r.positive = false; return c != 0; } else { // (-a) - (-b) => --b + -a => b - a const c = llsubcarry(r.limbs, b.limbs, a.limbs); r.normalize(b.limbs.len); r.positive = true; return c != 0; } } } /// r = a - b /// /// r, a and b may be aliases. /// /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by /// r is `@max(a.limbs.len, b.limbs.len) + 1`. The +1 is not needed if both operands are positive. pub fn sub(r: *Mutable, a: Const, b: Const) void { r.add(a, b.negate()); } /// r = a - b with 2s-complement wrapping semantics. Returns whether any overflow occurred. /// /// r, a and b may be aliases /// Asserts the result fits in `r`. An upper bound on the number of limbs needed by /// r is `calcTwosCompLimbCount(bit_count)`. pub fn subWrap(r: *Mutable, a: Const, b: Const, signedness: Signedness, bit_count: usize) bool { return r.addWrap(a, b.negate(), signedness, bit_count); } /// r = a - b with 2s-complement saturating semantics. /// r, a and b may be aliases. /// /// Asserts the result fits in `r`. Upper bound on the number of limbs needed by /// r is `calcTwosCompLimbCount(bit_count)`. pub fn subSat(r: *Mutable, a: Const, b: Const, signedness: Signedness, bit_count: usize) void { r.addSat(a, b.negate(), signedness, bit_count); } /// rma = a * b /// /// `rma` may alias with `a` or `b`. /// `a` and `b` may alias with each other. /// /// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by /// rma is given by `a.limbs.len + b.limbs.len`. /// /// `limbs_buffer` is used for temporary storage. The amount required is given by `calcMulLimbsBufferLen`. pub fn mul(rma: *Mutable, a: Const, b: Const, limbs_buffer: []Limb, allocator: ?Allocator) void { var buf_index: usize = 0; const a_copy = if (rma.limbs.ptr == a.limbs.ptr) blk: { const start = buf_index; @memcpy(limbs_buffer[buf_index..][0..a.limbs.len], a.limbs); buf_index += a.limbs.len; break :blk a.toMutable(limbs_buffer[start..buf_index]).toConst(); } else a; const b_copy = if (rma.limbs.ptr == b.limbs.ptr) blk: { const start = buf_index; @memcpy(limbs_buffer[buf_index..][0..b.limbs.len], b.limbs); buf_index += b.limbs.len; break :blk b.toMutable(limbs_buffer[start..buf_index]).toConst(); } else b; return rma.mulNoAlias(a_copy, b_copy, allocator); } /// rma = a * b /// /// `rma` may not alias with `a` or `b`. /// `a` and `b` may alias with each other. /// /// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by /// rma is given by `a.limbs.len + b.limbs.len`. /// /// If `allocator` is provided, it will be used for temporary storage to improve /// multiplication performance. `error.OutOfMemory` is handled with a fallback algorithm. 
pub fn mulNoAlias(rma: *Mutable, a: Const, b: Const, allocator: ?Allocator) void { assert(rma.limbs.ptr != a.limbs.ptr); // illegal aliasing assert(rma.limbs.ptr != b.limbs.ptr); // illegal aliasing if (a.limbs.len == 1 and b.limbs.len == 1) { const ov = @mulWithOverflow(a.limbs[0], b.limbs[0]); rma.limbs[0] = ov[0]; if (ov[1] == 0) { rma.len = 1; rma.positive = (a.positive == b.positive); return; } } @memset(rma.limbs[0 .. a.limbs.len + b.limbs.len], 0); llmulacc(.add, allocator, rma.limbs, a.limbs, b.limbs); rma.normalize(a.limbs.len + b.limbs.len); rma.positive = (a.positive == b.positive); } /// rma = a * b with 2s-complement wrapping semantics. /// /// `rma` may alias with `a` or `b`. /// `a` and `b` may alias with each other. /// /// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by /// rma is given by `a.limbs.len + b.limbs.len`. /// /// `limbs_buffer` is used for temporary storage. The amount required is given by `calcMulWrapLimbsBufferLen`. pub fn mulWrap( rma: *Mutable, a: Const, b: Const, signedness: Signedness, bit_count: usize, limbs_buffer: []Limb, allocator: ?Allocator, ) void { var buf_index: usize = 0; const req_limbs = calcTwosCompLimbCount(bit_count); const a_copy = if (rma.limbs.ptr == a.limbs.ptr) blk: { const start = buf_index; const a_len = @min(req_limbs, a.limbs.len); @memcpy(limbs_buffer[buf_index..][0..a_len], a.limbs[0..a_len]); buf_index += a_len; break :blk a.toMutable(limbs_buffer[start..buf_index]).toConst(); } else a; const b_copy = if (rma.limbs.ptr == b.limbs.ptr) blk: { const start = buf_index; const b_len = @min(req_limbs, b.limbs.len); @memcpy(limbs_buffer[buf_index..][0..b_len], b.limbs[0..b_len]); buf_index += b_len; break :blk b.toMutable(limbs_buffer[start..buf_index]).toConst(); } else b; return rma.mulWrapNoAlias(a_copy, b_copy, signedness, bit_count, allocator); } /// rma = a * b with 2s-complement wrapping semantics. /// /// `rma` may not alias with `a` or `b`. /// `a` and `b` may alias with each other. /// /// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by /// rma is given by `a.limbs.len + b.limbs.len`. /// /// If `allocator` is provided, it will be used for temporary storage to improve /// multiplication performance. `error.OutOfMemory` is handled with a fallback algorithm. pub fn mulWrapNoAlias( rma: *Mutable, a: Const, b: Const, signedness: Signedness, bit_count: usize, allocator: ?Allocator, ) void { assert(rma.limbs.ptr != a.limbs.ptr); // illegal aliasing assert(rma.limbs.ptr != b.limbs.ptr); // illegal aliasing const req_limbs = calcTwosCompLimbCount(bit_count); // We can ignore the upper bits here, those results will be discarded anyway. const a_limbs = a.limbs[0..@min(req_limbs, a.limbs.len)]; const b_limbs = b.limbs[0..@min(req_limbs, b.limbs.len)]; @memset(rma.limbs[0..req_limbs], 0); llmulacc(.add, allocator, rma.limbs, a_limbs, b_limbs); rma.normalize(@min(req_limbs, a.limbs.len + b.limbs.len)); rma.positive = (a.positive == b.positive); rma.truncate(rma.toConst(), signedness, bit_count); } /// r = @bitReverse(a) with 2s-complement semantics. /// r and a may be aliases. /// /// Asserts the result fits in `r`. Upper bound on the number of limbs needed by /// r is `calcTwosCompLimbCount(bit_count)`. pub fn bitReverse(r: *Mutable, a: Const, signedness: Signedness, bit_count: usize) void { if (bit_count == 0) return; r.copy(a); const limbs_required = calcTwosCompLimbCount(bit_count); if (!a.positive) { r.positive = true; // Negate. 
r.bitNotWrap(r.toConst(), .unsigned, bit_count); // Bitwise NOT. r.addScalar(r.toConst(), 1); // Add one. } else if (limbs_required > a.limbs.len) { // Zero-extend to our output length for (r.limbs[a.limbs.len..limbs_required]) |*limb| { limb.* = 0; } r.len = limbs_required; } // 0b0..01..1000 with @log2(@sizeOf(Limb)) consecutive ones const endian_mask: usize = (@sizeOf(Limb) - 1) << 3; const bytes = std.mem.sliceAsBytes(r.limbs); var k: usize = 0; while (k < ((bit_count + 1) / 2)) : (k += 1) { var i = k; var rev_i = bit_count - i - 1; // This "endian mask" remaps a low (LE) byte to the corresponding high // (BE) byte in the Limb, without changing which limbs we are indexing if (native_endian == .big) { i ^= endian_mask; rev_i ^= endian_mask; } const bit_i = std.mem.readPackedInt(u1, bytes, i, .little); const bit_rev_i = std.mem.readPackedInt(u1, bytes, rev_i, .little); std.mem.writePackedInt(u1, bytes, i, bit_rev_i, .little); std.mem.writePackedInt(u1, bytes, rev_i, bit_i, .little); } // Calculate signed-magnitude representation for output if (signedness == .signed) { const last_bit = switch (native_endian) { .little => std.mem.readPackedInt(u1, bytes, bit_count - 1, .little), .big => std.mem.readPackedInt(u1, bytes, (bit_count - 1) ^ endian_mask, .little), }; if (last_bit == 1) { r.bitNotWrap(r.toConst(), .unsigned, bit_count); // Bitwise NOT. r.addScalar(r.toConst(), 1); // Add one. r.positive = false; // Negate. } } r.normalize(r.len); } /// r = @byteSwap(a) with 2s-complement semantics. /// r and a may be aliases. /// /// Asserts the result fits in `r`. Upper bound on the number of limbs needed by /// r is `calcTwosCompLimbCount(8*byte_count)`. pub fn byteSwap(r: *Mutable, a: Const, signedness: Signedness, byte_count: usize) void { if (byte_count == 0) return; r.copy(a); const limbs_required = calcTwosCompLimbCount(8 * byte_count); if (!a.positive) { r.positive = true; // Negate. r.bitNotWrap(r.toConst(), .unsigned, 8 * byte_count); // Bitwise NOT. r.addScalar(r.toConst(), 1); // Add one. } else if (limbs_required > a.limbs.len) { // Zero-extend to our output length for (r.limbs[a.limbs.len..limbs_required]) |*limb| { limb.* = 0; } r.len = limbs_required; } // 0b0..01..1 with @log2(@sizeOf(Limb)) trailing ones const endian_mask: usize = @sizeOf(Limb) - 1; var bytes = std.mem.sliceAsBytes(r.limbs); assert(bytes.len >= byte_count); var k: usize = 0; while (k < (byte_count + 1) / 2) : (k += 1) { var i = k; var rev_i = byte_count - k - 1; // This "endian mask" remaps a low (LE) byte to the corresponding high // (BE) byte in the Limb, without changing which limbs we are indexing if (native_endian == .big) { i ^= endian_mask; rev_i ^= endian_mask; } const byte_i = bytes[i]; const byte_rev_i = bytes[rev_i]; bytes[rev_i] = byte_i; bytes[i] = byte_rev_i; } // Calculate signed-magnitude representation for output if (signedness == .signed) { const last_byte = switch (native_endian) { .little => bytes[byte_count - 1], .big => bytes[(byte_count - 1) ^ endian_mask], }; if (last_byte & (1 << 7) != 0) { // Check sign bit of last byte r.bitNotWrap(r.toConst(), .unsigned, 8 * byte_count); // Bitwise NOT. r.addScalar(r.toConst(), 1); // Add one. r.positive = false; // Negate. } } r.normalize(r.len); } /// r = @popCount(a) with 2s-complement semantics. /// r and a may be aliases. /// /// Asserts the result fits in `r`. Upper bound on the number of limbs needed by /// r is `calcTwosCompLimbCount(bit_count)`. 
pub fn popCount(r: *Mutable, a: Const, bit_count: usize) void { r.copy(a); if (!a.positive) { r.positive = true; // Negate. r.bitNotWrap(r.toConst(), .unsigned, bit_count); // Bitwise NOT. r.addScalar(r.toConst(), 1); // Add one. } var sum: Limb = 0; for (r.limbs[0..r.len]) |limb| { sum += @popCount(limb); } r.set(sum); } /// rma = a * a /// /// `rma` may not alias with `a`. /// /// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by /// rma is given by `2 * a.limbs.len + 1`. /// /// If `allocator` is provided, it will be used for temporary storage to improve /// multiplication performance. `error.OutOfMemory` is handled with a fallback algorithm. pub fn sqrNoAlias(rma: *Mutable, a: Const, opt_allocator: ?Allocator) void { _ = opt_allocator; assert(rma.limbs.ptr != a.limbs.ptr); // illegal aliasing @memset(rma.limbs, 0); llsquareBasecase(rma.limbs, a.limbs); rma.normalize(2 * a.limbs.len + 1); rma.positive = true; } /// q = a / b (rem r) /// /// a / b are floored (rounded towards -inf). /// q may alias with a or b. /// /// Asserts there is enough memory to store q and r. /// The upper bound for r limb count is `b.limbs.len`. /// The upper bound for q limb count is given by `a.limbs.len`. /// /// `limbs_buffer` is used for temporary storage. The amount required is given by `calcDivLimbsBufferLen`. pub fn divFloor( q: *Mutable, r: *Mutable, a: Const, b: Const, limbs_buffer: []Limb, ) void { const sep = a.limbs.len + 2; var x = a.toMutable(limbs_buffer[0..sep]); var y = b.toMutable(limbs_buffer[sep..]); div(q, r, &x, &y); // Note, `div` performs truncating division, which satisfies // @divTrunc(a, b) * b + @rem(a, b) = a // so r = a - @divTrunc(a, b) * b // Note, @rem(a, -b) = @rem(-b, a) = -@rem(a, b) = -@rem(-a, -b) // For divFloor, we want to perform // @divFloor(a, b) * b + @mod(a, b) = a // Note: // @divFloor(-a, b) // = @divFloor(a, -b) // = -@divCeil(a, b) // = -@divFloor(a + b - 1, b) // = -@divTrunc(a + b - 1, b) // Note (1): // @divTrunc(a + b - 1, b) * b + @rem(a + b - 1, b) = a + b - 1 // = @divTrunc(a + b - 1, b) * b + @rem(a - 1, b) = a + b - 1 // = @divTrunc(a + b - 1, b) * b + @rem(a - 1, b) - b + 1 = a if (a.positive and b.positive) { // Positive-positive case, don't need to do anything. } else if (a.positive and !b.positive) { // a/-b -> q is negative, and so we need to fix flooring. // Subtract one to make the division flooring. // @divFloor(a, -b) * -b + @mod(a, -b) = a // If b divides a exactly, we have @divFloor(a, -b) * -b = a // Else, we have @divFloor(a, -b) * -b > a, so @mod(a, -b) becomes negative // We have: // @divFloor(a, -b) * -b + @mod(a, -b) = a // = -@divTrunc(a + b - 1, b) * -b + @mod(a, -b) = a // = @divTrunc(a + b - 1, b) * b + @mod(a, -b) = a // Substitute a for (1): // @divTrunc(a + b - 1, b) * b + @rem(a - 1, b) - b + 1 = @divTrunc(a + b - 1, b) * b + @mod(a, -b) // Yields: // @mod(a, -b) = @rem(a - 1, b) - b + 1 // Note that `r` holds @rem(a, b) at this point. // // If @rem(a, b) is not 0: // @rem(a - 1, b) = @rem(a, b) - 1 // => @mod(a, -b) = @rem(a, b) - 1 - b + 1 = @rem(a, b) - b // Else: // @rem(a - 1, b) = @rem(a + b - 1, b) = @rem(b - 1, b) = b - 1 // => @mod(a, -b) = b - 1 - b + 1 = 0 if (!r.eqlZero()) { q.addScalar(q.toConst(), -1); r.positive = true; r.sub(r.toConst(), y.toConst().abs()); } } else if (!a.positive and b.positive) { // -a/b -> q is negative, and so we need to fix flooring. // Subtract one to make the division flooring. 
// @divFloor(-a, b) * b + @mod(-a, b) = -a // If b divides a exactly, we have @divFloor(-a, b) * b = -a // Else, we have @divFloor(-a, b) * b < -a, so @mod(-a, b) becomes positive // We have: // @divFloor(-a, b) * b + @mod(-a, b) = -a // = -@divTrunc(a + b - 1, b) * b + @mod(-a, b) = -a // = @divTrunc(a + b - 1, b) * b - @mod(-a, b) = a // Substitute a for (1): // @divTrunc(a + b - 1, b) * b + @rem(a - 1, b) - b + 1 = @divTrunc(a + b - 1, b) * b - @mod(-a, b) // Yields: // @rem(a - 1, b) - b + 1 = -@mod(-a, b) // => -@mod(-a, b) = @rem(a - 1, b) - b + 1 // => @mod(-a, b) = -(@rem(a - 1, b) - b + 1) = -@rem(a - 1, b) + b - 1 // // If @rem(a, b) is not 0: // @rem(a - 1, b) = @rem(a, b) - 1 // => @mod(-a, b) = -(@rem(a, b) - 1) + b - 1 = -@rem(a, b) + 1 + b - 1 = -@rem(a, b) + b // Else : // @rem(a - 1, b) = b - 1 // => @mod(-a, b) = -(b - 1) + b - 1 = 0 if (!r.eqlZero()) { q.addScalar(q.toConst(), -1); r.positive = false; r.add(r.toConst(), y.toConst().abs()); } } else if (!a.positive and !b.positive) { // a/b -> q is positive, don't need to do anything to fix flooring. // @divFloor(-a, -b) * -b + @mod(-a, -b) = -a // If b divides a exactly, we have @divFloor(-a, -b) * -b = -a // Else, we have @divFloor(-a, -b) * -b > -a, so @mod(-a, -b) becomes negative // We have: // @divFloor(-a, -b) * -b + @mod(-a, -b) = -a // = @divTrunc(a, b) * -b + @mod(-a, -b) = -a // = @divTrunc(a, b) * b - @mod(-a, -b) = a // We also have: // @divTrunc(a, b) * b + @rem(a, b) = a // Substitute a: // @divTrunc(a, b) * b + @rem(a, b) = @divTrunc(a, b) * b - @mod(-a, -b) // => @rem(a, b) = -@mod(-a, -b) // => @mod(-a, -b) = -@rem(a, b) r.positive = false; } } /// q = a / b (rem r) /// /// a / b are truncated (rounded towards 0). /// q may alias with a or b. /// /// Asserts there is enough memory to store q and r. /// The upper bound for r limb count is `b.limbs.len`. /// The upper bound for q limb count is given by `a.limbs.len`. /// /// `limbs_buffer` is used for temporary storage. The amount required is given by `calcDivLimbsBufferLen`. pub fn divTrunc( q: *Mutable, r: *Mutable, a: Const, b: Const, limbs_buffer: []Limb, ) void { const sep = a.limbs.len + 2; var x = a.toMutable(limbs_buffer[0..sep]); var y = b.toMutable(limbs_buffer[sep..]); div(q, r, &x, &y); } /// r = a << shift, in other words, r = a * 2^shift /// /// r and a may alias. /// /// Asserts there is enough memory to fit the result. The upper bound Limb count is /// `a.limbs.len + (shift / (@sizeOf(Limb) * 8))`. pub fn shiftLeft(r: *Mutable, a: Const, shift: usize) void { llshl(r.limbs, a.limbs, shift); r.normalize(a.limbs.len + (shift / limb_bits) + 1); r.positive = a.positive; } /// r = a <<| shift with 2s-complement saturating semantics. /// /// r and a may alias. /// /// Asserts there is enough memory to fit the result. The upper bound Limb count is /// `calcTwosCompLimbCount(bit_count)`. pub fn shiftLeftSat(r: *Mutable, a: Const, shift: usize, signedness: Signedness, bit_count: usize) void { // Special case: When the argument is negative, but the result is supposed to be unsigned, // return 0 in all cases. if (!a.positive and signedness == .unsigned) { r.set(0); return; } // Check whether the shift is going to overflow. This is the case // when (in 2s complement) any bit above `bit_count - shift` is set in the unshifted value. // Note, the sign bit is not counted here. // Handle shifts larger than the target type. This also deals with // 0-bit integers. if (bit_count <= shift) { // In this case, there is no overflow only if `a` is zero. 
if (a.eqlZero()) { r.set(0); } else { r.setTwosCompIntLimit(if (a.positive) .max else .min, signedness, bit_count); } return; } const checkbit = bit_count - shift - @intFromBool(signedness == .signed); // If `checkbit` and more significant bits are zero, no overflow will take place. if (checkbit >= a.limbs.len * limb_bits) { // `checkbit` is outside the range of a, so definitely no overflow will take place. We // can defer to a normal shift. // Note that if `a` is normalized (which we assume), this checks for set bits in the upper limbs. // Note, in this case r should already have enough limbs required to perform the normal shift. // In this case the shift of the most significant limb may still overflow. r.shiftLeft(a, shift); return; } else if (checkbit < (a.limbs.len - 1) * limb_bits) { // `checkbit` is not in the most significant limb. If `a` is normalized the most significant // limb will not be zero, so in this case we need to saturate. Note that `a.limbs.len` must be // at least one according to normalization rules. r.setTwosCompIntLimit(if (a.positive) .max else .min, signedness, bit_count); return; } // Generate a mask with the bits to check in the most significant limb. We'll need to check // all bits with equal or more significance than checkbit. // const msb = @truncate(Log2Limb, checkbit); // const checkmask = (@as(Limb, 1) << msb) -% 1; if (a.limbs[a.limbs.len - 1] >> @as(Log2Limb, @truncate(checkbit)) != 0) { // Need to saturate. r.setTwosCompIntLimit(if (a.positive) .max else .min, signedness, bit_count); return; } // This shift should not be able to overflow, so invoke llshl and normalize manually // to avoid the extra required limb. llshl(r.limbs, a.limbs, shift); r.normalize(a.limbs.len + (shift / limb_bits)); r.positive = a.positive; } /// r = a >> shift /// r and a may alias. /// /// Asserts there is enough memory to fit the result. The upper bound Limb count is /// `a.limbs.len - (shift / (@sizeOf(Limb) * 8))`. pub fn shiftRight(r: *Mutable, a: Const, shift: usize) void { const full_limbs_shifted_out = shift / limb_bits; const remaining_bits_shifted_out = shift % limb_bits; if (a.limbs.len <= full_limbs_shifted_out) { // Shifting negative numbers converges to -1 instead of 0 if (a.positive) { r.len = 1; r.positive = true; r.limbs[0] = 0; } else { r.len = 1; r.positive = false; r.limbs[0] = 1; } return; } const nonzero_negative_shiftout = if (a.positive) false else nonzero: { for (a.limbs[0..full_limbs_shifted_out]) |x| { if (x != 0) break :nonzero true; } if (remaining_bits_shifted_out == 0) break :nonzero false; const not_covered: Log2Limb = @intCast(limb_bits - remaining_bits_shifted_out); break :nonzero a.limbs[full_limbs_shifted_out] << not_covered != 0; }; llshr(r.limbs, a.limbs, shift); r.len = a.limbs.len - full_limbs_shifted_out; r.positive = a.positive; if (nonzero_negative_shiftout) r.addScalar(r.toConst(), -1); r.normalize(r.len); } /// r = ~a under 2s complement wrapping semantics. /// r may alias with a. /// /// Asserts that r has enough limbs to store the result. The upper bound Limb count is /// `calcTwosCompLimbCount(bit_count)`. pub fn bitNotWrap(r: *Mutable, a: Const, signedness: Signedness, bit_count: usize) void { r.copy(a.negate()); const negative_one = Const{ .limbs = &.{1}, .positive = false }; _ = r.addWrap(r.toConst(), negative_one, signedness, bit_count); } /// r = a | b under 2s complement semantics. /// r may alias with a or b. /// /// a and b are zero-extended to the longer of a or b. 
/// /// Asserts that r has enough limbs to store the result. Upper bound is `@max(a.limbs.len, b.limbs.len)`. pub fn bitOr(r: *Mutable, a: Const, b: Const) void { // Trivial cases, llsignedor does not support zero. if (a.eqlZero()) { r.copy(b); return; } else if (b.eqlZero()) { r.copy(a); return; } if (a.limbs.len >= b.limbs.len) { r.positive = llsignedor(r.limbs, a.limbs, a.positive, b.limbs, b.positive); r.normalize(if (b.positive) a.limbs.len else b.limbs.len); } else { r.positive = llsignedor(r.limbs, b.limbs, b.positive, a.limbs, a.positive); r.normalize(if (a.positive) b.limbs.len else a.limbs.len); } } /// r = a & b under 2s complement semantics. /// r may alias with a or b. /// /// Asserts that r has enough limbs to store the result. /// If only a is positive, the upper bound is `a.limbs.len`. /// If only b is positive, the upper bound is `b.limbs.len`. /// If a and b are positive, the upper bound is `@min(a.limbs.len, b.limbs.len)`. /// If a and b are negative, the upper bound is `@max(a.limbs.len, b.limbs.len) + 1`. pub fn bitAnd(r: *Mutable, a: Const, b: Const) void { // Trivial cases, llsignedand does not support zero. if (a.eqlZero()) { r.copy(a); return; } else if (b.eqlZero()) { r.copy(b); return; } if (a.limbs.len >= b.limbs.len) { r.positive = llsignedand(r.limbs, a.limbs, a.positive, b.limbs, b.positive); r.normalize(if (b.positive) b.limbs.len else if (a.positive) a.limbs.len else a.limbs.len + 1); } else { r.positive = llsignedand(r.limbs, b.limbs, b.positive, a.limbs, a.positive); r.normalize(if (a.positive) a.limbs.len else if (b.positive) b.limbs.len else b.limbs.len + 1); } } /// r = a ^ b under 2s complement semantics. /// r may alias with a or b. /// /// Asserts that r has enough limbs to store the result. If a and b share the same signedness, the /// upper bound is `@max(a.limbs.len, b.limbs.len)`. Otherwise, if either a or b is negative /// but not both, the upper bound is `@max(a.limbs.len, b.limbs.len) + 1`. pub fn bitXor(r: *Mutable, a: Const, b: Const) void { // Trivial cases, because llsignedxor does not support negative zero. if (a.eqlZero()) { r.copy(b); return; } else if (b.eqlZero()) { r.copy(a); return; } if (a.limbs.len > b.limbs.len) { r.positive = llsignedxor(r.limbs, a.limbs, a.positive, b.limbs, b.positive); r.normalize(a.limbs.len + @intFromBool(a.positive != b.positive)); } else { r.positive = llsignedxor(r.limbs, b.limbs, b.positive, a.limbs, a.positive); r.normalize(b.limbs.len + @intFromBool(a.positive != b.positive)); } } /// rma may alias x or y. /// x and y may alias each other. /// Asserts that `rma` has enough limbs to store the result. Upper bound is /// `@min(x.limbs.len, y.limbs.len)`. /// /// `limbs_buffer` is used for temporary storage during the operation. When this function returns, /// it will have the same length as it had when the function was called. 
pub fn gcd(rma: *Mutable, x: Const, y: Const, limbs_buffer: *std.ArrayList(Limb)) !void { const prev_len = limbs_buffer.items.len; defer limbs_buffer.shrinkRetainingCapacity(prev_len); const x_copy = if (rma.limbs.ptr == x.limbs.ptr) blk: { const start = limbs_buffer.items.len; try limbs_buffer.appendSlice(x.limbs); break :blk x.toMutable(limbs_buffer.items[start..]).toConst(); } else x; const y_copy = if (rma.limbs.ptr == y.limbs.ptr) blk: { const start = limbs_buffer.items.len; try limbs_buffer.appendSlice(y.limbs); break :blk y.toMutable(limbs_buffer.items[start..]).toConst(); } else y; return gcdLehmer(rma, x_copy, y_copy, limbs_buffer); } /// r = a ^ b /// /// r may not alias a. /// /// Asserts that `r` has enough limbs to store the result. Upper bound is /// `calcPowLimbsBufferLen(a.bitCountAbs(), b)`. /// /// `limbs_buffer` is used for temporary storage. /// The amount required is given by `calcPowLimbsBufferLen`. pub fn pow(r: *Mutable, a: Const, b: u32, limbs_buffer: []Limb) void { assert(r.limbs.ptr != a.limbs.ptr); // illegal aliasing // Handle all the trivial cases first switch (b) { 0 => { // a^0 = 1 return r.set(1); }, 1 => { // a^1 = a return r.copy(a); }, else => {}, } if (a.eqlZero()) { // 0^b = 0 return r.set(0); } else if (a.limbs.len == 1 and a.limbs[0] == 1) { // 1^b = 1 and -1^b = ±1 r.set(1); r.positive = a.positive or (b & 1) == 0; return; } // Here a>1 and b>1 const needed_limbs = calcPowLimbsBufferLen(a.bitCountAbs(), b); assert(r.limbs.len >= needed_limbs); assert(limbs_buffer.len >= needed_limbs); llpow(r.limbs, a.limbs, b, limbs_buffer); r.normalize(needed_limbs); r.positive = a.positive or (b & 1) == 0; } /// r = ⌊√a⌋ /// /// r may alias a. /// /// Asserts that `r` has enough limbs to store the result. Upper bound is /// `(a.limbs.len - 1) / 2 + 1`. /// /// `limbs_buffer` is used for temporary storage. /// The amount required is given by `calcSqrtLimbsBufferLen`. pub fn sqrt( r: *Mutable, a: Const, limbs_buffer: []Limb, ) void { // Brent and Zimmermann, Modern Computer Arithmetic, Algorithm 1.13 SqrtInt // https://members.loria.fr/PZimmermann/mca/pub226.html var buf_index: usize = 0; var t = b: { const start = buf_index; buf_index += a.limbs.len; break :b Mutable.init(limbs_buffer[start..buf_index], 0); }; var u = b: { const start = buf_index; const shift = (a.bitCountAbs() + 1) / 2; buf_index += 1 + ((shift / limb_bits) + 1); var m = Mutable.init(limbs_buffer[start..buf_index], 1); m.shiftLeft(m.toConst(), shift); // u must be >= ⌊√a⌋, and should be as small as possible for efficiency break :b m; }; var s = b: { const start = buf_index; buf_index += u.limbs.len; break :b u.toConst().toMutable(limbs_buffer[start..buf_index]); }; var rem = b: { const start = buf_index; buf_index += s.limbs.len; break :b Mutable.init(limbs_buffer[start..buf_index], 0); }; while (true) { t.divFloor(&rem, a, s.toConst(), limbs_buffer[buf_index..]); t.add(t.toConst(), s.toConst()); u.shiftRight(t.toConst(), 1); if (u.toConst().order(s.toConst()).compare(.gte)) { r.copy(s.toConst()); return; } // Avoid copying u to s by swapping u and s const tmp_s = s; s = u; u = tmp_s; } } /// rma may not alias x or y. /// x and y may alias each other. /// Asserts that `rma` has enough limbs to store the result. Upper bound is given by `calcGcdNoAliasLimbLen`. /// /// `limbs_buffer` is used for temporary storage during the operation. 
pub fn gcdNoAlias(rma: *Mutable, x: Const, y: Const, limbs_buffer: *std.ArrayList(Limb)) !void { assert(rma.limbs.ptr != x.limbs.ptr); // illegal aliasing assert(rma.limbs.ptr != y.limbs.ptr); // illegal aliasing return gcdLehmer(rma, x, y, limbs_buffer); } fn gcdLehmer(result: *Mutable, xa: Const, ya: Const, limbs_buffer: *std.ArrayList(Limb)) !void { var x = try xa.toManaged(limbs_buffer.allocator); defer x.deinit(); x.abs(); var y = try ya.toManaged(limbs_buffer.allocator); defer y.deinit(); y.abs(); if (x.toConst().order(y.toConst()) == .lt) { x.swap(&y); } var t_big = try Managed.init(limbs_buffer.allocator); defer t_big.deinit(); var r = try Managed.init(limbs_buffer.allocator); defer r.deinit(); var tmp_x = try Managed.init(limbs_buffer.allocator); defer tmp_x.deinit(); while (y.len() > 1 and !y.eqlZero()) { assert(x.isPositive() and y.isPositive()); assert(x.len() >= y.len()); var xh: SignedDoubleLimb = x.limbs[x.len() - 1]; var yh: SignedDoubleLimb = if (x.len() > y.len()) 0 else y.limbs[x.len() - 1]; var A: SignedDoubleLimb = 1; var B: SignedDoubleLimb = 0; var C: SignedDoubleLimb = 0; var D: SignedDoubleLimb = 1; while (yh + C != 0 and yh + D != 0) { const q = @divFloor(xh + A, yh + C); const qp = @divFloor(xh + B, yh + D); if (q != qp) { break; } var t = A - q * C; A = C; C = t; t = B - q * D; B = D; D = t; t = xh - q * yh; xh = yh; yh = t; } if (B == 0) { // t_big = x % y, r is unused try r.divTrunc(&t_big, &x, &y); assert(t_big.isPositive()); x.swap(&y); y.swap(&t_big); } else { var storage: [8]Limb = undefined; const Ap = fixedIntFromSignedDoubleLimb(A, storage[0..2]).toManaged(limbs_buffer.allocator); const Bp = fixedIntFromSignedDoubleLimb(B, storage[2..4]).toManaged(limbs_buffer.allocator); const Cp = fixedIntFromSignedDoubleLimb(C, storage[4..6]).toManaged(limbs_buffer.allocator); const Dp = fixedIntFromSignedDoubleLimb(D, storage[6..8]).toManaged(limbs_buffer.allocator); // t_big = Ax + By try r.mul(&x, &Ap); try t_big.mul(&y, &Bp); try t_big.add(&r, &t_big); // u = Cx + Dy, r as u try tmp_x.copy(x.toConst()); try x.mul(&tmp_x, &Cp); try r.mul(&y, &Dp); try r.add(&x, &r); x.swap(&t_big); y.swap(&r); } } // euclidean algorithm assert(x.toConst().order(y.toConst()) != .lt); while (!y.toConst().eqlZero()) { try t_big.divTrunc(&r, &x, &y); x.swap(&y); y.swap(&r); } result.copy(x.toConst()); } // Truncates by default. fn div(q: *Mutable, r: *Mutable, x: *Mutable, y: *Mutable) void { assert(!y.eqlZero()); // division by zero assert(q != r); // illegal aliasing const q_positive = (x.positive == y.positive); const r_positive = x.positive; if (x.toConst().orderAbs(y.toConst()) == .lt) { // q may alias x so handle r first. r.copy(x.toConst()); r.positive = r_positive; q.set(0); return; } // Handle trailing zero-words of divisor/dividend. These are not handled in the following // algorithms. // Note, there must be a non-zero limb for either. // const x_trailing = std.mem.indexOfScalar(Limb, x.limbs[0..x.len], 0).?; // const y_trailing = std.mem.indexOfScalar(Limb, y.limbs[0..y.len], 0).?; const x_trailing = for (x.limbs[0..x.len], 0..) |xi, i| { if (xi != 0) break i; } else unreachable; const y_trailing = for (y.limbs[0..y.len], 0..) |yi, i| { if (yi != 0) break i; } else unreachable; const xy_trailing = @min(x_trailing, y_trailing); if (y.len - xy_trailing == 1) { const divisor = y.limbs[y.len - 1]; // Optimization for small divisor. By using a half limb we can avoid requiring DoubleLimb // divisions in the hot code path. This may often require compiler_rt software-emulation. 
if (divisor < maxInt(HalfLimb)) { lldiv0p5(q.limbs, &r.limbs[0], x.limbs[xy_trailing..x.len], @as(HalfLimb, @intCast(divisor))); } else { lldiv1(q.limbs, &r.limbs[0], x.limbs[xy_trailing..x.len], divisor); } q.normalize(x.len - xy_trailing); q.positive = q_positive; r.len = 1; r.positive = r_positive; } else { // Shrink x, y such that the trailing zero limbs shared between are removed. var x0 = Mutable{ .limbs = x.limbs[xy_trailing..], .len = x.len - xy_trailing, .positive = true, }; var y0 = Mutable{ .limbs = y.limbs[xy_trailing..], .len = y.len - xy_trailing, .positive = true, }; divmod(q, r, &x0, &y0); q.positive = q_positive; r.positive = r_positive; } if (xy_trailing != 0 and r.limbs[r.len - 1] != 0) { // Manually shift here since we know it's limb aligned. mem.copyBackwards(Limb, r.limbs[xy_trailing..], r.limbs[0..r.len]); @memset(r.limbs[0..xy_trailing], 0); r.len += xy_trailing; } } /// Handbook of Applied Cryptography, 14.20 /// /// x = qy + r where 0 <= r < y /// y is modified but returned intact. fn divmod( q: *Mutable, r: *Mutable, x: *Mutable, y: *Mutable, ) void { // 0. // Normalize so that y[t] > b/2 const lz = @clz(y.limbs[y.len - 1]); const norm_shift = if (lz == 0 and y.toConst().isOdd()) limb_bits // Force an extra limb so that y is even. else lz; x.shiftLeft(x.toConst(), norm_shift); y.shiftLeft(y.toConst(), norm_shift); const n = x.len - 1; const t = y.len - 1; const shift = n - t; // 1. // for 0 <= j <= n - t, set q[j] to 0 q.len = shift + 1; q.positive = true; @memset(q.limbs[0..q.len], 0); // 2. // while x >= y * b^(n - t): // x -= y * b^(n - t) // q[n - t] += 1 // Note, this algorithm is performed only once if y[t] > base/2 and y is even, which we // enforced in step 0. This means we can replace the while with an if. // Note, multiplication by b^(n - t) comes down to shifting to the right by n - t limbs. // We can also replace x >= y * b^(n - t) by x/b^(n - t) >= y, and use shifts for that. { // x >= y * b^(n - t) can be replaced by x/b^(n - t) >= y. // 'divide' x by b^(n - t) var tmp = Mutable{ .limbs = x.limbs[shift..], .len = x.len - shift, .positive = true, }; if (tmp.toConst().order(y.toConst()) != .lt) { // Perform x -= y * b^(n - t) // Note, we can subtract y from x[n - t..] and get the result without shifting. // We can also re-use tmp which already contains the relevant part of x. Note that // this also edits x. // Due to the check above, this cannot underflow. tmp.sub(tmp.toConst(), y.toConst()); // tmp.sub normalized tmp, but we need to normalize x now. x.limbs.len = tmp.limbs.len + shift; q.limbs[shift] += 1; } } // 3. // for i from n down to t + 1, do var i = n; while (i >= t + 1) : (i -= 1) { const k = i - t - 1; // 3.1. // if x_i == y_t: // q[i - t - 1] = b - 1 // else: // q[i - t - 1] = (x[i] * b + x[i - 1]) / y[t] if (x.limbs[i] == y.limbs[t]) { q.limbs[k] = maxInt(Limb); } else { const q0 = (@as(DoubleLimb, x.limbs[i]) << limb_bits) | @as(DoubleLimb, x.limbs[i - 1]); const n0 = @as(DoubleLimb, y.limbs[t]); q.limbs[k] = @as(Limb, @intCast(q0 / n0)); } // 3.2 // while q[i - t - 1] * (y[t] * b + y[t - 1]) > x[i] * b * b + x[i - 1] * b + x[i - 2]: // q[i - t - 1] -= 1 // Note, if y[t] > b / 2 this part is repeated no more than twice. // Extract from y. const y0 = if (t > 0) y.limbs[t - 1] else 0; const y1 = y.limbs[t]; // Extract from x. // Note, big endian. const tmp0 = [_]Limb{ x.limbs[i], if (i >= 1) x.limbs[i - 1] else 0, if (i >= 2) x.limbs[i - 2] else 0, }; while (true) { // Ad-hoc 2x1 multiplication with q[i - t - 1]. // Note, big endian. 
var tmp1 = [_]Limb{ 0, undefined, undefined }; tmp1[2] = addMulLimbWithCarry(0, y0, q.limbs[k], &tmp1[0]); tmp1[1] = addMulLimbWithCarry(0, y1, q.limbs[k], &tmp1[0]); // Big-endian compare if (mem.order(Limb, &tmp1, &tmp0) != .gt) break; q.limbs[k] -= 1; } // 3.3. // x -= q[i - t - 1] * y * b^(i - t - 1) // Note, we multiply by a single limb here. // The shift doesn't need to be performed if we add the result of the first multiplication // to x[i - t - 1]. const underflow = llmulLimb(.sub, x.limbs[k..x.len], y.limbs[0..y.len], q.limbs[k]); // 3.4. // if x < 0: // x += y * b^(i - t - 1) // q[i - t - 1] -= 1 // Note, we check for x < 0 using the underflow flag from the previous operation. if (underflow) { // While we didn't properly set the signedness of x, this operation should 'flow' it back to positive. llaccum(.add, x.limbs[k..x.len], y.limbs[0..y.len]); q.limbs[k] -= 1; } } x.normalize(x.len); q.normalize(q.len); // De-normalize r and y. r.shiftRight(x.toConst(), norm_shift); y.shiftRight(y.toConst(), norm_shift); } /// Truncate an integer to a number of bits, following 2s-complement semantics. /// `r` may alias `a`. /// /// Asserts `r` has enough storage to compute the result. /// The upper bound is `calcTwosCompLimbCount(bit_count)`. pub fn truncate(r: *Mutable, a: Const, signedness: Signedness, bit_count: usize) void { // Handle 0-bit integers. if (bit_count == 0) { @branchHint(.unlikely); r.set(0); return; } const max_limbs = calcTwosCompLimbCount(bit_count); const sign_bit = @as(Limb, 1) << @truncate(bit_count - 1); const mask = @as(Limb, maxInt(Limb)) >> @truncate(-%bit_count); // Guess whether the result will have the same sign as `a`. // * If the result will be signed zero, the guess is `true`. // * If the result will be the minimum signed integer, the guess is `false`. // * If the result will be unsigned zero, the guess is `a.positive`. // * Otherwise the guess is correct. const same_sign_guess = switch (signedness) { .signed => max_limbs > a.limbs.len or a.limbs[max_limbs - 1] & sign_bit == 0, .unsigned => a.positive, }; const abs_trunc_a: Const = .{ .positive = true, .limbs = a.limbs[0..llnormalize(a.limbs[0..@min(a.limbs.len, max_limbs)])], }; if (same_sign_guess or abs_trunc_a.eqlZero()) { // One of the following is true: // * The result is zero. // * The result is non-zero and has the same sign as `a`. r.copy(abs_trunc_a); if (max_limbs <= r.len) r.limbs[max_limbs - 1] &= mask; r.normalize(r.len); r.positive = a.positive or r.eqlZero(); } else { // One of the following is true: // * The result is the minimum signed integer. // * The result is unsigned zero. // * The result is non-zero and has the opposite sign as `a`. r.addScalar(abs_trunc_a, -1); llnot(r.limbs[0..r.len]); @memset(r.limbs[r.len..max_limbs], maxInt(Limb)); r.limbs[max_limbs - 1] &= mask; r.normalize(max_limbs); r.positive = switch (signedness) { // The only value with the sign bit still set is the minimum signed integer. .signed => !a.positive and r.limbs[max_limbs - 1] & sign_bit == 0, .unsigned => !a.positive or r.eqlZero(), }; } } /// Saturate an integer to a number of bits, following 2s-complement semantics. /// r may alias a. /// /// Asserts `r` has enough storage to store the result. /// The upper bound is `calcTwosCompLimbCount(bit_count)`. pub fn saturate(r: *Mutable, a: Const, signedness: Signedness, bit_count: usize) void { if (!a.fitsInTwosComp(signedness, bit_count)) { r.setTwosCompIntLimit(if (r.positive) .max else .min, signedness, bit_count); } } /// Read the value of `x` from `buffer`. 
/// Asserts that `buffer` is large enough to contain a value of bit-size `bit_count`. /// /// The contents of `buffer` are interpreted as if they were the contents of /// @ptrCast(*[buffer.len]const u8, &x). Byte ordering is determined by `endian` /// and any required padding bits are expected on the MSB end. pub fn readTwosComplement( x: *Mutable, buffer: []const u8, bit_count: usize, endian: Endian, signedness: Signedness, ) void { return readPackedTwosComplement(x, buffer, 0, bit_count, endian, signedness); } /// Read the value of `x` from a packed memory `buffer`. /// Asserts that `buffer` is large enough to contain a value of bit-size `bit_count` /// at offset `bit_offset`. /// /// This is equivalent to loading the value of an integer with `bit_count` bits as /// if it were a field in packed memory at the provided bit offset. pub fn readPackedTwosComplement( x: *Mutable, buffer: []const u8, bit_offset: usize, bit_count: usize, endian: Endian, signedness: Signedness, ) void { if (bit_count == 0) { x.limbs[0] = 0; x.len = 1; x.positive = true; return; } // Check whether the input is negative var positive = true; if (signedness == .signed) { const total_bits = bit_offset + bit_count; const last_byte = switch (endian) { .little => ((total_bits + 7) / 8) - 1, .big => buffer.len - ((total_bits + 7) / 8), }; const sign_bit = @as(u8, 1) << @as(u3, @intCast((total_bits - 1) % 8)); positive = ((buffer[last_byte] & sign_bit) == 0); } // Copy all complete limbs var carry: u1 = 1; var limb_index: usize = 0; var bit_index: usize = 0; while (limb_index < bit_count / @bitSizeOf(Limb)) : (limb_index += 1) { // Read one Limb of bits var limb = mem.readPackedInt(Limb, buffer, bit_index + bit_offset, endian); bit_index += @bitSizeOf(Limb); // 2's complement (bitwise not, then add carry bit) if (!positive) { const ov = @addWithOverflow(~limb, carry); limb = ov[0]; carry = ov[1]; } x.limbs[limb_index] = limb; } // Copy the remaining bits if (bit_count != bit_index) { // Read all remaining bits var limb = switch (signedness) { .unsigned => mem.readVarPackedInt(Limb, buffer, bit_index + bit_offset, bit_count - bit_index, endian, .unsigned), .signed => b: { const SLimb = std.meta.Int(.signed, @bitSizeOf(Limb)); const limb = mem.readVarPackedInt(SLimb, buffer, bit_index + bit_offset, bit_count - bit_index, endian, .signed); break :b @as(Limb, @bitCast(limb)); }, }; // 2's complement (bitwise not, then add carry bit) if (!positive) { const ov = @addWithOverflow(~limb, carry); assert(ov[1] == 0); limb = ov[0]; } x.limbs[limb_index] = limb; limb_index += 1; } x.positive = positive; x.len = limb_index; x.normalize(x.len); } /// Normalize a possible sequence of leading zeros. /// /// [1, 2, 3, 4, 0] -> [1, 2, 3, 4] /// [1, 2, 0, 0, 0] -> [1, 2] /// [0, 0, 0, 0, 0] -> [0] pub fn normalize(r: *Mutable, length: usize) void { r.len = llnormalize(r.limbs[0..length]); } }
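
Example

A minimal usage sketch, not part of the source above: it adds two integers using only caller-provided limb buffers, sizing them with `calcLimbLen` as the doc comments suggest. It assumes this struct is reached as std.math.big.int.Mutable and that Const provides an `orderAgainstScalar` helper (not shown on this page):

const std = @import("std");
const big = std.math.big;

test "Mutable.add with stack buffers" {
    // calcLimbLen gives the limb count for a known value; per the doc comment
    // on add, the result needs at most @max(a.limbs.len, b.limbs.len) + 1 limbs.
    var a_limbs: [big.int.calcLimbLen(123456789)]big.Limb = undefined;
    var b_limbs: [big.int.calcLimbLen(987654321)]big.Limb = undefined;
    const a = big.int.Mutable.init(&a_limbs, 123456789);
    const b = big.int.Mutable.init(&b_limbs, 987654321);

    var r_limbs: [@max(a_limbs.len, b_limbs.len) + 1]big.Limb = undefined;
    var r = big.int.Mutable.init(&r_limbs, 0);
    r.add(a.toConst(), b.toConst());

    try std.testing.expect(r.toConst().orderAgainstScalar(1_111_111_110) == .eq);
}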
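Similarly, a hedged sketch of `setString`, using the sizing helpers that its doc comment refers to (`calcSetStringLimbCount` for the result, `calcSetStringLimbsBufferLen` for scratch space; both are assumed to live alongside Mutable in std.math.big.int). Passing null for the allocator selects the fallback multiplication path:

const std = @import("std");
const big = std.math.big;

test "Mutable.setString from decimal" {
    const str = "123_456_789_123_456_789";
    var limbs: [big.int.calcSetStringLimbCount(10, str.len)]big.Limb = undefined;
    var scratch: [big.int.calcSetStringLimbsBufferLen(10, str.len)]big.Limb = undefined;

    // Underscores in the input are ignored, so they work as digit separators.
    var x = big.int.Mutable.init(&limbs, 0);
    try x.setString(10, str, &scratch, null);

    try std.testing.expect(x.toConst().orderAgainstScalar(123_456_789_123_456_789) == .eq);
}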