struct Mutable [src]
An arbitrary-precision big integer, with a fixed set of mutable limbs.
Fields
limbs: []Limb
Raw digits. These are:
* Little-endian ordered
* limbs.len >= 1
* Zero is represented as limbs.len == 1 with limbs[0] == 0.
Accessing limbs directly should be avoided.
These are allocated limbs; the `len` field tells the valid range.
len: usize
positive: bool
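A minimal usage sketch (illustrative; `bigint` is a local alias for `std.math.big.int`, and `calcLimbLen` is this module's helper for sizing the buffer):

    const std = @import("std");
    const bigint = std.math.big.int;

    var limbs: [bigint.calcLimbLen(12345)]std.math.big.Limb = undefined;
    var x = bigint.Mutable.init(&limbs, 12345);
    x.addScalar(x.toConst(), 1); // x is now 12346; no heap allocation involved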
Members
- abs (Function)
- add (Function)
- addSat (Function)
- addScalar (Function)
- addWrap (Function)
- bitAnd (Function)
- bitNotWrap (Function)
- bitOr (Function)
- bitReverse (Function)
- bitXor (Function)
- byteSwap (Function)
- clone (Function)
- copy (Function)
- divFloor (Function)
- divTrunc (Function)
- dump (Function)
- eqlZero (Function)
- gcd (Function)
- gcdNoAlias (Function)
- init (Function)
- mul (Function)
- mulNoAlias (Function)
- mulWrap (Function)
- mulWrapNoAlias (Function)
- negate (Function)
- normalize (Function)
- popCount (Function)
- pow (Function)
- readPackedTwosComplement (Function)
- readTwosComplement (Function)
- saturate (Function)
- set (Function)
- setString (Function)
- setTwosCompIntLimit (Function)
- shiftLeft (Function)
- shiftLeftSat (Function)
- shiftRight (Function)
- sqrNoAlias (Function)
- sqrt (Function)
- sub (Function)
- subSat (Function)
- subWrap (Function)
- swap (Function)
- toConst (Function)
- toManaged (Function)
- truncate (Function)
Source
pub const Mutable = struct {
/// Raw digits. These are:
///
/// * Little-endian ordered
/// * limbs.len >= 1
/// * Zero is represented as limbs.len == 1 with limbs[0] == 0.
///
/// Accessing limbs directly should be avoided.
/// These are allocated limbs; the `len` field tells the valid range.
limbs: []Limb,
len: usize,
positive: bool,
pub fn toConst(self: Mutable) Const {
return .{
.limbs = self.limbs[0..self.len],
.positive = self.positive,
};
}
/// Returns true if `a == 0`.
pub fn eqlZero(self: Mutable) bool {
return self.toConst().eqlZero();
}
/// Asserts that the allocator owns the limbs memory. If this is not the case,
/// use `toConst().toManaged()`.
pub fn toManaged(self: Mutable, allocator: Allocator) Managed {
return .{
.allocator = allocator,
.limbs = self.limbs,
.metadata = if (self.positive)
self.len & ~Managed.sign_bit
else
self.len | Managed.sign_bit,
};
}
/// `value` is a primitive integer type.
/// Asserts the value fits within the provided `limbs_buffer`.
/// Note: `calcLimbLen` can be used to figure out how big an array to allocate for `limbs_buffer`.
pub fn init(limbs_buffer: []Limb, value: anytype) Mutable {
limbs_buffer[0] = 0;
var self: Mutable = .{
.limbs = limbs_buffer,
.len = 1,
.positive = true,
};
self.set(value);
return self;
}
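// Illustrative sketch: `calcLimbLen` (a file-level helper in this module) sizes
// a stack buffer for a comptime-known value.
test "Mutable.init from a comptime scalar (illustrative)" {
    var limbs: [calcLimbLen(-12345)]Limb = undefined;
    const x = Mutable.init(&limbs, -12345);
    try std.testing.expectEqual(@as(i32, -12345), try x.toConst().to(i32));
}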
/// Copies the value of a Const to an existing Mutable so that they both have the same value.
/// Asserts the value fits in the limbs buffer.
pub fn copy(self: *Mutable, other: Const) void {
if (self.limbs.ptr != other.limbs.ptr) {
@memcpy(self.limbs[0..other.limbs.len], other.limbs[0..other.limbs.len]);
}
// Normalize before setting `positive` so the `eqlZero` doesn't need to iterate
// over the extra zero limbs.
self.normalize(other.limbs.len);
self.positive = other.positive or other.eqlZero();
}
/// Efficiently swap a Mutable with another. This swaps the limb pointers; a full copy is not
/// performed. The address of the limbs field will not be the same after this function.
pub fn swap(self: *Mutable, other: *Mutable) void {
mem.swap(Mutable, self, other);
}
pub fn dump(self: Mutable) void {
for (self.limbs[0..self.len]) |limb| {
std.debug.print("{x} ", .{limb});
}
std.debug.print("capacity={} positive={}\n", .{ self.limbs.len, self.positive });
}
/// Clones a Mutable and returns a new Mutable with the same value. The new Mutable is a deep copy and
/// can be modified separately from the original.
/// Asserts that limbs is big enough to store the value.
pub fn clone(other: Mutable, limbs: []Limb) Mutable {
@memcpy(limbs[0..other.len], other.limbs[0..other.len]);
return .{
.limbs = limbs,
.len = other.len,
.positive = other.positive,
};
}
pub fn negate(self: *Mutable) void {
self.positive = !self.positive;
}
/// Modify to become the absolute value
pub fn abs(self: *Mutable) void {
self.positive = true;
}
/// Sets the Mutable to value. Value must be a primitive integer type.
/// Asserts the value fits within the limbs buffer.
/// Note: `calcLimbLen` can be used to figure out how big the limbs buffer
/// needs to be to store a specific value.
pub fn set(self: *Mutable, value: anytype) void {
const T = @TypeOf(value);
const needed_limbs = calcLimbLen(value);
assert(needed_limbs <= self.limbs.len); // value too big
self.len = needed_limbs;
self.positive = value >= 0;
switch (@typeInfo(T)) {
.int => |info| {
var w_value = @abs(value);
if (info.bits <= limb_bits) {
self.limbs[0] = w_value;
} else {
var i: usize = 0;
while (true) : (i += 1) {
self.limbs[i] = @as(Limb, @truncate(w_value));
w_value >>= limb_bits;
if (w_value == 0) break;
}
}
},
.comptime_int => {
comptime var w_value = @abs(value);
if (w_value <= maxInt(Limb)) {
self.limbs[0] = w_value;
} else {
const mask = (1 << limb_bits) - 1;
comptime var i = 0;
inline while (true) : (i += 1) {
self.limbs[i] = w_value & mask;
w_value >>= limb_bits;
if (w_value == 0) break;
}
}
},
else => @compileError("cannot set Mutable using type " ++ @typeName(T)),
}
}
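// Illustrative sketch: one Mutable can be re-set to different values as long as
// the buffer is sized for the largest of them (here via `maxInt(u64)`).
test "Mutable.set reuses the limbs buffer (illustrative)" {
    var limbs: [calcLimbLen(maxInt(u64))]Limb = undefined;
    var m = Mutable.init(&limbs, 0);
    m.set(@as(u64, 0xdead_beef));
    try std.testing.expectEqual(@as(u64, 0xdead_beef), try m.toConst().to(u64));
    m.set(-1);
    try std.testing.expect(!m.positive);
}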
/// Set self from the string representation `value`.
///
/// `value` must contain only digits valid for `base` and is case insensitive. Base prefixes are
/// not allowed (e.g. 0x43 should simply be 43). Underscores in the input string are
/// ignored and can be used as digit separators.
///
/// Asserts there is enough memory for the value in `self.limbs`. An upper bound on number of limbs can
/// be determined with `calcSetStringLimbCount`.
/// Asserts the base is in the range [2, 36].
///
/// Returns an error if the value has invalid digits for the requested base.
///
/// `limbs_buffer` is used for temporary storage. The size required can be found with
/// `calcSetStringLimbsBufferLen`.
///
/// If `allocator` is provided, it will be used for temporary storage to improve
/// multiplication performance. `error.OutOfMemory` is handled with a fallback algorithm.
pub fn setString(
self: *Mutable,
base: u8,
value: []const u8,
limbs_buffer: []Limb,
allocator: ?Allocator,
) error{InvalidCharacter}!void {
assert(base >= 2);
assert(base <= 36);
var i: usize = 0;
var positive = true;
if (value.len > 0 and value[0] == '-') {
positive = false;
i += 1;
}
const ap_base: Const = .{ .limbs = &[_]Limb{base}, .positive = true };
self.set(0);
for (value[i..]) |ch| {
if (ch == '_') {
continue;
}
const d = try std.fmt.charToDigit(ch, base);
const ap_d: Const = .{ .limbs = &[_]Limb{d}, .positive = true };
self.mul(self.toConst(), ap_base, limbs_buffer, allocator);
self.add(self.toConst(), ap_d);
}
self.positive = positive;
}
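// Illustrative sketch: parsing a decimal literal with stack buffers sized by the
// `calcSetString*` helpers referenced in the doc comment above.
test "Mutable.setString parses base 10 with separators (illustrative)" {
    const str = "-1_000_000";
    var limbs: [calcSetStringLimbCount(10, str.len)]Limb = undefined;
    var tmp: [calcSetStringLimbsBufferLen(10, str.len)]Limb = undefined;
    var m = Mutable.init(&limbs, 0);
    try m.setString(10, str, &tmp, null);
    try std.testing.expectEqual(@as(i64, -1_000_000), try m.toConst().to(i64));
}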
/// Set self to either bound of a 2s-complement integer.
/// Note: The result is still sign-magnitude, not twos complement! In order to convert the
/// result to twos complement, it is sufficient to take the absolute value.
///
/// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
/// r is `calcTwosCompLimbCount(bit_count)`.
pub fn setTwosCompIntLimit(
r: *Mutable,
limit: TwosCompIntLimit,
signedness: Signedness,
bit_count: usize,
) void {
// Handle zero-bit types.
if (bit_count == 0) {
r.set(0);
return;
}
const req_limbs = calcTwosCompLimbCount(bit_count);
const bit: Log2Limb = @truncate(bit_count - 1);
const signmask = @as(Limb, 1) << bit; // 0b0..010..0 where 1 is the sign bit.
const mask = (signmask << 1) -% 1; // 0b0..011..1 where the leftmost 1 is the sign bit.
r.positive = true;
switch (signedness) {
.signed => switch (limit) {
.min => {
// Negative bound, signed = -0x80.
r.len = req_limbs;
@memset(r.limbs[0 .. r.len - 1], 0);
r.limbs[r.len - 1] = signmask;
r.positive = false;
},
.max => {
// Positive bound, signed = 0x7F
// Note, in this branch we need to normalize because the first bit is
// supposed to be 0.
// Special case for 1-bit integers.
if (bit_count == 1) {
r.set(0);
} else {
const new_req_limbs = calcTwosCompLimbCount(bit_count - 1);
const msb = @as(Log2Limb, @truncate(bit_count - 2));
const new_signmask = @as(Limb, 1) << msb; // 0b0..010..0 where 1 is the sign bit.
const new_mask = (new_signmask << 1) -% 1; // 0b0..001..1 where the rightmost 0 is the sign bit.
r.len = new_req_limbs;
@memset(r.limbs[0 .. r.len - 1], maxInt(Limb));
r.limbs[r.len - 1] = new_mask;
}
},
},
.unsigned => switch (limit) {
.min => {
// Min bound, unsigned = 0x00
r.set(0);
},
.max => {
// Max bound, unsigned = 0xFF
r.len = req_limbs;
@memset(r.limbs[0 .. r.len - 1], maxInt(Limb));
r.limbs[r.len - 1] = mask;
},
},
}
}
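// Illustrative sketch: the i8 bounds, kept in sign-magnitude form as the doc
// comment above notes (take the absolute value for the twos-complement pattern).
test "Mutable.setTwosCompIntLimit i8 bounds (illustrative)" {
    var limbs: [calcTwosCompLimbCount(8)]Limb = undefined;
    var m = Mutable.init(&limbs, 0);
    m.setTwosCompIntLimit(.max, .signed, 8);
    try std.testing.expectEqual(@as(i8, 127), try m.toConst().to(i8));
    m.setTwosCompIntLimit(.min, .signed, 8);
    try std.testing.expectEqual(@as(i8, -128), try m.toConst().to(i8));
}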
/// r = a + scalar
///
/// r and a may be aliases.
/// scalar is a primitive integer type.
///
/// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
/// r is `@max(a.limbs.len, calcLimbLen(scalar)) + 1`.
pub fn addScalar(r: *Mutable, a: Const, scalar: anytype) void {
// Normally we could just determine the number of limbs needed with calcLimbLen,
// but that is not comptime-known when scalar is not a comptime_int. Instead, we
// use calcTwosCompLimbCount for a non-comptime_int scalar, which can be pessimistic
// in the case that scalar happens to be small in magnitude within its type, but it
// is well worth being able to use the stack and not needing an allocator passed in.
// Note that Mutable.init still sets len to calcLimbLen(scalar) in any case.
const limb_len = comptime switch (@typeInfo(@TypeOf(scalar))) {
.comptime_int => calcLimbLen(scalar),
.int => |info| calcTwosCompLimbCount(info.bits),
else => @compileError("expected scalar to be an int"),
};
var limbs: [limb_len]Limb = undefined;
const operand = init(&limbs, scalar).toConst();
return add(r, a, operand);
}
/// Base implementation for addition. Adds `@max(a.limbs.len, b.limbs.len)` elements from a and b,
/// and returns whether any overflow occurred.
/// r, a and b may be aliases.
///
/// Asserts r has enough elements to hold the result. The upper bound is `@max(a.limbs.len, b.limbs.len)`.
fn addCarry(r: *Mutable, a: Const, b: Const) bool {
if (a.eqlZero()) {
r.copy(b);
return false;
} else if (b.eqlZero()) {
r.copy(a);
return false;
} else if (a.positive != b.positive) {
if (a.positive) {
// (a) + (-b) => a - b
return r.subCarry(a, b.abs());
} else {
// (-a) + (b) => b - a
return r.subCarry(b, a.abs());
}
} else {
r.positive = a.positive;
if (a.limbs.len >= b.limbs.len) {
const c = lladdcarry(r.limbs, a.limbs, b.limbs);
r.normalize(a.limbs.len);
return c != 0;
} else {
const c = lladdcarry(r.limbs, b.limbs, a.limbs);
r.normalize(b.limbs.len);
return c != 0;
}
}
}
/// r = a + b
///
/// r, a and b may be aliases.
///
/// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
/// r is `@max(a.limbs.len, b.limbs.len) + 1`.
pub fn add(r: *Mutable, a: Const, b: Const) void {
if (r.addCarry(a, b)) {
// Fix up the result. Note that addCarry normalizes by a.limbs.len or b.limbs.len,
// so we need to set the length here.
const msl = @max(a.limbs.len, b.limbs.len);
// `[add|sub]Carry` normalizes by `msl`, so we need to fix up the result manually here.
// Note, the fact that it normalized means that the intermediary limbs are zero here.
r.len = msl + 1;
r.limbs[msl] = 1; // If this panics, there wasn't enough space in `r`.
}
}
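// Illustrative sketch: the documented bound @max(a.limbs.len, b.limbs.len) + 1
// is reached when the top limbs carry.
test "Mutable.add carries into an extra limb (illustrative)" {
    const a_limbs = [_]Limb{maxInt(Limb)};
    const a: Const = .{ .limbs = &a_limbs, .positive = true };
    var r_limbs: [2]Limb = undefined;
    var r = Mutable.init(&r_limbs, 0);
    r.add(a, a); // (2^limb_bits - 1) * 2 needs two limbs
    try std.testing.expectEqual(@as(usize, 2), r.len);
}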
/// r = a + b with 2s-complement wrapping semantics. Returns whether overflow occurred.
/// r, a and b may be aliases
///
/// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
/// r is `calcTwosCompLimbCount(bit_count)`.
pub fn addWrap(r: *Mutable, a: Const, b: Const, signedness: Signedness, bit_count: usize) bool {
const req_limbs = calcTwosCompLimbCount(bit_count);
// Slice off the upper bits if they exist; these will be ignored, allowing us to
// use addCarry to determine whether an overflow occurred.
const x = Const{
.positive = a.positive,
.limbs = a.limbs[0..@min(req_limbs, a.limbs.len)],
};
const y = Const{
.positive = b.positive,
.limbs = b.limbs[0..@min(req_limbs, b.limbs.len)],
};
var carry_truncated = false;
if (r.addCarry(x, y)) {
// There are two possibilities here:
// - We overflowed req_limbs. In this case, the carry is ignored, as it would be removed by
// truncate anyway.
// - a and b had less elements than req_limbs, and those were overflowed. This case needs to be handled.
// Note: after this we still might need to wrap.
const msl = @max(a.limbs.len, b.limbs.len);
if (msl < req_limbs) {
r.limbs[msl] = 1;
r.len = req_limbs;
@memset(r.limbs[msl + 1 .. req_limbs], 0);
} else {
carry_truncated = true;
}
}
if (!r.toConst().fitsInTwosComp(signedness, bit_count)) {
r.truncate(r.toConst(), signedness, bit_count);
return true;
}
return carry_truncated;
}
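// Illustrative sketch: u8 wrapping semantics, mirroring `250 +% 10 == 4`.
test "Mutable.addWrap wraps at the bit count (illustrative)" {
    var a_buf: [calcLimbLen(250)]Limb = undefined;
    var b_buf: [calcLimbLen(10)]Limb = undefined;
    var r_buf: [calcTwosCompLimbCount(8)]Limb = undefined;
    const a = Mutable.init(&a_buf, 250).toConst();
    const b = Mutable.init(&b_buf, 10).toConst();
    var r = Mutable.init(&r_buf, 0);
    try std.testing.expect(r.addWrap(a, b, .unsigned, 8)); // overflowed
    try std.testing.expectEqual(@as(u8, 4), try r.toConst().to(u8));
}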
/// r = a + b with 2s-complement saturating semantics.
/// r, a and b may be aliases.
///
/// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
/// r is `calcTwosCompLimbCount(bit_count)`.
pub fn addSat(r: *Mutable, a: Const, b: Const, signedness: Signedness, bit_count: usize) void {
const req_limbs = calcTwosCompLimbCount(bit_count);
// Slice off the upper bits if they exist; these will be ignored, allowing us to
// use addCarry to determine whether an overflow occurred.
const x = Const{
.positive = a.positive,
.limbs = a.limbs[0..@min(req_limbs, a.limbs.len)],
};
const y = Const{
.positive = b.positive,
.limbs = b.limbs[0..@min(req_limbs, b.limbs.len)],
};
if (r.addCarry(x, y)) {
// There are two possibilities here:
// - We overflowed req_limbs, in which case we need to saturate.
// - a and b had less elements than req_limbs, and those were overflowed.
// Note: In this case, might _also_ need to saturate.
const msl = @max(a.limbs.len, b.limbs.len);
if (msl < req_limbs) {
r.limbs[msl] = 1;
r.len = req_limbs;
// Note: Saturation may still be required if msl == req_limbs - 1
} else {
// Overflowed req_limbs, definitely saturate.
r.setTwosCompIntLimit(if (r.positive) .max else .min, signedness, bit_count);
}
}
// Saturate if the result didn't fit.
r.saturate(r.toConst(), signedness, bit_count);
}
/// Base implementation for subtraction. Subtracts `@max(a.limbs.len, b.limbs.len)` elements from a and b,
/// and returns whether any overflow occurred.
/// r, a and b may be aliases.
///
/// Asserts r has enough elements to hold the result. The upper bound is `@max(a.limbs.len, b.limbs.len)`.
fn subCarry(r: *Mutable, a: Const, b: Const) bool {
if (a.eqlZero()) {
r.copy(b);
r.positive = !b.positive;
return false;
} else if (b.eqlZero()) {
r.copy(a);
return false;
} else if (a.positive != b.positive) {
if (a.positive) {
// (a) - (-b) => a + b
return r.addCarry(a, b.abs());
} else {
// (-a) - (b) => -a + -b
return r.addCarry(a, b.negate());
}
} else if (a.positive) {
if (a.order(b) != .lt) {
// (a) - (b) => a - b
const c = llsubcarry(r.limbs, a.limbs, b.limbs);
r.normalize(a.limbs.len);
r.positive = true;
return c != 0;
} else {
// (a) - (b) => -b + a => -(b - a)
const c = llsubcarry(r.limbs, b.limbs, a.limbs);
r.normalize(b.limbs.len);
r.positive = false;
return c != 0;
}
} else {
if (a.order(b) == .lt) {
// (-a) - (-b) => -(a - b)
const c = llsubcarry(r.limbs, a.limbs, b.limbs);
r.normalize(a.limbs.len);
r.positive = false;
return c != 0;
} else {
// (-a) - (-b) => --b + -a => b - a
const c = llsubcarry(r.limbs, b.limbs, a.limbs);
r.normalize(b.limbs.len);
r.positive = true;
return c != 0;
}
}
}
/// r = a - b
///
/// r, a and b may be aliases.
///
/// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
/// r is `@max(a.limbs.len, b.limbs.len) + 1`. The +1 is not needed if both operands are positive.
pub fn sub(r: *Mutable, a: Const, b: Const) void {
r.add(a, b.negate());
}
/// r = a - b with 2s-complement wrapping semantics. Returns whether any overflow occurred.
///
/// r, a and b may be aliases
/// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
/// r is `calcTwosCompLimbCount(bit_count)`.
pub fn subWrap(r: *Mutable, a: Const, b: Const, signedness: Signedness, bit_count: usize) bool {
return r.addWrap(a, b.negate(), signedness, bit_count);
}
/// r = a - b with 2s-complement saturating semantics.
/// r, a and b may be aliases.
///
/// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
/// r is `calcTwosCompLimbCount(bit_count)`.
pub fn subSat(r: *Mutable, a: Const, b: Const, signedness: Signedness, bit_count: usize) void {
r.addSat(a, b.negate(), signedness, bit_count);
}
/// rma = a * b
///
/// `rma` may alias with `a` or `b`.
/// `a` and `b` may alias with each other.
///
/// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by
/// rma is given by `a.limbs.len + b.limbs.len`.
///
/// `limbs_buffer` is used for temporary storage. The amount required is given by `calcMulLimbsBufferLen`.
pub fn mul(rma: *Mutable, a: Const, b: Const, limbs_buffer: []Limb, allocator: ?Allocator) void {
var buf_index: usize = 0;
const a_copy = if (rma.limbs.ptr == a.limbs.ptr) blk: {
const start = buf_index;
@memcpy(limbs_buffer[buf_index..][0..a.limbs.len], a.limbs);
buf_index += a.limbs.len;
break :blk a.toMutable(limbs_buffer[start..buf_index]).toConst();
} else a;
const b_copy = if (rma.limbs.ptr == b.limbs.ptr) blk: {
const start = buf_index;
@memcpy(limbs_buffer[buf_index..][0..b.limbs.len], b.limbs);
buf_index += b.limbs.len;
break :blk b.toMutable(limbs_buffer[start..buf_index]).toConst();
} else b;
return rma.mulNoAlias(a_copy, b_copy, allocator);
}
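// Illustrative sketch: squaring in place; `aliases` is 2 in the
// `calcMulLimbsBufferLen` call because both operands alias the result.
test "Mutable.mul with aliased operands (illustrative)" {
    var buf: [2]Limb = undefined;
    var r = Mutable.init(&buf, 65535);
    var tmp: [calcMulLimbsBufferLen(1, 1, 2)]Limb = undefined;
    r.mul(r.toConst(), r.toConst(), &tmp, null);
    try std.testing.expectEqual(@as(u64, 65535 * 65535), try r.toConst().to(u64));
}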
/// rma = a * b
///
/// `rma` may not alias with `a` or `b`.
/// `a` and `b` may alias with each other.
///
/// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by
/// rma is given by `a.limbs.len + b.limbs.len`.
///
/// If `allocator` is provided, it will be used for temporary storage to improve
/// multiplication performance. `error.OutOfMemory` is handled with a fallback algorithm.
pub fn mulNoAlias(rma: *Mutable, a: Const, b: Const, allocator: ?Allocator) void {
assert(rma.limbs.ptr != a.limbs.ptr); // illegal aliasing
assert(rma.limbs.ptr != b.limbs.ptr); // illegal aliasing
if (a.limbs.len == 1 and b.limbs.len == 1) {
const ov = @mulWithOverflow(a.limbs[0], b.limbs[0]);
rma.limbs[0] = ov[0];
if (ov[1] == 0) {
rma.len = 1;
rma.positive = (a.positive == b.positive);
return;
}
}
@memset(rma.limbs[0 .. a.limbs.len + b.limbs.len], 0);
llmulacc(.add, allocator, rma.limbs, a.limbs, b.limbs);
rma.normalize(a.limbs.len + b.limbs.len);
rma.positive = (a.positive == b.positive);
}
/// rma = a * b with 2s-complement wrapping semantics.
///
/// `rma` may alias with `a` or `b`.
/// `a` and `b` may alias with each other.
///
/// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by
/// rma is given by `a.limbs.len + b.limbs.len`.
///
/// `limbs_buffer` is used for temporary storage. The amount required is given by `calcMulWrapLimbsBufferLen`.
pub fn mulWrap(
rma: *Mutable,
a: Const,
b: Const,
signedness: Signedness,
bit_count: usize,
limbs_buffer: []Limb,
allocator: ?Allocator,
) void {
var buf_index: usize = 0;
const req_limbs = calcTwosCompLimbCount(bit_count);
const a_copy = if (rma.limbs.ptr == a.limbs.ptr) blk: {
const start = buf_index;
const a_len = @min(req_limbs, a.limbs.len);
@memcpy(limbs_buffer[buf_index..][0..a_len], a.limbs[0..a_len]);
buf_index += a_len;
break :blk a.toMutable(limbs_buffer[start..buf_index]).toConst();
} else a;
const b_copy = if (rma.limbs.ptr == b.limbs.ptr) blk: {
const start = buf_index;
const b_len = @min(req_limbs, b.limbs.len);
@memcpy(limbs_buffer[buf_index..][0..b_len], b.limbs[0..b_len]);
buf_index += b_len;
break :blk b.toMutable(limbs_buffer[start..buf_index]).toConst();
} else b;
return rma.mulWrapNoAlias(a_copy, b_copy, signedness, bit_count, allocator);
}
/// rma = a * b with 2s-complement wrapping semantics.
///
/// `rma` may not alias with `a` or `b`.
/// `a` and `b` may alias with each other.
///
/// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by
/// rma is given by `a.limbs.len + b.limbs.len`.
///
/// If `allocator` is provided, it will be used for temporary storage to improve
/// multiplication performance. `error.OutOfMemory` is handled with a fallback algorithm.
pub fn mulWrapNoAlias(
rma: *Mutable,
a: Const,
b: Const,
signedness: Signedness,
bit_count: usize,
allocator: ?Allocator,
) void {
assert(rma.limbs.ptr != a.limbs.ptr); // illegal aliasing
assert(rma.limbs.ptr != b.limbs.ptr); // illegal aliasing
const req_limbs = calcTwosCompLimbCount(bit_count);
// We can ignore the upper bits here, those results will be discarded anyway.
const a_limbs = a.limbs[0..@min(req_limbs, a.limbs.len)];
const b_limbs = b.limbs[0..@min(req_limbs, b.limbs.len)];
@memset(rma.limbs[0..req_limbs], 0);
llmulacc(.add, allocator, rma.limbs, a_limbs, b_limbs);
rma.normalize(@min(req_limbs, a.limbs.len + b.limbs.len));
rma.positive = (a.positive == b.positive);
rma.truncate(rma.toConst(), signedness, bit_count);
}
/// r = @bitReverse(a) with 2s-complement semantics.
/// r and a may be aliases.
///
/// Asserts the result fits in `r`. Upper bound on the number of limbs needed by
/// r is `calcTwosCompLimbCount(bit_count)`.
pub fn bitReverse(r: *Mutable, a: Const, signedness: Signedness, bit_count: usize) void {
if (bit_count == 0) return;
r.copy(a);
const limbs_required = calcTwosCompLimbCount(bit_count);
if (!a.positive) {
r.positive = true; // Negate.
r.bitNotWrap(r.toConst(), .unsigned, bit_count); // Bitwise NOT.
r.addScalar(r.toConst(), 1); // Add one.
} else if (limbs_required > a.limbs.len) {
// Zero-extend to our output length
for (r.limbs[a.limbs.len..limbs_required]) |*limb| {
limb.* = 0;
}
r.len = limbs_required;
}
// 0b0..01..1000 with @log2(@sizeOf(Limb)) consecutive ones
const endian_mask: usize = (@sizeOf(Limb) - 1) << 3;
const bytes = std.mem.sliceAsBytes(r.limbs);
var k: usize = 0;
while (k < ((bit_count + 1) / 2)) : (k += 1) {
var i = k;
var rev_i = bit_count - i - 1;
// This "endian mask" remaps a low (LE) byte to the corresponding high
// (BE) byte in the Limb, without changing which limbs we are indexing
if (native_endian == .big) {
i ^= endian_mask;
rev_i ^= endian_mask;
}
const bit_i = std.mem.readPackedInt(u1, bytes, i, .little);
const bit_rev_i = std.mem.readPackedInt(u1, bytes, rev_i, .little);
std.mem.writePackedInt(u1, bytes, i, bit_rev_i, .little);
std.mem.writePackedInt(u1, bytes, rev_i, bit_i, .little);
}
// Calculate signed-magnitude representation for output
if (signedness == .signed) {
const last_bit = switch (native_endian) {
.little => std.mem.readPackedInt(u1, bytes, bit_count - 1, .little),
.big => std.mem.readPackedInt(u1, bytes, (bit_count - 1) ^ endian_mask, .little),
};
if (last_bit == 1) {
r.bitNotWrap(r.toConst(), .unsigned, bit_count); // Bitwise NOT.
r.addScalar(r.toConst(), 1); // Add one.
r.positive = false; // Negate.
}
}
r.normalize(r.len);
}
/// r = @byteSwap(a) with 2s-complement semantics.
/// r and a may be aliases.
///
/// Asserts the result fits in `r`. Upper bound on the number of limbs needed by
/// r is `calcTwosCompLimbCount(8*byte_count)`.
pub fn byteSwap(r: *Mutable, a: Const, signedness: Signedness, byte_count: usize) void {
if (byte_count == 0) return;
r.copy(a);
const limbs_required = calcTwosCompLimbCount(8 * byte_count);
if (!a.positive) {
r.positive = true; // Negate.
r.bitNotWrap(r.toConst(), .unsigned, 8 * byte_count); // Bitwise NOT.
r.addScalar(r.toConst(), 1); // Add one.
} else if (limbs_required > a.limbs.len) {
// Zero-extend to our output length
for (r.limbs[a.limbs.len..limbs_required]) |*limb| {
limb.* = 0;
}
r.len = limbs_required;
}
// 0b0..01..1 with @log2(@sizeOf(Limb)) trailing ones
const endian_mask: usize = @sizeOf(Limb) - 1;
var bytes = std.mem.sliceAsBytes(r.limbs);
assert(bytes.len >= byte_count);
var k: usize = 0;
while (k < (byte_count + 1) / 2) : (k += 1) {
var i = k;
var rev_i = byte_count - k - 1;
// This "endian mask" remaps a low (LE) byte to the corresponding high
// (BE) byte in the Limb, without changing which limbs we are indexing
if (native_endian == .big) {
i ^= endian_mask;
rev_i ^= endian_mask;
}
const byte_i = bytes[i];
const byte_rev_i = bytes[rev_i];
bytes[rev_i] = byte_i;
bytes[i] = byte_rev_i;
}
// Calculate signed-magnitude representation for output
if (signedness == .signed) {
const last_byte = switch (native_endian) {
.little => bytes[byte_count - 1],
.big => bytes[(byte_count - 1) ^ endian_mask],
};
if (last_byte & (1 << 7) != 0) { // Check sign bit of last byte
r.bitNotWrap(r.toConst(), .unsigned, 8 * byte_count); // Bitwise NOT.
r.addScalar(r.toConst(), 1); // Add one.
r.positive = false; // Negate.
}
}
r.normalize(r.len);
}
/// r = @popCount(a) with 2s-complement semantics.
/// r and a may be aliases.
///
/// Asserts the result fits in `r`. An upper bound on the number of limbs needed by
/// r is `calcTwosCompLimbCount(bit_count)`.
pub fn popCount(r: *Mutable, a: Const, bit_count: usize) void {
r.copy(a);
if (!a.positive) {
r.positive = true; // Negate.
r.bitNotWrap(r.toConst(), .unsigned, bit_count); // Bitwise NOT.
r.addScalar(r.toConst(), 1); // Add one.
}
var sum: Limb = 0;
for (r.limbs[0..r.len]) |limb| {
sum += @popCount(limb);
}
r.set(sum);
}
/// rma = a * a
///
/// `rma` may not alias with `a`.
///
/// Asserts the result fits in `rma`. An upper bound on the number of limbs needed by
/// rma is given by `2 * a.limbs.len + 1`.
///
/// If `allocator` is provided, it will be used for temporary storage to improve
/// multiplication performance. `error.OutOfMemory` is handled with a fallback algorithm.
pub fn sqrNoAlias(rma: *Mutable, a: Const, opt_allocator: ?Allocator) void {
_ = opt_allocator;
assert(rma.limbs.ptr != a.limbs.ptr); // illegal aliasing
@memset(rma.limbs, 0);
llsquareBasecase(rma.limbs, a.limbs);
rma.normalize(2 * a.limbs.len + 1);
rma.positive = true;
}
/// q = a / b (rem r)
///
/// a / b is floored (rounded towards negative infinity).
/// q may alias with a or b.
///
/// Asserts there is enough memory to store q and r.
/// The upper bound for r limb count is `b.limbs.len`.
/// The upper bound for q limb count is given by `a.limbs.len`.
///
/// `limbs_buffer` is used for temporary storage. The amount required is given by `calcDivLimbsBufferLen`.
pub fn divFloor(
q: *Mutable,
r: *Mutable,
a: Const,
b: Const,
limbs_buffer: []Limb,
) void {
const sep = a.limbs.len + 2;
var x = a.toMutable(limbs_buffer[0..sep]);
var y = b.toMutable(limbs_buffer[sep..]);
div(q, r, &x, &y);
// Note, `div` performs truncating division, which satisfies
// @divTrunc(a, b) * b + @rem(a, b) = a
// so r = a - @divTrunc(a, b) * b
// Note, @rem(a, -b) = @rem(a, b) and @rem(-a, b) = @rem(-a, -b) = -@rem(a, b)
// For divFloor, we want to satisfy
// @divFloor(a, b) * b + @mod(a, b) = a
// Note:
// @divFloor(-a, b)
// = @divFloor(a, -b)
// = -@divCeil(a, b)
// = -@divFloor(a + b - 1, b)
// = -@divTrunc(a + b - 1, b)
// Note (1):
// @divTrunc(a + b - 1, b) * b + @rem(a + b - 1, b) = a + b - 1
// = @divTrunc(a + b - 1, b) * b + @rem(a - 1, b) = a + b - 1
// = @divTrunc(a + b - 1, b) * b + @rem(a - 1, b) - b + 1 = a
if (a.positive and b.positive) {
// Positive-positive case, don't need to do anything.
} else if (a.positive and !b.positive) {
// a/-b -> q is negative, and so we need to fix flooring.
// Subtract one to make the division flooring.
// @divFloor(a, -b) * -b + @mod(a, -b) = a
// If b divides a exactly, we have @divFloor(a, -b) * -b = a
// Else, we have @divFloor(a, -b) * -b > a, so @mod(a, -b) becomes negative
// We have:
// @divFloor(a, -b) * -b + @mod(a, -b) = a
// = -@divTrunc(a + b - 1, b) * -b + @mod(a, -b) = a
// = @divTrunc(a + b - 1, b) * b + @mod(a, -b) = a
// Substitute a for (1):
// @divTrunc(a + b - 1, b) * b + @rem(a - 1, b) - b + 1 = @divTrunc(a + b - 1, b) * b + @mod(a, -b)
// Yields:
// @mod(a, -b) = @rem(a - 1, b) - b + 1
// Note that `r` holds @rem(a, b) at this point.
//
// If @rem(a, b) is not 0:
// @rem(a - 1, b) = @rem(a, b) - 1
// => @mod(a, -b) = @rem(a, b) - 1 - b + 1 = @rem(a, b) - b
// Else:
// @rem(a - 1, b) = @rem(a + b - 1, b) = @rem(b - 1, b) = b - 1
// => @mod(a, -b) = b - 1 - b + 1 = 0
if (!r.eqlZero()) {
q.addScalar(q.toConst(), -1);
r.positive = true;
r.sub(r.toConst(), y.toConst().abs());
}
} else if (!a.positive and b.positive) {
// -a/b -> q is negative, and so we need to fix flooring.
// Subtract one to make the division flooring.
// @divFloor(-a, b) * b + @mod(-a, b) = -a
// If b divides a exactly, we have @divFloor(-a, b) * b = -a
// Else, we have @divFloor(-a, b) * b < -a, so @mod(-a, b) becomes positive
// We have:
// @divFloor(-a, b) * b + @mod(-a, b) = -a
// = -@divTrunc(a + b - 1, b) * b + @mod(-a, b) = -a
// = @divTrunc(a + b - 1, b) * b - @mod(-a, b) = a
// Substitute a for (1):
// @divTrunc(a + b - 1, b) * b + @rem(a - 1, b) - b + 1 = @divTrunc(a + b - 1, b) * b - @mod(-a, b)
// Yields:
// @rem(a - 1, b) - b + 1 = -@mod(-a, b)
// => -@mod(-a, b) = @rem(a - 1, b) - b + 1
// => @mod(-a, b) = -(@rem(a - 1, b) - b + 1) = -@rem(a - 1, b) + b - 1
//
// If @rem(a, b) is not 0:
// @rem(a - 1, b) = @rem(a, b) - 1
// => @mod(-a, b) = -(@rem(a, b) - 1) + b - 1 = -@rem(a, b) + 1 + b - 1 = -@rem(a, b) + b
// Else :
// @rem(a - 1, b) = b - 1
// => @mod(-a, b) = -(b - 1) + b - 1 = 0
if (!r.eqlZero()) {
q.addScalar(q.toConst(), -1);
r.positive = false;
r.add(r.toConst(), y.toConst().abs());
}
} else if (!a.positive and !b.positive) {
// -a/-b -> q is positive, don't need to do anything to fix flooring.
// @divFloor(-a, -b) * -b + @mod(-a, -b) = -a
// If b divides a exactly, we have @divFloor(-a, -b) * -b = -a
// Else, we have @divFloor(-a, -b) * -b > -a, so @mod(-a, -b) becomes negative
// We have:
// @divFloor(-a, -b) * -b + @mod(-a, -b) = -a
// = @divTrunc(a, b) * -b + @mod(-a, -b) = -a
// = @divTrunc(a, b) * b - @mod(-a, -b) = a
// We also have:
// @divTrunc(a, b) * b + @rem(a, b) = a
// Substitute a:
// @divTrunc(a, b) * b + @rem(a, b) = @divTrunc(a, b) * b - @mod(-a, -b)
// => @rem(a, b) = -@mod(-a, -b)
// => @mod(-a, -b) = -@rem(a, b)
r.positive = false;
}
}
/// q = a / b (rem r)
///
/// a / b is truncated (rounded towards zero).
/// q may alias with a or b.
///
/// Asserts there is enough memory to store q and r.
/// The upper bound for r limb count is `b.limbs.len`.
/// The upper bound for q limb count is given by `a.limbs.len`.
///
/// `limbs_buffer` is used for temporary storage. The amount required is given by `calcDivLimbsBufferLen`.
pub fn divTrunc(
q: *Mutable,
r: *Mutable,
a: Const,
b: Const,
limbs_buffer: []Limb,
) void {
const sep = a.limbs.len + 2;
var x = a.toMutable(limbs_buffer[0..sep]);
var y = b.toMutable(limbs_buffer[sep..]);
div(q, r, &x, &y);
}
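// Illustrative sketch: contrasting the two division flavors for -7 / 2, with
// the shared temporary sized by `calcDivLimbsBufferLen`.
test "Mutable.divFloor vs divTrunc for -7 / 2 (illustrative)" {
    var a_buf: [calcLimbLen(-7)]Limb = undefined;
    var b_buf: [calcLimbLen(2)]Limb = undefined;
    const a = Mutable.init(&a_buf, -7).toConst();
    const b = Mutable.init(&b_buf, 2).toConst();
    var q_buf: [4]Limb = undefined;
    var r_buf: [4]Limb = undefined;
    var tmp: [calcDivLimbsBufferLen(1, 1)]Limb = undefined;
    var q = Mutable.init(&q_buf, 0);
    var r = Mutable.init(&r_buf, 0);
    q.divFloor(&r, a, b, &tmp);
    try std.testing.expectEqual(@as(i32, -4), try q.toConst().to(i32));
    try std.testing.expectEqual(@as(i32, 1), try r.toConst().to(i32));
    q.divTrunc(&r, a, b, &tmp);
    try std.testing.expectEqual(@as(i32, -3), try q.toConst().to(i32));
    try std.testing.expectEqual(@as(i32, -1), try r.toConst().to(i32));
}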
/// r = a << shift, in other words, r = a * 2^shift
///
/// r and a may alias.
///
/// Asserts there is enough memory to fit the result. The upper bound Limb count is
/// `a.limbs.len + (shift / (@sizeOf(Limb) * 8)) + 1`.
pub fn shiftLeft(r: *Mutable, a: Const, shift: usize) void {
llshl(r.limbs, a.limbs, shift);
r.normalize(a.limbs.len + (shift / limb_bits) + 1);
r.positive = a.positive;
}
/// r = a <<| shift with 2s-complement saturating semantics.
///
/// r and a may alias.
///
/// Asserts there is enough memory to fit the result. The upper bound on the Limb count
/// is `calcTwosCompLimbCount(bit_count)`.
pub fn shiftLeftSat(r: *Mutable, a: Const, shift: usize, signedness: Signedness, bit_count: usize) void {
// Special case: When the argument is negative, but the result is supposed to be unsigned,
// return 0 in all cases.
if (!a.positive and signedness == .unsigned) {
r.set(0);
return;
}
// Check whether the shift is going to overflow. This is the case
// when (in 2s complement) any bit above `bit_count - shift` is set in the unshifted value.
// Note, the sign bit is not counted here.
// Handle shifts larger than the target type. This also deals with
// 0-bit integers.
if (bit_count <= shift) {
// In this case, overflow is avoided only if `a` is zero.
if (a.eqlZero()) {
r.set(0);
} else {
r.setTwosCompIntLimit(if (a.positive) .max else .min, signedness, bit_count);
}
return;
}
const checkbit = bit_count - shift - @intFromBool(signedness == .signed);
// If `checkbit` and more significant bits are zero, no overflow will take place.
if (checkbit >= a.limbs.len * limb_bits) {
// `checkbit` is outside the range of a, so definitely no overflow will take place. We
// can defer to a normal shift.
// Note that if `a` is normalized (which we assume), this checks for set bits in the upper limbs.
// Note, in this case r should already have enough limbs required to perform the normal shift.
// In this case the shift of the most significant limb may still overflow.
r.shiftLeft(a, shift);
return;
} else if (checkbit < (a.limbs.len - 1) * limb_bits) {
// `checkbit` is not in the most significant limb. If `a` is normalized the most significant
// limb will not be zero, so in this case we need to saturate. Note that `a.limbs.len` must be
// at least one according to normalization rules.
r.setTwosCompIntLimit(if (a.positive) .max else .min, signedness, bit_count);
return;
}
// Generate a mask with the bits to check in the most significant limb. We'll need to check
// all bits with equal or more significance than checkbit.
// const msb = @truncate(Log2Limb, checkbit);
// const checkmask = (@as(Limb, 1) << msb) -% 1;
if (a.limbs[a.limbs.len - 1] >> @as(Log2Limb, @truncate(checkbit)) != 0) {
// Need to saturate.
r.setTwosCompIntLimit(if (a.positive) .max else .min, signedness, bit_count);
return;
}
// This shift should not be able to overflow, so invoke llshl and normalize manually
// to avoid the extra required limb.
llshl(r.limbs, a.limbs, shift);
r.normalize(a.limbs.len + (shift / limb_bits));
r.positive = a.positive;
}
/// r = a >> shift
/// r and a may alias.
///
/// Asserts there is enough memory to fit the result. The upper bound Limb count is
/// `a.limbs.len - (shift / (@sizeOf(Limb) * 8))`.
pub fn shiftRight(r: *Mutable, a: Const, shift: usize) void {
const full_limbs_shifted_out = shift / limb_bits;
const remaining_bits_shifted_out = shift % limb_bits;
if (a.limbs.len <= full_limbs_shifted_out) {
// Shifting negative numbers converges to -1 instead of 0
if (a.positive) {
r.len = 1;
r.positive = true;
r.limbs[0] = 0;
} else {
r.len = 1;
r.positive = false;
r.limbs[0] = 1;
}
return;
}
const nonzero_negative_shiftout = if (a.positive) false else nonzero: {
for (a.limbs[0..full_limbs_shifted_out]) |x| {
if (x != 0)
break :nonzero true;
}
if (remaining_bits_shifted_out == 0)
break :nonzero false;
const not_covered: Log2Limb = @intCast(limb_bits - remaining_bits_shifted_out);
break :nonzero a.limbs[full_limbs_shifted_out] << not_covered != 0;
};
llshr(r.limbs, a.limbs, shift);
r.len = a.limbs.len - full_limbs_shifted_out;
r.positive = a.positive;
if (nonzero_negative_shiftout) r.addScalar(r.toConst(), -1);
r.normalize(r.len);
}
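// Illustrative sketch: an arithmetic right shift of a negative value rounds
// toward negative infinity, matching `@divFloor(-5, 2) == -3`.
test "Mutable.shiftRight on a negative value (illustrative)" {
    var buf: [calcLimbLen(-5)]Limb = undefined;
    var m = Mutable.init(&buf, -5);
    m.shiftRight(m.toConst(), 1);
    try std.testing.expectEqual(@as(i32, -3), try m.toConst().to(i32));
}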
/// r = ~a under 2s complement wrapping semantics.
/// r may alias with a.
///
/// Asserts that r has enough limbs to store the result. The upper bound on the Limb count
/// is `calcTwosCompLimbCount(bit_count)`.
pub fn bitNotWrap(r: *Mutable, a: Const, signedness: Signedness, bit_count: usize) void {
r.copy(a.negate());
const negative_one = Const{ .limbs = &.{1}, .positive = false };
_ = r.addWrap(r.toConst(), negative_one, signedness, bit_count);
}
/// r = a | b under 2s complement semantics.
/// r may alias with a or b.
///
/// a and b are zero-extended to the longer of a or b.
///
/// Asserts that r has enough limbs to store the result. Upper bound is `@max(a.limbs.len, b.limbs.len)`.
pub fn bitOr(r: *Mutable, a: Const, b: Const) void {
// Trivial cases, llsignedor does not support zero.
if (a.eqlZero()) {
r.copy(b);
return;
} else if (b.eqlZero()) {
r.copy(a);
return;
}
if (a.limbs.len >= b.limbs.len) {
r.positive = llsignedor(r.limbs, a.limbs, a.positive, b.limbs, b.positive);
r.normalize(if (b.positive) a.limbs.len else b.limbs.len);
} else {
r.positive = llsignedor(r.limbs, b.limbs, b.positive, a.limbs, a.positive);
r.normalize(if (a.positive) b.limbs.len else a.limbs.len);
}
}
/// r = a & b under 2s complement semantics.
/// r may alias with a or b.
///
/// Asserts that r has enough limbs to store the result.
/// If only a is positive, the upper bound is `a.limbs.len`.
/// If only b is positive, the upper bound is `b.limbs.len`.
/// If a and b are positive, the upper bound is `@min(a.limbs.len, b.limbs.len)`.
/// If a and b are negative, the upper bound is `@max(a.limbs.len, b.limbs.len) + 1`.
pub fn bitAnd(r: *Mutable, a: Const, b: Const) void {
// Trivial cases, llsignedand does not support zero.
if (a.eqlZero()) {
r.copy(a);
return;
} else if (b.eqlZero()) {
r.copy(b);
return;
}
if (a.limbs.len >= b.limbs.len) {
r.positive = llsignedand(r.limbs, a.limbs, a.positive, b.limbs, b.positive);
r.normalize(if (b.positive) b.limbs.len else if (a.positive) a.limbs.len else a.limbs.len + 1);
} else {
r.positive = llsignedand(r.limbs, b.limbs, b.positive, a.limbs, a.positive);
r.normalize(if (a.positive) a.limbs.len else if (b.positive) b.limbs.len else b.limbs.len + 1);
}
}
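// Illustrative sketch: twos-complement semantics on negative operands, mirroring
// `-2 & -3 == -4`; two negative operands need @max of the lengths plus one limb.
test "Mutable.bitAnd with negative operands (illustrative)" {
    var a_buf: [calcLimbLen(-2)]Limb = undefined;
    var b_buf: [calcLimbLen(-3)]Limb = undefined;
    const a = Mutable.init(&a_buf, -2).toConst();
    const b = Mutable.init(&b_buf, -3).toConst();
    var r_buf: [2]Limb = undefined;
    var r = Mutable.init(&r_buf, 0);
    r.bitAnd(a, b);
    try std.testing.expectEqual(@as(i32, -4), try r.toConst().to(i32));
}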
/// r = a ^ b under 2s complement semantics.
/// r may alias with a or b.
///
/// Asserts that r has enough limbs to store the result. If a and b share the same signedness, the
/// upper bound is `@max(a.limbs.len, b.limbs.len)`. Otherwise, if either a or b is negative
/// but not both, the upper bound is `@max(a.limbs.len, b.limbs.len) + 1`.
pub fn bitXor(r: *Mutable, a: Const, b: Const) void {
// Trivial cases, because llsignedxor does not support negative zero.
if (a.eqlZero()) {
r.copy(b);
return;
} else if (b.eqlZero()) {
r.copy(a);
return;
}
if (a.limbs.len > b.limbs.len) {
r.positive = llsignedxor(r.limbs, a.limbs, a.positive, b.limbs, b.positive);
r.normalize(a.limbs.len + @intFromBool(a.positive != b.positive));
} else {
r.positive = llsignedxor(r.limbs, b.limbs, b.positive, a.limbs, a.positive);
r.normalize(b.limbs.len + @intFromBool(a.positive != b.positive));
}
}
/// rma may alias x or y.
/// x and y may alias each other.
/// Asserts that `rma` has enough limbs to store the result. Upper bound is
/// `@min(x.limbs.len, y.limbs.len)`.
///
/// `limbs_buffer` is used for temporary storage during the operation. When this function returns,
/// it will have the same length as it had when the function was called.
pub fn gcd(rma: *Mutable, x: Const, y: Const, limbs_buffer: *std.ArrayList(Limb)) !void {
const prev_len = limbs_buffer.items.len;
defer limbs_buffer.shrinkRetainingCapacity(prev_len);
const x_copy = if (rma.limbs.ptr == x.limbs.ptr) blk: {
const start = limbs_buffer.items.len;
try limbs_buffer.appendSlice(x.limbs);
break :blk x.toMutable(limbs_buffer.items[start..]).toConst();
} else x;
const y_copy = if (rma.limbs.ptr == y.limbs.ptr) blk: {
const start = limbs_buffer.items.len;
try limbs_buffer.appendSlice(y.limbs);
break :blk y.toMutable(limbs_buffer.items[start..]).toConst();
} else y;
return gcdLehmer(rma, x_copy, y_copy, limbs_buffer);
}
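// Illustrative sketch: the ArrayList-backed gcd; the list's allocator also backs
// the temporaries inside gcdLehmer.
test "Mutable.gcd of 48 and 36 (illustrative)" {
    var x_buf: [calcLimbLen(48)]Limb = undefined;
    var y_buf: [calcLimbLen(36)]Limb = undefined;
    const x = Mutable.init(&x_buf, 48).toConst();
    const y = Mutable.init(&y_buf, 36).toConst();
    var r_buf: [1]Limb = undefined;
    var r = Mutable.init(&r_buf, 0);
    var limbs_buffer = std.ArrayList(Limb).init(std.testing.allocator);
    defer limbs_buffer.deinit();
    try r.gcd(x, y, &limbs_buffer);
    try std.testing.expectEqual(@as(u32, 12), try r.toConst().to(u32));
}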
/// r = a ^ b
///
/// r may not alias a.
///
/// Asserts that `r` has enough limbs to store the result. Upper bound is
/// `calcPowLimbsBufferLen(a.bitCountAbs(), b)`.
///
/// `limbs_buffer` is used for temporary storage.
/// The amount required is given by `calcPowLimbsBufferLen`.
pub fn pow(r: *Mutable, a: Const, b: u32, limbs_buffer: []Limb) void {
assert(r.limbs.ptr != a.limbs.ptr); // illegal aliasing
// Handle all the trivial cases first
switch (b) {
0 => {
// a^0 = 1
return r.set(1);
},
1 => {
// a^1 = a
return r.copy(a);
},
else => {},
}
if (a.eqlZero()) {
// 0^b = 0
return r.set(0);
} else if (a.limbs.len == 1 and a.limbs[0] == 1) {
// 1^b = 1 and (-1)^b = ±1
r.set(1);
r.positive = a.positive or (b & 1) == 0;
return;
}
// Here a>1 and b>1
const needed_limbs = calcPowLimbsBufferLen(a.bitCountAbs(), b);
assert(r.limbs.len >= needed_limbs);
assert(limbs_buffer.len >= needed_limbs);
llpow(r.limbs, a.limbs, b, limbs_buffer);
r.normalize(needed_limbs);
r.positive = a.positive or (b & 1) == 0;
}
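// Illustrative sketch: 3^7 == 2187, with both buffers sized by
// `calcPowLimbsBufferLen` (3 has a bit count of 2).
test "Mutable.pow with a small exponent (illustrative)" {
    var a_buf: [calcLimbLen(3)]Limb = undefined;
    const a = Mutable.init(&a_buf, 3).toConst();
    const needed = comptime calcPowLimbsBufferLen(2, 7);
    var r_buf: [needed]Limb = undefined;
    var tmp: [needed]Limb = undefined;
    var r = Mutable.init(&r_buf, 0);
    r.pow(a, 7, &tmp);
    try std.testing.expectEqual(@as(u32, 2187), try r.toConst().to(u32));
}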
/// r = ⌊√a⌋
///
/// r may alias a.
///
/// Asserts that `r` has enough limbs to store the result. Upper bound is
/// `(a.limbs.len - 1) / 2 + 1`.
///
/// `limbs_buffer` is used for temporary storage.
/// The amount required is given by `calcSqrtLimbsBufferLen`.
pub fn sqrt(
r: *Mutable,
a: Const,
limbs_buffer: []Limb,
) void {
// Brent and Zimmermann, Modern Computer Arithmetic, Algorithm 1.13 SqrtInt
// https://members.loria.fr/PZimmermann/mca/pub226.html
var buf_index: usize = 0;
var t = b: {
const start = buf_index;
buf_index += a.limbs.len;
break :b Mutable.init(limbs_buffer[start..buf_index], 0);
};
var u = b: {
const start = buf_index;
const shift = (a.bitCountAbs() + 1) / 2;
buf_index += 1 + ((shift / limb_bits) + 1);
var m = Mutable.init(limbs_buffer[start..buf_index], 1);
m.shiftLeft(m.toConst(), shift); // u must be >= ⌊√a⌋, and should be as small as possible for efficiency
break :b m;
};
var s = b: {
const start = buf_index;
buf_index += u.limbs.len;
break :b u.toConst().toMutable(limbs_buffer[start..buf_index]);
};
var rem = b: {
const start = buf_index;
buf_index += s.limbs.len;
break :b Mutable.init(limbs_buffer[start..buf_index], 0);
};
while (true) {
t.divFloor(&rem, a, s.toConst(), limbs_buffer[buf_index..]);
t.add(t.toConst(), s.toConst());
u.shiftRight(t.toConst(), 1);
if (u.toConst().order(s.toConst()).compare(.gte)) {
r.copy(s.toConst());
return;
}
// Avoid copying u to s by swapping u and s
const tmp_s = s;
s = u;
u = tmp_s;
}
}
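// Illustrative sketch: ⌊√99⌋ == 9; the scratch space comes from
// `calcSqrtLimbsBufferLen` (99 has a bit count of 7).
test "Mutable.sqrt is the floored root (illustrative)" {
    var a_buf: [calcLimbLen(99)]Limb = undefined;
    const a = Mutable.init(&a_buf, 99).toConst();
    var r_buf: [(calcLimbLen(99) - 1) / 2 + 1]Limb = undefined;
    var tmp: [calcSqrtLimbsBufferLen(7)]Limb = undefined;
    var r = Mutable.init(&r_buf, 0);
    r.sqrt(a, &tmp);
    try std.testing.expectEqual(@as(u32, 9), try r.toConst().to(u32));
}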
/// rma may not alias x or y.
/// x and y may alias each other.
/// Asserts that `rma` has enough limbs to store the result. Upper bound is given by `calcGcdNoAliasLimbLen`.
///
/// `limbs_buffer` is used for temporary storage during the operation.
pub fn gcdNoAlias(rma: *Mutable, x: Const, y: Const, limbs_buffer: *std.ArrayList(Limb)) !void {
assert(rma.limbs.ptr != x.limbs.ptr); // illegal aliasing
assert(rma.limbs.ptr != y.limbs.ptr); // illegal aliasing
return gcdLehmer(rma, x, y, limbs_buffer);
}
fn gcdLehmer(result: *Mutable, xa: Const, ya: Const, limbs_buffer: *std.ArrayList(Limb)) !void {
var x = try xa.toManaged(limbs_buffer.allocator);
defer x.deinit();
x.abs();
var y = try ya.toManaged(limbs_buffer.allocator);
defer y.deinit();
y.abs();
if (x.toConst().order(y.toConst()) == .lt) {
x.swap(&y);
}
var t_big = try Managed.init(limbs_buffer.allocator);
defer t_big.deinit();
var r = try Managed.init(limbs_buffer.allocator);
defer r.deinit();
var tmp_x = try Managed.init(limbs_buffer.allocator);
defer tmp_x.deinit();
while (y.len() > 1 and !y.eqlZero()) {
assert(x.isPositive() and y.isPositive());
assert(x.len() >= y.len());
var xh: SignedDoubleLimb = x.limbs[x.len() - 1];
var yh: SignedDoubleLimb = if (x.len() > y.len()) 0 else y.limbs[x.len() - 1];
var A: SignedDoubleLimb = 1;
var B: SignedDoubleLimb = 0;
var C: SignedDoubleLimb = 0;
var D: SignedDoubleLimb = 1;
while (yh + C != 0 and yh + D != 0) {
const q = @divFloor(xh + A, yh + C);
const qp = @divFloor(xh + B, yh + D);
if (q != qp) {
break;
}
var t = A - q * C;
A = C;
C = t;
t = B - q * D;
B = D;
D = t;
t = xh - q * yh;
xh = yh;
yh = t;
}
if (B == 0) {
// t_big = x % y, r is unused
try r.divTrunc(&t_big, &x, &y);
assert(t_big.isPositive());
x.swap(&y);
y.swap(&t_big);
} else {
var storage: [8]Limb = undefined;
const Ap = fixedIntFromSignedDoubleLimb(A, storage[0..2]).toManaged(limbs_buffer.allocator);
const Bp = fixedIntFromSignedDoubleLimb(B, storage[2..4]).toManaged(limbs_buffer.allocator);
const Cp = fixedIntFromSignedDoubleLimb(C, storage[4..6]).toManaged(limbs_buffer.allocator);
const Dp = fixedIntFromSignedDoubleLimb(D, storage[6..8]).toManaged(limbs_buffer.allocator);
// t_big = Ax + By
try r.mul(&x, &Ap);
try t_big.mul(&y, &Bp);
try t_big.add(&r, &t_big);
// u = Cx + Dy, r as u
try tmp_x.copy(x.toConst());
try x.mul(&tmp_x, &Cp);
try r.mul(&y, &Dp);
try r.add(&x, &r);
x.swap(&t_big);
y.swap(&r);
}
}
// euclidean algorithm
assert(x.toConst().order(y.toConst()) != .lt);
while (!y.toConst().eqlZero()) {
try t_big.divTrunc(&r, &x, &y);
x.swap(&y);
y.swap(&r);
}
result.copy(x.toConst());
}
// Truncates by default.
fn div(q: *Mutable, r: *Mutable, x: *Mutable, y: *Mutable) void {
assert(!y.eqlZero()); // division by zero
assert(q != r); // illegal aliasing
const q_positive = (x.positive == y.positive);
const r_positive = x.positive;
if (x.toConst().orderAbs(y.toConst()) == .lt) {
// q may alias x so handle r first.
r.copy(x.toConst());
r.positive = r_positive;
q.set(0);
return;
}
// Handle trailing zero-words of divisor/dividend. These are not handled in the following
// algorithms.
// Note, there must be a non-zero limb for either.
// const x_trailing = std.mem.indexOfScalar(Limb, x.limbs[0..x.len], 0).?;
// const y_trailing = std.mem.indexOfScalar(Limb, y.limbs[0..y.len], 0).?;
const x_trailing = for (x.limbs[0..x.len], 0..) |xi, i| {
if (xi != 0) break i;
} else unreachable;
const y_trailing = for (y.limbs[0..y.len], 0..) |yi, i| {
if (yi != 0) break i;
} else unreachable;
const xy_trailing = @min(x_trailing, y_trailing);
if (y.len - xy_trailing == 1) {
const divisor = y.limbs[y.len - 1];
// Optimization for small divisor. By using a half limb we can avoid requiring DoubleLimb
// divisions in the hot code path. This may often require compiler_rt software-emulation.
if (divisor < maxInt(HalfLimb)) {
lldiv0p5(q.limbs, &r.limbs[0], x.limbs[xy_trailing..x.len], @as(HalfLimb, @intCast(divisor)));
} else {
lldiv1(q.limbs, &r.limbs[0], x.limbs[xy_trailing..x.len], divisor);
}
q.normalize(x.len - xy_trailing);
q.positive = q_positive;
r.len = 1;
r.positive = r_positive;
} else {
// Shrink x, y such that the trailing zero limbs shared between are removed.
var x0 = Mutable{
.limbs = x.limbs[xy_trailing..],
.len = x.len - xy_trailing,
.positive = true,
};
var y0 = Mutable{
.limbs = y.limbs[xy_trailing..],
.len = y.len - xy_trailing,
.positive = true,
};
divmod(q, r, &x0, &y0);
q.positive = q_positive;
r.positive = r_positive;
}
if (xy_trailing != 0 and r.limbs[r.len - 1] != 0) {
// Manually shift here since we know it's limb-aligned.
mem.copyBackwards(Limb, r.limbs[xy_trailing..], r.limbs[0..r.len]);
@memset(r.limbs[0..xy_trailing], 0);
r.len += xy_trailing;
}
}
/// Handbook of Applied Cryptography, 14.20
///
/// x = qy + r where 0 <= r < y
/// y is modified but returned intact.
fn divmod(
q: *Mutable,
r: *Mutable,
x: *Mutable,
y: *Mutable,
) void {
// 0.
// Normalize so that y[t] > b/2
const lz = @clz(y.limbs[y.len - 1]);
const norm_shift = if (lz == 0 and y.toConst().isOdd())
limb_bits // Force an extra limb so that y is even.
else
lz;
x.shiftLeft(x.toConst(), norm_shift);
y.shiftLeft(y.toConst(), norm_shift);
const n = x.len - 1;
const t = y.len - 1;
const shift = n - t;
// 1.
// for 0 <= j <= n - t, set q[j] to 0
q.len = shift + 1;
q.positive = true;
@memset(q.limbs[0..q.len], 0);
// 2.
// while x >= y * b^(n - t):
// x -= y * b^(n - t)
// q[n - t] += 1
// Note, this algorithm is performed only once if y[t] > base/2 and y is even, which we
// enforced in step 0. This means we can replace the while with an if.
// Note, multiplication by b^(n - t) comes down to shifting to the right by n - t limbs.
// We can also replace x >= y * b^(n - t) by x/b^(n - t) >= y, and use shifts for that.
{
// x >= y * b^(n - t) can be replaced by x/b^(n - t) >= y.
// 'divide' x by b^(n - t)
var tmp = Mutable{
.limbs = x.limbs[shift..],
.len = x.len - shift,
.positive = true,
};
if (tmp.toConst().order(y.toConst()) != .lt) {
// Perform x -= y * b^(n - t)
// Note, we can subtract y from x[n - t..] and get the result without shifting.
// We can also re-use tmp which already contains the relevant part of x. Note that
// this also edits x.
// Due to the check above, this cannot underflow.
tmp.sub(tmp.toConst(), y.toConst());
// tmp.sub normalized tmp, but we need to normalize x now.
x.limbs.len = tmp.limbs.len + shift;
q.limbs[shift] += 1;
}
}
// 3.
// for i from n down to t + 1, do
var i = n;
while (i >= t + 1) : (i -= 1) {
const k = i - t - 1;
// 3.1.
// if x_i == y_t:
// q[i - t - 1] = b - 1
// else:
// q[i - t - 1] = (x[i] * b + x[i - 1]) / y[t]
if (x.limbs[i] == y.limbs[t]) {
q.limbs[k] = maxInt(Limb);
} else {
const q0 = (@as(DoubleLimb, x.limbs[i]) << limb_bits) | @as(DoubleLimb, x.limbs[i - 1]);
const n0 = @as(DoubleLimb, y.limbs[t]);
q.limbs[k] = @as(Limb, @intCast(q0 / n0));
}
// 3.2
// while q[i - t - 1] * (y[t] * b + y[t - 1]) > x[i] * b * b + x[i - 1] * b + x[i - 2]:
// q[i - t - 1] -= 1
// Note, if y[t] > b / 2 this part is repeated no more than twice.
// Extract from y.
const y0 = if (t > 0) y.limbs[t - 1] else 0;
const y1 = y.limbs[t];
// Extract from x.
// Note, big endian.
const tmp0 = [_]Limb{
x.limbs[i],
if (i >= 1) x.limbs[i - 1] else 0,
if (i >= 2) x.limbs[i - 2] else 0,
};
while (true) {
// Ad-hoc 2x1 multiplication with q[i - t - 1].
// Note, big endian.
var tmp1 = [_]Limb{ 0, undefined, undefined };
tmp1[2] = addMulLimbWithCarry(0, y0, q.limbs[k], &tmp1[0]);
tmp1[1] = addMulLimbWithCarry(0, y1, q.limbs[k], &tmp1[0]);
// Big-endian compare
if (mem.order(Limb, &tmp1, &tmp0) != .gt)
break;
q.limbs[k] -= 1;
}
// 3.3.
// x -= q[i - t - 1] * y * b^(i - t - 1)
// Note, we multiply by a single limb here.
// The shift doesn't need to be performed if we add the result of the first multiplication
// to x[i - t - 1].
const underflow = llmulLimb(.sub, x.limbs[k..x.len], y.limbs[0..y.len], q.limbs[k]);
// 3.4.
// if x < 0:
// x += y * b^(i - t - 1)
// q[i - t - 1] -= 1
// Note, we check for x < 0 using the underflow flag from the previous operation.
if (underflow) {
// While we didn't properly set the signedness of x, this operation should 'flow' it back to positive.
llaccum(.add, x.limbs[k..x.len], y.limbs[0..y.len]);
q.limbs[k] -= 1;
}
}
x.normalize(x.len);
q.normalize(q.len);
// De-normalize r and y.
r.shiftRight(x.toConst(), norm_shift);
y.shiftRight(y.toConst(), norm_shift);
}
/// Truncate an integer to a number of bits, following 2s-complement semantics.
/// `r` may alias `a`.
///
/// Asserts `r` has enough storage to compute the result.
/// The upper bound is `calcTwosCompLimbCount(a.len)`.
pub fn truncate(r: *Mutable, a: Const, signedness: Signedness, bit_count: usize) void {
// Handle 0-bit integers.
if (bit_count == 0) {
@branchHint(.unlikely);
r.set(0);
return;
}
const max_limbs = calcTwosCompLimbCount(bit_count);
const sign_bit = @as(Limb, 1) << @truncate(bit_count - 1);
const mask = @as(Limb, maxInt(Limb)) >> @truncate(-%bit_count);
// Guess whether the result will have the same sign as `a`.
// * If the result will be signed zero, the guess is `true`.
// * If the result will be the minimum signed integer, the guess is `false`.
// * If the result will be unsigned zero, the guess is `a.positive`.
// * Otherwise the guess is correct.
const same_sign_guess = switch (signedness) {
.signed => max_limbs > a.limbs.len or a.limbs[max_limbs - 1] & sign_bit == 0,
.unsigned => a.positive,
};
const abs_trunc_a: Const = .{
.positive = true,
.limbs = a.limbs[0..llnormalize(a.limbs[0..@min(a.limbs.len, max_limbs)])],
};
if (same_sign_guess or abs_trunc_a.eqlZero()) {
// One of the following is true:
// * The result is zero.
// * The result is non-zero and has the same sign as `a`.
r.copy(abs_trunc_a);
if (max_limbs <= r.len) r.limbs[max_limbs - 1] &= mask;
r.normalize(r.len);
r.positive = a.positive or r.eqlZero();
} else {
// One of the following is true:
// * The result is the minimum signed integer.
// * The result is unsigned zero.
// * The result is non-zero and has the opposite sign as `a`.
r.addScalar(abs_trunc_a, -1);
llnot(r.limbs[0..r.len]);
@memset(r.limbs[r.len..max_limbs], maxInt(Limb));
r.limbs[max_limbs - 1] &= mask;
r.normalize(max_limbs);
r.positive = switch (signedness) {
// The only value with the sign bit still set is the minimum signed integer.
.signed => !a.positive and r.limbs[max_limbs - 1] & sign_bit == 0,
.unsigned => !a.positive or r.eqlZero(),
};
}
}
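// Illustrative sketch: truncating 300 to 8 unsigned bits, mirroring
// `@as(u8, @truncate(@as(u16, 300))) == 44`.
test "Mutable.truncate to u8 (illustrative)" {
    var buf: [calcLimbLen(300)]Limb = undefined;
    var m = Mutable.init(&buf, 300);
    m.truncate(m.toConst(), .unsigned, 8);
    try std.testing.expectEqual(@as(u8, 44), try m.toConst().to(u8));
}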
/// Saturate an integer to a number of bits, following 2s-complement semantics.
/// r may alias a.
///
/// Asserts `r` has enough storage to store the result.
/// The upper bound is `calcTwosCompLimbCount(a.len)`.
pub fn saturate(r: *Mutable, a: Const, signedness: Signedness, bit_count: usize) void {
if (!a.fitsInTwosComp(signedness, bit_count)) {
r.setTwosCompIntLimit(if (r.positive) .max else .min, signedness, bit_count);
}
}
/// Read the value of `x` from `buffer`.
/// Asserts that `buffer` is large enough to contain a value of bit-size `bit_count`.
///
/// The contents of `buffer` are interpreted as if they were the contents of
/// @ptrCast(*[buffer.len]const u8, &x). Byte ordering is determined by `endian`
/// and any required padding bits are expected on the MSB end.
pub fn readTwosComplement(
x: *Mutable,
buffer: []const u8,
bit_count: usize,
endian: Endian,
signedness: Signedness,
) void {
return readPackedTwosComplement(x, buffer, 0, bit_count, endian, signedness);
}
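// Illustrative sketch: decoding a little-endian i16 from raw bytes.
test "Mutable.readTwosComplement of an i16 (illustrative)" {
    const bytes = [_]u8{ 0xfe, 0xff }; // @as(i16, -2), little-endian
    var buf: [calcTwosCompLimbCount(16)]Limb = undefined;
    var m = Mutable.init(&buf, 0);
    m.readTwosComplement(&bytes, 16, .little, .signed);
    try std.testing.expectEqual(@as(i16, -2), try m.toConst().to(i16));
}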
/// Read the value of `x` from a packed memory `buffer`.
/// Asserts that `buffer` is large enough to contain a value of bit-size `bit_count`
/// at offset `bit_offset`.
///
/// This is equivalent to loading the value of an integer with `bit_count` bits as
/// if it were a field in packed memory at the provided bit offset.
pub fn readPackedTwosComplement(
x: *Mutable,
buffer: []const u8,
bit_offset: usize,
bit_count: usize,
endian: Endian,
signedness: Signedness,
) void {
if (bit_count == 0) {
x.limbs[0] = 0;
x.len = 1;
x.positive = true;
return;
}
// Check whether the input is negative
var positive = true;
if (signedness == .signed) {
const total_bits = bit_offset + bit_count;
const last_byte = switch (endian) {
.little => ((total_bits + 7) / 8) - 1,
.big => buffer.len - ((total_bits + 7) / 8),
};
const sign_bit = @as(u8, 1) << @as(u3, @intCast((total_bits - 1) % 8));
positive = ((buffer[last_byte] & sign_bit) == 0);
}
// Copy all complete limbs
var carry: u1 = 1;
var limb_index: usize = 0;
var bit_index: usize = 0;
while (limb_index < bit_count / @bitSizeOf(Limb)) : (limb_index += 1) {
// Read one Limb of bits
var limb = mem.readPackedInt(Limb, buffer, bit_index + bit_offset, endian);
bit_index += @bitSizeOf(Limb);
// 2's complement (bitwise not, then add carry bit)
if (!positive) {
const ov = @addWithOverflow(~limb, carry);
limb = ov[0];
carry = ov[1];
}
x.limbs[limb_index] = limb;
}
// Copy the remaining bits
if (bit_count != bit_index) {
// Read all remaining bits
var limb = switch (signedness) {
.unsigned => mem.readVarPackedInt(Limb, buffer, bit_index + bit_offset, bit_count - bit_index, endian, .unsigned),
.signed => b: {
const SLimb = std.meta.Int(.signed, @bitSizeOf(Limb));
const limb = mem.readVarPackedInt(SLimb, buffer, bit_index + bit_offset, bit_count - bit_index, endian, .signed);
break :b @as(Limb, @bitCast(limb));
},
};
// 2's complement (bitwise not, then add carry bit)
if (!positive) {
const ov = @addWithOverflow(~limb, carry);
assert(ov[1] == 0);
limb = ov[0];
}
x.limbs[limb_index] = limb;
limb_index += 1;
}
x.positive = positive;
x.len = limb_index;
x.normalize(x.len);
}
/// Normalize a possible sequence of leading zeros.
///
/// [1, 2, 3, 4, 0] -> [1, 2, 3, 4]
/// [1, 2, 0, 0, 0] -> [1, 2]
/// [0, 0, 0, 0, 0] -> [0]
pub fn normalize(r: *Mutable, length: usize) void {
r.len = llnormalize(r.limbs[0..length]);
}
};