From ed88533fcf125439229c6fa15bc21909ba12c1ac Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Sat, 27 Sep 2025 21:01:52 +0200 Subject: [PATCH 1/6] Microptimization. Fix range obscurity and compile error. Revert change for converting comptime int. --- lib/std/math/gcd.zig | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/lib/std/math/gcd.zig b/lib/std/math/gcd.zig index 16ca7846f19a..cd56c541b426 100644 --- a/lib/std/math/gcd.zig +++ b/lib/std/math/gcd.zig @@ -9,12 +9,14 @@ pub fn gcd(a: anytype, b: anytype) @TypeOf(a, b) { comptime_int => std.math.IntFittingRange(@min(a, b), @max(a, b)), else => |T| T, }; + if (@typeInfo(N) != .int or @typeInfo(N).int.signedness != .unsigned) { - @compileError("`a` and `b` must be usigned integers"); + @compileError("`a` and `b` must be unsigned integers"); } // using an optimised form of Stein's algorithm: // https://en.wikipedia.org/wiki/Binary_GCD_algorithm + std.debug.assert(a != 0 or b != 0); if (a == 0) return b; @@ -26,25 +28,27 @@ pub fn gcd(a: anytype, b: anytype) @TypeOf(a, b) { const xz = @ctz(x); const yz = @ctz(y); const shift = @min(xz, yz); - x >>= @intCast(xz); - y >>= @intCast(yz); - - var diff = y -% x; - while (diff != 0) : (diff = y -% x) { - // ctz is invariant under negation, we - // put it here to ease data dependencies, - // makes the CPU happy. - const zeros = @ctz(diff); - if (x > y) diff = -%diff; - y = @min(x, y); - x = diff >> @intCast(zeros); + x = @shrExact(x, @intCast(xz)); + y = @shrExact(y, @intCast(yz)); + + var y_minus_x = y -% x; + while (y_minus_x != 0) : (y_minus_x = y -% x) { + const copy_x = x; + const zeros = @ctz(y_minus_x); + const carry = x < y; + x -%= y; + if (carry) { + x = y_minus_x; + y = copy_x; + } + x = @shrExact(x, @intCast(zeros)); } - return y << @intCast(shift); + + return @shlExact(y, @intCast(shift)); } test gcd { const expectEqual = std.testing.expectEqual; - try expectEqual(gcd(0, 5), 5); try expectEqual(gcd(5, 0), 5); try expectEqual(gcd(8, 12), 4); From 062dc9473e534c473b066d7d94e18fdb0a03bd6e Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Fri, 10 Oct 2025 19:15:59 +0200 Subject: [PATCH 2/6] Add EGCD. Fix some comments in GCD. Make ml_kem use lcm and egcd from std/math. Fix name. Add egcd function. Don't destructure. Use binary gcd and make overflow safe. Force inlining, use ctz to reduce dependency in loop. Avoid integer overflow for temporary value. Add test against previous overflow capability. More optimization friendly expression. Fix egcd for even numbers. Minvalue causes crash. Remove helper function. Fix casting issues. Use shift instead division (to support i2) and avoid overflow of temp results. --- lib/std/crypto/ml_kem.zig | 2 +- lib/std/math.zig | 1 + lib/std/math/egcd.zig | 241 ++++++++++++++++++++++++++++++++++++++ lib/std/math/gcd.zig | 6 +- 4 files changed, 246 insertions(+), 4 deletions(-) create mode 100644 lib/std/math/egcd.zig diff --git a/lib/std/crypto/ml_kem.zig b/lib/std/crypto/ml_kem.zig index 9badac5d793f..d5e3407d5c9c 100644 --- a/lib/std/crypto/ml_kem.zig +++ b/lib/std/crypto/ml_kem.zig @@ -636,7 +636,7 @@ test "invNTTReductions bounds" { fn invertMod(a: anytype, p: @TypeOf(a)) @TypeOf(a) { const r = extendedEuclidean(@TypeOf(a), a, p); assert(r.gcd == 1); - return r.x; + return r.bezout_coeff_1; } // Reduce mod q for testing. diff --git a/lib/std/math.zig b/lib/std/math.zig index 21611799fd63..6d3e9e20cf8a 100644 --- a/lib/std/math.zig +++ b/lib/std/math.zig @@ -238,6 +238,7 @@ pub const sinh = @import("math/sinh.zig").sinh; pub const cosh = @import("math/cosh.zig").cosh; pub const tanh = @import("math/tanh.zig").tanh; pub const gcd = @import("math/gcd.zig").gcd; +pub const egcd = @import("math/egcd.zig").egcd; pub const lcm = @import("math/lcm.zig").lcm; pub const gamma = @import("math/gamma.zig").gamma; pub const lgamma = @import("math/gamma.zig").lgamma; diff --git a/lib/std/math/egcd.zig b/lib/std/math/egcd.zig new file mode 100644 index 000000000000..ec7471724e7a --- /dev/null +++ b/lib/std/math/egcd.zig @@ -0,0 +1,241 @@ +//! Extended Greatest Common Divisor (https://mathworld.wolfram.com/ExtendedGreatestCommonDivisor.html) +const std = @import("../std.zig"); + +/// Result type of `egcd`. +pub fn ExtendedGreatestCommonDivisor(S: anytype) type { + const N = switch (S) { + comptime_int => comptime_int, + else => |T| std.meta.Int(.unsigned, @bitSizeOf(T)), + }; + + return struct { + gcd: N, + bezout_coeff_1: S, + bezout_coeff_2: S, + }; +} + +/// Returns the Extended Greatest Common Divisor (EGCD) of two signed integers (`a` and `b`) which are not both zero. +pub fn egcd(a: anytype, b: anytype) ExtendedGreatestCommonDivisor(@TypeOf(a, b)) { + const S = switch (@TypeOf(a, b)) { + comptime_int => b: { + const n = @max(@abs(a), @abs(b)); + break :b std.math.IntFittingRange(-n, n); + }, + else => |T| T, + }; + if (@typeInfo(S) != .int or @typeInfo(S).int.signedness != .signed) { + @compileError("`a` and `b` must be signed integers"); + } + + std.debug.assert(a != 0 or b != 0); + + if (a == 0) return .{ .gcd = @abs(b), .bezout_coeff_1 = 0, .bezout_coeff_2 = std.math.sign(b) }; + if (b == 0) return .{ .gcd = @abs(a), .bezout_coeff_1 = std.math.sign(a), .bezout_coeff_2 = 0 }; + + const other: S, const odd: S, const shift, const switch_coeff = b: { + const xz = @ctz(@as(S, a)); + const yz = @ctz(@as(S, b)); + break :b if (xz < yz) .{ b, a, xz, true } else .{ a, b, yz, false }; + }; + const toinv = @shrExact(other, @intCast(shift)); + const ctrl = @shrExact(odd, @intCast(shift)); // Invariant: |s|, |t|, |ctrl| < |MIN_OF(S)| + const half_ctrl = 1 + @shrExact(ctrl - 1, 1); + const abs_ctrl = @abs(ctrl); + + var s: S = std.math.sign(toinv); + var t: S = 0; + + var x = @abs(toinv); + var y = abs_ctrl; + + { + const xz = @ctz(x); + x = @shrExact(x, @intCast(xz)); + for (0..xz) |_| { + const half_s = s >> 1; + if (s & 1 == 0) + s = half_s + else + s = half_s + half_ctrl; + } + } + + var y_minus_x = y -% x; + while (y_minus_x != 0) : (y_minus_x = y -% x) { + const t_minus_s = t - s; + const copy_x = x; + const copy_s = s; + const xz = @ctz(y_minus_x); + + s -= t; + const carry = x < y; + x -%= y; + if (carry) { + x = y_minus_x; + y = copy_x; + s = t_minus_s; + t = copy_s; + } + x = @shrExact(x, @intCast(xz)); + for (0..xz) |_| { + const half_s = s >> 1; + if (s & 1 == 0) + s = half_s + else + s = half_s + half_ctrl; + } + + if (s < 0) s = @intCast(abs_ctrl - @abs(s)); + } + + // Using integer widening is only a temporary solution. + const W = std.meta.Int(.signed, @bitSizeOf(S) * 2); + t = @intCast(@divExact(y - @as(W, s) * toinv, ctrl)); + const final_s, const final_t = if (switch_coeff) .{ t, s } else .{ s, t }; + return .{ + .gcd = @shlExact(y, @intCast(shift)), + .bezout_coeff_1 = final_s, + .bezout_coeff_2 = final_t, + }; +} + +test { + { + const a: i2 = 0; + const b: i2 = 1; + const r = egcd(a, b); + const g = r.gcd; + const s: i2 = r.bezout_coeff_1; + const t: i2 = r.bezout_coeff_2; + try std.testing.expect(s * a + t * b == g); + } + { + const a: i8 = -128; + const b: i8 = 127; + const r = egcd(a, b); + const g = r.gcd; + const s: i16 = r.bezout_coeff_1; + const t: i16 = r.bezout_coeff_2; + try std.testing.expect(s * a + t * b == g); + } + { + const a: i16 = -32768; + const b: i16 = -32768; + const r = egcd(a, b); + const g = r.gcd; + const s: i32 = r.bezout_coeff_1; + const t: i32 = r.bezout_coeff_2; + try std.testing.expect(s * a + t * b == g); + } + { + const a: i32 = 128; + const b: i32 = 112; + const r = egcd(a, b); + const g = r.gcd; + const s: i64 = r.bezout_coeff_1; + const t: i64 = r.bezout_coeff_2; + try std.testing.expect(s * a + t * b == g); + } + { + const a: i32 = 4 * 89; + const b: i32 = 2 * 17; + const r = egcd(a, b); + const g = r.gcd; + const s: i64 = r.bezout_coeff_1; + const t: i64 = r.bezout_coeff_2; + try std.testing.expect(s * a + t * b == g); + } + { + const a: i8 = 127; + const b: i8 = 126; + const r = egcd(a, b); + const g = r.gcd; + const s: i16 = r.bezout_coeff_1; + const t: i16 = r.bezout_coeff_2; + try std.testing.expect(s * a + t * b == g); + } + { + const a: i4 = -8; + const b: i4 = 1; + const r = egcd(a, b); + const g = r.gcd; + const s = r.bezout_coeff_1; + const t = r.bezout_coeff_2; + try std.testing.expect(s * a + t * b == g); + } + { + const a: i4 = -8; + const b: i4 = 5; + const r = egcd(a, b); + const g = r.gcd; + // Avoid overflow in assert. + const s: i8 = r.bezout_coeff_1; + const t: i8 = r.bezout_coeff_2; + try std.testing.expect(s * a + t * b == g); + } + { + const a: i32 = 0; + const b: i32 = 5; + const r = egcd(a, b); + const g = r.gcd; + const s = r.bezout_coeff_1; + const t = r.bezout_coeff_2; + try std.testing.expect(s * a + t * b == g); + } + { + const a: i32 = 5; + const b: i32 = 0; + const r = egcd(a, b); + const g = r.gcd; + const s = r.bezout_coeff_1; + const t = r.bezout_coeff_2; + try std.testing.expect(s * a + t * b == g); + } + + { + const a: i32 = 21; + const b: i32 = 15; + const r = egcd(a, b); + const g = r.gcd; + const s = r.bezout_coeff_1; + const t = r.bezout_coeff_2; + try std.testing.expect(s * a + t * b == g); + } + { + const a: i32 = -21; + const b: i32 = 15; + const r = egcd(a, b); + const g = r.gcd; + const s = r.bezout_coeff_1; + const t = r.bezout_coeff_2; + try std.testing.expect(s * a + t * b == g); + } + { + const a = -21; + const b = 15; + const r = egcd(a, b); + const g = r.gcd; + const s = r.bezout_coeff_1; + const t = r.bezout_coeff_2; + try std.testing.expect(s * a + t * b == g); + } + { + const a = 927372692193078999176; + const b = 573147844013817084101; + const r = egcd(a, b); + const g = r.gcd; + const s = r.bezout_coeff_1; + const t = r.bezout_coeff_2; + try std.testing.expect(s * a + t * b == g); + } + { + const a = 453973694165307953197296969697410619233826; + const b = 280571172992510140037611932413038677189525; + const r = egcd(a, b); + const g = r.gcd; + const s = r.bezout_coeff_1; + const t = r.bezout_coeff_2; + try std.testing.expect(s * a + t * b == g); + } +} diff --git a/lib/std/math/gcd.zig b/lib/std/math/gcd.zig index cd56c541b426..01c6b8b62914 100644 --- a/lib/std/math/gcd.zig +++ b/lib/std/math/gcd.zig @@ -1,7 +1,7 @@ -//! Greatest common divisor (https://mathworld.wolfram.com/GreatestCommonDivisor.html) -const std = @import("std"); +//! Greatest Common Divisor (https://mathworld.wolfram.com/GreatestCommonDivisor.html) +const std = @import("../std.zig"); -/// Returns the greatest common divisor (GCD) of two unsigned integers (`a` and `b`) which are not both zero. +/// Returns the Greatest Common Divisor (GCD) of two unsigned integers (`a` and `b`) which are not both zero. /// For example, the GCD of `8` and `12` is `4`, that is, `gcd(8, 12) == 4`. pub fn gcd(a: anytype, b: anytype) @TypeOf(a, b) { const N = switch (@TypeOf(a, b)) { From bb4def62df28e8fd37754fae564a5a3af5b31f5e Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Tue, 21 Oct 2025 19:34:59 +0200 Subject: [PATCH 3/6] Remove comment, simplify if. --- lib/std/math/egcd.zig | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/std/math/egcd.zig b/lib/std/math/egcd.zig index ec7471724e7a..2c3e1b530d24 100644 --- a/lib/std/math/egcd.zig +++ b/lib/std/math/egcd.zig @@ -24,9 +24,9 @@ pub fn egcd(a: anytype, b: anytype) ExtendedGreatestCommonDivisor(@TypeOf(a, b)) }, else => |T| T, }; - if (@typeInfo(S) != .int or @typeInfo(S).int.signedness != .signed) { + + if (@typeInfo(S) != .int or @typeInfo(S).int.signedness != .signed) @compileError("`a` and `b` must be signed integers"); - } std.debug.assert(a != 0 or b != 0); @@ -169,7 +169,6 @@ test { const b: i4 = 5; const r = egcd(a, b); const g = r.gcd; - // Avoid overflow in assert. const s: i8 = r.bezout_coeff_1; const t: i8 = r.bezout_coeff_2; try std.testing.expect(s * a + t * b == g); From 16cf4b0dc6de4b0c8ae0c2428662ee8401f21b51 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Tue, 25 Nov 2025 08:02:58 +0100 Subject: [PATCH 4/6] Simplify half_ctlr --- lib/std/math/egcd.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/std/math/egcd.zig b/lib/std/math/egcd.zig index 2c3e1b530d24..68a22bd9f11b 100644 --- a/lib/std/math/egcd.zig +++ b/lib/std/math/egcd.zig @@ -40,8 +40,8 @@ pub fn egcd(a: anytype, b: anytype) ExtendedGreatestCommonDivisor(@TypeOf(a, b)) }; const toinv = @shrExact(other, @intCast(shift)); const ctrl = @shrExact(odd, @intCast(shift)); // Invariant: |s|, |t|, |ctrl| < |MIN_OF(S)| - const half_ctrl = 1 + @shrExact(ctrl - 1, 1); const abs_ctrl = @abs(ctrl); + const half_ctrl: S = @intCast(1 + abs_ctrl >> 1); var s: S = std.math.sign(toinv); var t: S = 0; From 564b7a1b3adf04f08b212aee2b4a9060019f58f0 Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Tue, 25 Nov 2025 08:10:11 +0100 Subject: [PATCH 5/6] Rename result type --- lib/std/math/egcd.zig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/std/math/egcd.zig b/lib/std/math/egcd.zig index 68a22bd9f11b..76ca184a090b 100644 --- a/lib/std/math/egcd.zig +++ b/lib/std/math/egcd.zig @@ -2,7 +2,7 @@ const std = @import("../std.zig"); /// Result type of `egcd`. -pub fn ExtendedGreatestCommonDivisor(S: anytype) type { +pub fn Result(S: anytype) type { const N = switch (S) { comptime_int => comptime_int, else => |T| std.meta.Int(.unsigned, @bitSizeOf(T)), @@ -16,7 +16,7 @@ pub fn ExtendedGreatestCommonDivisor(S: anytype) type { } /// Returns the Extended Greatest Common Divisor (EGCD) of two signed integers (`a` and `b`) which are not both zero. -pub fn egcd(a: anytype, b: anytype) ExtendedGreatestCommonDivisor(@TypeOf(a, b)) { +pub fn egcd(a: anytype, b: anytype) Result(@TypeOf(a, b)) { const S = switch (@TypeOf(a, b)) { comptime_int => b: { const n = @max(@abs(a), @abs(b)); From 9a30054ff06da13e96e0b2adf16989ec3a3a0a8c Mon Sep 17 00:00:00 2001 From: Said Kadrioski Date: Tue, 25 Nov 2025 08:13:00 +0100 Subject: [PATCH 6/6] Revert artifact --- lib/std/crypto/ml_kem.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/std/crypto/ml_kem.zig b/lib/std/crypto/ml_kem.zig index d5e3407d5c9c..9badac5d793f 100644 --- a/lib/std/crypto/ml_kem.zig +++ b/lib/std/crypto/ml_kem.zig @@ -636,7 +636,7 @@ test "invNTTReductions bounds" { fn invertMod(a: anytype, p: @TypeOf(a)) @TypeOf(a) { const r = extendedEuclidean(@TypeOf(a), a, p); assert(r.gcd == 1); - return r.bezout_coeff_1; + return r.x; } // Reduce mod q for testing.