From a4281364e8ee3a6373b1e586c8dee76cb74a09bb Mon Sep 17 00:00:00 2001
From: Katharine Hyatt <kslimes@gmail.com>
Date: Tue, 6 Jan 2026 19:06:52 +0100
Subject: [PATCH 01/10] Test svd_trunc for GPU

---
 test/svd.jl           | 16 ++++++++--------
 test/testsuite/svd.jl |  8 ++++----
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/test/svd.jl b/test/svd.jl
index 40d0528e..1593ff3d 100644
--- a/test/svd.jl
+++ b/test/svd.jl
@@ -17,28 +17,28 @@ for T in (BLASFloats..., GenericFloats...), m in (0, 54), n in (0, 37, m, 63)
     TestSuite.seed_rng!(123)
     if T ∈ BLASFloats
         if CUDA.functional()
-            TestSuite.test_svd(CuMatrix{T}, (m, n); test_trunc = false)
+            TestSuite.test_svd(CuMatrix{T}, (m, n))
             CUDA_SVD_ALGS = (
                 CUSOLVER_QRIteration(),
                 CUSOLVER_SVDPolar(),
                 CUSOLVER_Jacobi(),
             )
-            TestSuite.test_svd_algs(CuMatrix{T}, (m, n), CUDA_SVD_ALGS; test_trunc = false)
+            TestSuite.test_svd_algs(CuMatrix{T}, (m, n), CUDA_SVD_ALGS)
             if n == m
-                TestSuite.test_svd(Diagonal{T, CuVector{T}}, m; test_trunc = false)
-                TestSuite.test_svd_algs(Diagonal{T, CuVector{T}}, m, (DiagonalAlgorithm(),); test_trunc = false)
+                TestSuite.test_svd(Diagonal{T, CuVector{T}}, m)
+                TestSuite.test_svd_algs(Diagonal{T, CuVector{T}}, m, (DiagonalAlgorithm(),))
             end
         end
         if AMDGPU.functional()
-            TestSuite.test_svd(ROCMatrix{T}, (m, n); test_trunc = false)
+            TestSuite.test_svd(ROCMatrix{T}, (m, n))
             AMD_SVD_ALGS = (
                 ROCSOLVER_QRIteration(),
                 ROCSOLVER_Jacobi(),
             )
-            TestSuite.test_svd_algs(ROCMatrix{T}, (m, n), AMD_SVD_ALGS; test_trunc = false)
+            TestSuite.test_svd_algs(ROCMatrix{T}, (m, n), AMD_SVD_ALGS)
             if n == m
-                TestSuite.test_svd(Diagonal{T, ROCVector{T}}, m; test_trunc = false)
-                TestSuite.test_svd_algs(Diagonal{T, ROCVector{T}}, m, (DiagonalAlgorithm(),); test_trunc = false)
+                TestSuite.test_svd(Diagonal{T, ROCVector{T}}, m)
+                TestSuite.test_svd_algs(Diagonal{T, ROCVector{T}}, m, (DiagonalAlgorithm(),))
             end
         end
     end
diff --git a/test/testsuite/svd.jl b/test/testsuite/svd.jl
index d1d8ca33..3b96ecd0 100644
--- a/test/testsuite/svd.jl
+++ b/test/testsuite/svd.jl
@@ -2,21 +2,21 @@ using TestExtras
 using GenericLinearAlgebra
 using LinearAlgebra: opnorm
 
-function test_svd(T::Type, sz; test_trunc = true, kwargs...)
+function test_svd(T::Type, sz; kwargs...)
     summary_str = testargs_summary(T, sz)
     return @testset "svd $summary_str" begin
         test_svd_compact(T, sz; kwargs...)
         test_svd_full(T, sz; kwargs...)
-        test_trunc && test_svd_trunc(T, sz; kwargs...)
+        test_svd_trunc(T, sz; kwargs...)
     end
 end
 
-function test_svd_algs(T::Type, sz, algs; test_trunc = true, kwargs...)
+function test_svd_algs(T::Type, sz, algs; kwargs...)
     summary_str = testargs_summary(T, sz)
     return @testset "svd algorithms $summary_str" begin
         test_svd_compact_algs(T, sz, algs; kwargs...)
         test_svd_full_algs(T, sz, algs; kwargs...)
-        test_trunc && test_svd_trunc_algs(T, sz, algs; kwargs...)
+        test_svd_trunc_algs(T, sz, algs; kwargs...)
     end
 end
 

From 9a88b6c734cc17fdc908717d0c2e2af16bf36431 Mon Sep 17 00:00:00 2001
From: Lukas Devos <ldevos98@gmail.com>
Date: Wed, 7 Jan 2026 13:20:16 +0100
Subject: [PATCH 02/10] scalar indexing in tests

---
 Project.toml                |  5 ++++-
 test/testsuite/TestSuite.jl |  1 +
 test/testsuite/svd.jl       | 12 ++++++------
 3 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/Project.toml b/Project.toml
index 635ea305..e6a30fae 100644
--- a/Project.toml
+++ b/Project.toml
@@ -30,6 +30,7 @@ ChainRulesTestUtils = "1"
 CUDA = "5"
 GenericLinearAlgebra = "0.3.19"
 GenericSchur = "0.5.6"
+GPUArrays = "11"
 JET = "0.9, 0.10"
 LinearAlgebra = "1"
 Mooncake = "0.4.183"
@@ -46,6 +47,7 @@ AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
+GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
 JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b"
 Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
@@ -56,4 +58,5 @@ TestExtras = "5ed8adda-3752-4e41-b88a-e8b09835ee3a"
 Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [targets]
-test = ["Aqua", "JET", "SafeTestsets", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "Random", "StableRNGs", "Zygote", "CUDA", "AMDGPU", "GenericLinearAlgebra", "GenericSchur", "Mooncake"]
+test = ["Aqua", "JET", "SafeTestsets", "Test", "TestExtras", "ChainRulesCore",
+    "ChainRulesTestUtils", "Random", "StableRNGs", "Zygote", "CUDA", "AMDGPU", "GPUArrays", "GenericLinearAlgebra", "GenericSchur", "Mooncake"]
diff --git a/test/testsuite/TestSuite.jl b/test/testsuite/TestSuite.jl
index a0763c7f..f0fb604f 100644
--- a/test/testsuite/TestSuite.jl
+++ b/test/testsuite/TestSuite.jl
@@ -14,6 +14,7 @@ using MatrixAlgebraKit: diagview
 using LinearAlgebra: Diagonal, norm, istriu, istril, I
 using Random, StableRNGs
 using AMDGPU, CUDA
+using GPUArrays: @allowscalar
 
 const tests = Dict()
 
diff --git a/test/testsuite/svd.jl b/test/testsuite/svd.jl
index 3b96ecd0..e559c3c4 100644
--- a/test/testsuite/svd.jl
+++ b/test/testsuite/svd.jl
@@ -167,12 +167,12 @@ function test_svd_trunc(
             U1, S1, V1ᴴ, ϵ1 = @testinferred svd_trunc(A; trunc = truncrank(r))
             @test length(diagview(S1)) == r
             @test diagview(S1) ≈ S₀[1:r]
-            @test opnorm(A - U1 * S1 * V1ᴴ) ≈ S₀[r + 1]
+            @test opnorm(A - U1 * S1 * V1ᴴ) ≈ @allowscalar S₀[r + 1]
             # Test truncation error
             @test ϵ1 ≈ norm(view(S₀, (r + 1):minmn)) atol = atol
 
             s = 1 + sqrt(eps(real(eltype(T))))
-            trunc = trunctol(; atol = s * S₀[r + 1])
+            trunc = trunctol(; atol = s * @allowscalar(S₀[r + 1]))
 
             U2, S2, V2ᴴ, ϵ2 = @testinferred svd_trunc(A; trunc)
             @test length(diagview(S2)) == r
@@ -253,7 +253,7 @@ function test_svd_trunc_algs(
             @test ϵ1 ≈ norm(view(S₀, (r + 1):minmn)) atol = atol
 
             s = 1 + sqrt(eps(real(eltype(T))))
-            trunc = trunctol(; atol = s * S₀[r + 1])
+            trunc = trunctol(; atol = s * @allowscalar(S₀[r + 1]))
 
             U2, S2, V2ᴴ, ϵ2 = @testinferred svd_trunc(A; trunc, alg)
             @test length(diagview(S2)) == r
@@ -285,11 +285,11 @@ function test_svd_trunc_algs(
                 )
                 U1, S1, V1ᴴ, ϵ1 = svd_trunc(A; trunc = trunc_fun(0.2, 1), alg)
                 @test length(diagview(S1)) == 1
-                @test diagview(S1) ≈ diagview(S)[1:1]
+                @test collect(diagview(S1)) ≈ collect(diagview(S)[1:1])
 
                 U2, S2, V2ᴴ, ϵ2 = svd_trunc(A; trunc = trunc_fun(0.2, 3), alg)
                 @test length(diagview(S2)) == 2
-                @test diagview(S2) ≈ diagview(S)[1:2]
+                @test collect(diagview(S2)) ≈ collect(diagview(S)[1:2])
             end
         end
         @testset "specify truncation algorithm" begin
@@ -303,7 +303,7 @@ function test_svd_trunc_algs(
             A = U * S * Vᴴ
             truncalg = TruncatedAlgorithm(alg, trunctol(; atol = 0.2))
             U2, S2, V2ᴴ, ϵ2 = @testinferred svd_trunc(A; alg = truncalg)
-            @test diagview(S2) ≈ diagview(S)[1:2]
+            @test collect(diagview(S2)) ≈ collect(diagview(S)[1:2])
             @test ϵ2 ≈ norm(diagview(S)[3:4]) atol = atol
             @test_throws ArgumentError svd_trunc(A; alg = truncalg, trunc = (; maxrank = 2))
             @test_throws ArgumentError svd_trunc_no_error(A; alg = truncalg, trunc = (; maxrank = 2))

From 33857b253b90bfcd5c2701dc55064596c667db3b Mon Sep 17 00:00:00 2001
From: Lukas Devos <ldevos98@gmail.com>
Date: Wed, 7 Jan 2026 14:05:05 +0100
Subject: [PATCH 03/10] bypass intersect on the GPU

---
 ext/MatrixAlgebraKitAMDGPUExt/MatrixAlgebraKitAMDGPUExt.jl | 5 +++++
 ext/MatrixAlgebraKitCUDAExt/MatrixAlgebraKitCUDAExt.jl     | 5 +++++
 2 files changed, 10 insertions(+)

diff --git a/ext/MatrixAlgebraKitAMDGPUExt/MatrixAlgebraKitAMDGPUExt.jl b/ext/MatrixAlgebraKitAMDGPUExt/MatrixAlgebraKitAMDGPUExt.jl
index abfa6353..befb4e0b 100644
--- a/ext/MatrixAlgebraKitAMDGPUExt/MatrixAlgebraKitAMDGPUExt.jl
+++ b/ext/MatrixAlgebraKitAMDGPUExt/MatrixAlgebraKitAMDGPUExt.jl
@@ -167,4 +167,9 @@ function MatrixAlgebraKit._mul_herm!(C::StridedROCMatrix{T}, A::StridedROCMatrix
     return C
 end
 
+# TODO: `intersect` does not work on GPU arrays yet; collect index vectors to the host first
+MatrixAlgebraKit._ind_intersect(A::ROCVector{Int}, B::AbstractVector) = MatrixAlgebraKit._ind_intersect(collect(A), B)
+MatrixAlgebraKit._ind_intersect(A::AbstractVector, B::ROCVector{Int}) = MatrixAlgebraKit._ind_intersect(A, collect(B))
+MatrixAlgebraKit._ind_intersect(A::ROCVector{Int}, B::ROCVector{Int}) = MatrixAlgebraKit._ind_intersect(collect(A), collect(B))
+
 end
diff --git a/ext/MatrixAlgebraKitCUDAExt/MatrixAlgebraKitCUDAExt.jl b/ext/MatrixAlgebraKitCUDAExt/MatrixAlgebraKitCUDAExt.jl
index e3acb553..432f176a 100644
--- a/ext/MatrixAlgebraKitCUDAExt/MatrixAlgebraKitCUDAExt.jl
+++ b/ext/MatrixAlgebraKitCUDAExt/MatrixAlgebraKitCUDAExt.jl
@@ -191,4 +191,9 @@ function MatrixAlgebraKit._mul_herm!(C::StridedCuMatrix{T}, A::StridedCuMatrix{T
     return C
 end
 
+# TODO: `intersect` does not work on GPU arrays yet; collect index vectors to the host first
+MatrixAlgebraKit._ind_intersect(A::CuVector{Int}, B::AbstractVector) = MatrixAlgebraKit._ind_intersect(collect(A), B)
+MatrixAlgebraKit._ind_intersect(A::AbstractVector, B::CuVector{Int}) = MatrixAlgebraKit._ind_intersect(A, collect(B))
+MatrixAlgebraKit._ind_intersect(A::CuVector{Int}, B::CuVector{Int}) = MatrixAlgebraKit._ind_intersect(collect(A), collect(B))
+
 end

From 80169d8769b8b284bc73a644d8afbc5f1f64e5d0 Mon Sep 17 00:00:00 2001
From: Lukas Devos <ldevos98@gmail.com>
Date: Wed, 7 Jan 2026 14:16:46 +0100
Subject: [PATCH 04/10] more scalar indexing in tests

---
 test/testsuite/svd.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/testsuite/svd.jl b/test/testsuite/svd.jl
index e559c3c4..32f31762 100644
--- a/test/testsuite/svd.jl
+++ b/test/testsuite/svd.jl
@@ -248,7 +248,7 @@ function test_svd_trunc_algs(
             U1, S1, V1ᴴ, ϵ1 = @testinferred svd_trunc(A; trunc = truncrank(r), alg)
             @test length(diagview(S1)) == r
             @test diagview(S1) ≈ S₀[1:r]
-            @test opnorm(A - U1 * S1 * V1ᴴ) ≈ S₀[r + 1]
+            @test opnorm(A - U1 * S1 * V1ᴴ) ≈ @allowscalar S₀[r + 1]
             # Test truncation error
             @test ϵ1 ≈ norm(view(S₀, (r + 1):minmn)) atol = atol
 

From cf91ebdc27e181408d261f0f63f7977a5c1aa5de Mon Sep 17 00:00:00 2001
From: Katharine Hyatt <khyatt@flatironinstitute.org>
Date: Wed, 7 Jan 2026 09:58:53 -0500
Subject: [PATCH 05/10] Get rid of GPUArrays

---
 Project.toml                |  4 +---
 test/svd.jl                 |  1 +
 test/testsuite/TestSuite.jl |  1 -
 test/testsuite/svd.jl       | 16 ++++++++--------
 4 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/Project.toml b/Project.toml
index e6a30fae..cc694527 100644
--- a/Project.toml
+++ b/Project.toml
@@ -30,7 +30,6 @@ ChainRulesTestUtils = "1"
 CUDA = "5"
 GenericLinearAlgebra = "0.3.19"
 GenericSchur = "0.5.6"
-GPUArrays = "11"
 JET = "0.9, 0.10"
 LinearAlgebra = "1"
 Mooncake = "0.4.183"
@@ -47,7 +46,6 @@ AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
 ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
-GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
 JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b"
 Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
@@ -59,4 +57,4 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
 
 [targets]
 test = ["Aqua", "JET", "SafeTestsets", "Test", "TestExtras", "ChainRulesCore",
-    "ChainRulesTestUtils", "Random", "StableRNGs", "Zygote", "CUDA", "AMDGPU", "GPUArrays", "GenericLinearAlgebra", "GenericSchur", "Mooncake"]
+    "ChainRulesTestUtils", "Random", "StableRNGs", "Zygote", "CUDA", "AMDGPU", "GenericLinearAlgebra", "GenericSchur", "Mooncake"]
diff --git a/test/svd.jl b/test/svd.jl
index 1593ff3d..affe2942 100644
--- a/test/svd.jl
+++ b/test/svd.jl
@@ -4,6 +4,7 @@ using TestExtras
 using StableRNGs
 using LinearAlgebra: Diagonal
 using CUDA, AMDGPU
+using CUDA.CUSOLVER # pull in opnorm binding
 
 BLASFloats = (Float32, Float64, ComplexF32, ComplexF64)
 GenericFloats = (BigFloat, Complex{BigFloat})
diff --git a/test/testsuite/TestSuite.jl b/test/testsuite/TestSuite.jl
index f0fb604f..a0763c7f 100644
--- a/test/testsuite/TestSuite.jl
+++ b/test/testsuite/TestSuite.jl
@@ -14,7 +14,6 @@ using MatrixAlgebraKit: diagview
 using LinearAlgebra: Diagonal, norm, istriu, istril, I
 using Random, StableRNGs
 using AMDGPU, CUDA
-using GPUArrays: @allowscalar
 
 const tests = Dict()
 
diff --git a/test/testsuite/svd.jl b/test/testsuite/svd.jl
index 32f31762..e8af9d68 100644
--- a/test/testsuite/svd.jl
+++ b/test/testsuite/svd.jl
@@ -160,19 +160,19 @@ function test_svd_trunc(
         Ac = deepcopy(A)
         m, n = size(A)
         minmn = min(m, n)
-        S₀ = svd_vals(A)
+        S₀ = collect(svd_vals(A))
         r = minmn - 2
 
         if m > 0 && n > 0
             U1, S1, V1ᴴ, ϵ1 = @testinferred svd_trunc(A; trunc = truncrank(r))
             @test length(diagview(S1)) == r
-            @test diagview(S1) ≈ S₀[1:r]
-            @test opnorm(A - U1 * S1 * V1ᴴ) ≈ @allowscalar S₀[r + 1]
+            @test collect(diagview(S1)) ≈ S₀[1:r]
+            @test opnorm(A - U1 * S1 * V1ᴴ) ≈ S₀[r + 1]
             # Test truncation error
             @test ϵ1 ≈ norm(view(S₀, (r + 1):minmn)) atol = atol
 
             s = 1 + sqrt(eps(real(eltype(T))))
-            trunc = trunctol(; atol = s * @allowscalar(S₀[r + 1]))
+            trunc = trunctol(; atol = s * S₀[r + 1])
 
             U2, S2, V2ᴴ, ϵ2 = @testinferred svd_trunc(A; trunc)
             @test length(diagview(S2)) == r
@@ -241,19 +241,19 @@ function test_svd_trunc_algs(
         Ac = deepcopy(A)
         m, n = size(A)
         minmn = min(m, n)
-        S₀ = svd_vals(A)
+        S₀ = collect(svd_vals(A))
         r = minmn - 2
 
         if m > 0 && n > 0
             U1, S1, V1ᴴ, ϵ1 = @testinferred svd_trunc(A; trunc = truncrank(r), alg)
             @test length(diagview(S1)) == r
-            @test diagview(S1) ≈ S₀[1:r]
-            @test opnorm(A - U1 * S1 * V1ᴴ) ≈ @allowscalar S₀[r + 1]
+            @test collect(diagview(S1)) ≈ S₀[1:r]
+            @test opnorm(A - U1 * S1 * V1ᴴ) ≈ S₀[r + 1]
             # Test truncation error
             @test ϵ1 ≈ norm(view(S₀, (r + 1):minmn)) atol = atol
 
             s = 1 + sqrt(eps(real(eltype(T))))
-            trunc = trunctol(; atol = s * @allowscalar(S₀[r + 1]))
+            trunc = trunctol(; atol = s * S₀[r + 1])
 
             U2, S2, V2ᴴ, ϵ2 = @testinferred svd_trunc(A; trunc, alg)
             @test length(diagview(S2)) == r

From 01b3d0d076810797f8a0b44420af04504df4cc06 Mon Sep 17 00:00:00 2001
From: Katharine Hyatt <khyatt@flatironinstitute.org>
Date: Wed, 7 Jan 2026 10:19:11 -0500
Subject: [PATCH 06/10] Try to unbreak AMDGPU

---
 test/testsuite/svd.jl | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/test/testsuite/svd.jl b/test/testsuite/svd.jl
index e8af9d68..7b0c1396 100644
--- a/test/testsuite/svd.jl
+++ b/test/testsuite/svd.jl
@@ -167,7 +167,8 @@ function test_svd_trunc(
             U1, S1, V1ᴴ, ϵ1 = @testinferred svd_trunc(A; trunc = truncrank(r))
             @test length(diagview(S1)) == r
             @test collect(diagview(S1)) ≈ S₀[1:r]
-            @test opnorm(A - U1 * S1 * V1ᴴ) ≈ S₀[r + 1]
+            AUSV_vals = svd_vals(A - U1 * S1 * V1ᴴ) # bypass broken svdvals on AMDGPU
+            @test opnorm(Diagonal(AUSV_vals)) ≈ S₀[r + 1]
             # Test truncation error
             @test ϵ1 ≈ norm(view(S₀, (r + 1):minmn)) atol = atol
 
@@ -248,7 +249,8 @@ function test_svd_trunc_algs(
             U1, S1, V1ᴴ, ϵ1 = @testinferred svd_trunc(A; trunc = truncrank(r), alg)
             @test length(diagview(S1)) == r
             @test collect(diagview(S1)) ≈ S₀[1:r]
-            @test opnorm(A - U1 * S1 * V1ᴴ) ≈ S₀[r + 1]
+            AUSV_vals = svd_vals(A - U1 * S1 * V1ᴴ) # bypass broken svdvals on AMDGPU
+            @test opnorm(Diagonal(AUSV_vals)) ≈ S₀[r + 1]
             # Test truncation error
             @test ϵ1 ≈ norm(view(S₀, (r + 1):minmn)) atol = atol
 

From 5eab7f6b5ed694e81d5ae5e488e8e62cdba27c18 Mon Sep 17 00:00:00 2001
From: Katharine Hyatt <khyatt@flatironinstitute.org>
Date: Wed, 7 Jan 2026 10:47:14 -0500
Subject: [PATCH 07/10] Actually fix the AMDGPU opnorm workaround in svd_trunc tests

---
 test/testsuite/svd.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/testsuite/svd.jl b/test/testsuite/svd.jl
index 7b0c1396..ac7a58e6 100644
--- a/test/testsuite/svd.jl
+++ b/test/testsuite/svd.jl
@@ -168,7 +168,7 @@ function test_svd_trunc(
             @test length(diagview(S1)) == r
             @test collect(diagview(S1)) ≈ S₀[1:r]
             AUSV_vals = svd_vals(A - U1 * S1 * V1ᴴ) # bypass broken svdvals on AMDGPU
-            @test opnorm(Diagonal(AUSV_vals)) ≈ S₀[r + 1]
+            @test mapreduce(sv->opnorm(sv, 2), max, AUSV_vals) ≈ S₀[r + 1]
             # Test truncation error
             @test ϵ1 ≈ norm(view(S₀, (r + 1):minmn)) atol = atol
 
@@ -250,7 +250,7 @@ function test_svd_trunc_algs(
             @test length(diagview(S1)) == r
             @test collect(diagview(S1)) ≈ S₀[1:r]
             AUSV_vals = svd_vals(A - U1 * S1 * V1ᴴ) # bypass broken svdvals on AMDGPU
-            @test opnorm(Diagonal(AUSV_vals)) ≈ S₀[r + 1]
+            @test mapreduce(sv->opnorm(sv, 2), max, AUSV_vals) ≈ S₀[r + 1]
             # Test truncation error
             @test ϵ1 ≈ norm(view(S₀, (r + 1):minmn)) atol = atol
 

From a460df5a506ff582e640524d77c3e9ed626968c2 Mon Sep 17 00:00:00 2001
From: Katharine Hyatt <khyatt@flatironinstitute.org>
Date: Wed, 7 Jan 2026 10:52:00 -0500
Subject: [PATCH 08/10] Format

---
 test/testsuite/svd.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/testsuite/svd.jl b/test/testsuite/svd.jl
index ac7a58e6..800018b2 100644
--- a/test/testsuite/svd.jl
+++ b/test/testsuite/svd.jl
@@ -168,7 +168,7 @@ function test_svd_trunc(
             @test length(diagview(S1)) == r
             @test collect(diagview(S1)) ≈ S₀[1:r]
             AUSV_vals = svd_vals(A - U1 * S1 * V1ᴴ) # bypass broken svdvals on AMDGPU
-            @test mapreduce(sv->opnorm(sv, 2), max, AUSV_vals) ≈ S₀[r + 1]
+            @test mapreduce(sv -> opnorm(sv, 2), max, AUSV_vals) ≈ S₀[r + 1]
             # Test truncation error
             @test ϵ1 ≈ norm(view(S₀, (r + 1):minmn)) atol = atol
 
@@ -250,7 +250,7 @@ function test_svd_trunc_algs(
             @test length(diagview(S1)) == r
             @test collect(diagview(S1)) ≈ S₀[1:r]
             AUSV_vals = svd_vals(A - U1 * S1 * V1ᴴ) # bypass broken svdvals on AMDGPU
-            @test mapreduce(sv->opnorm(sv, 2), max, AUSV_vals) ≈ S₀[r + 1]
+            @test mapreduce(sv -> opnorm(sv, 2), max, AUSV_vals) ≈ S₀[r + 1]
             # Test truncation error
             @test ϵ1 ≈ norm(view(S₀, (r + 1):minmn)) atol = atol
 

From 49dd07aefab026339815bf1f9c3352cca47bcd2b Mon Sep 17 00:00:00 2001
From: Katharine Hyatt <khyatt@flatironinstitute.org>
Date: Wed, 7 Jan 2026 11:44:39 -0500
Subject: [PATCH 09/10] Special-case random unitary generation for AMDGPU

---
 test/testsuite/TestSuite.jl | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/test/testsuite/TestSuite.jl b/test/testsuite/TestSuite.jl
index a0763c7f..ed33f1c7 100644
--- a/test/testsuite/TestSuite.jl
+++ b/test/testsuite/TestSuite.jl
@@ -77,6 +77,11 @@ isrightcomplete(V::AnyCuMatrix, N::AnyCuMatrix) = isrightcomplete(collect(V), co
 isrightcomplete(V::AnyROCMatrix, N::AnyROCMatrix) = isrightcomplete(collect(V), collect(N))
 
 instantiate_unitary(T, A, sz) = qr_compact(randn!(similar(A, eltype(T), sz, sz)))[1]
+# AMDGPU can't generate ComplexF32 random numbers
+function instantiate_unitary(T, A::ROCMatrix{ComplexF32}, sz)
+    sqA = randn!(similar(A, real(eltype(T)), sz, sz)) .+ im .* randn!(similar(A, real(eltype(T)), sz, sz))
+    return qr_compact(sqA)[1]
+end
 instantiate_unitary(::Type{<:Diagonal}, A, sz) = Diagonal(fill!(similar(parent(A), eltype(A), sz), one(eltype(A))))
 
 include("qr.jl")

From c78f557c20303020e85843a95ccc019216ad3aa5 Mon Sep 17 00:00:00 2001
From: Katharine Hyatt <khyatt@flatironinstitute.org>
Date: Wed, 7 Jan 2026 12:11:33 -0500
Subject: [PATCH 10/10] Apply AMDGPU random-unitary workaround to all complex eltypes

---
 test/testsuite/TestSuite.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/testsuite/TestSuite.jl b/test/testsuite/TestSuite.jl
index ed33f1c7..2f3fde50 100644
--- a/test/testsuite/TestSuite.jl
+++ b/test/testsuite/TestSuite.jl
@@ -78,7 +78,7 @@ isrightcomplete(V::AnyROCMatrix, N::AnyROCMatrix) = isrightcomplete(collect(V),
 
 instantiate_unitary(T, A, sz) = qr_compact(randn!(similar(A, eltype(T), sz, sz)))[1]
-# AMDGPU can't generate ComplexF32 random numbers
-function instantiate_unitary(T, A::ROCMatrix{ComplexF32}, sz)
+# AMDGPU can't generate complex random numbers; combine two real draws instead
+function instantiate_unitary(T, A::ROCMatrix{<:Complex}, sz)
     sqA = randn!(similar(A, real(eltype(T)), sz, sz)) .+ im .* randn!(similar(A, real(eltype(T)), sz, sz))
     return qr_compact(sqA)[1]
 end