From a4281364e8ee3a6373b1e586c8dee76cb74a09bb Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Tue, 6 Jan 2026 19:06:52 +0100 Subject: [PATCH 01/10] Test svd_trunc for GPU --- test/svd.jl | 16 ++++++++-------- test/testsuite/svd.jl | 8 ++++---- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/test/svd.jl b/test/svd.jl index 40d0528e..1593ff3d 100644 --- a/test/svd.jl +++ b/test/svd.jl @@ -17,28 +17,28 @@ for T in (BLASFloats..., GenericFloats...), m in (0, 54), n in (0, 37, m, 63) TestSuite.seed_rng!(123) if T ∈ BLASFloats if CUDA.functional() - TestSuite.test_svd(CuMatrix{T}, (m, n); test_trunc = false) + TestSuite.test_svd(CuMatrix{T}, (m, n)) CUDA_SVD_ALGS = ( CUSOLVER_QRIteration(), CUSOLVER_SVDPolar(), CUSOLVER_Jacobi(), ) - TestSuite.test_svd_algs(CuMatrix{T}, (m, n), CUDA_SVD_ALGS; test_trunc = false) + TestSuite.test_svd_algs(CuMatrix{T}, (m, n), CUDA_SVD_ALGS) if n == m - TestSuite.test_svd(Diagonal{T, CuVector{T}}, m; test_trunc = false) - TestSuite.test_svd_algs(Diagonal{T, CuVector{T}}, m, (DiagonalAlgorithm(),); test_trunc = false) + TestSuite.test_svd(Diagonal{T, CuVector{T}}, m) + TestSuite.test_svd_algs(Diagonal{T, CuVector{T}}, m, (DiagonalAlgorithm(),)) end end if AMDGPU.functional() - TestSuite.test_svd(ROCMatrix{T}, (m, n); test_trunc = false) + TestSuite.test_svd(ROCMatrix{T}, (m, n)) AMD_SVD_ALGS = ( ROCSOLVER_QRIteration(), ROCSOLVER_Jacobi(), ) - TestSuite.test_svd_algs(ROCMatrix{T}, (m, n), AMD_SVD_ALGS; test_trunc = false) + TestSuite.test_svd_algs(ROCMatrix{T}, (m, n), AMD_SVD_ALGS) if n == m - TestSuite.test_svd(Diagonal{T, ROCVector{T}}, m; test_trunc = false) - TestSuite.test_svd_algs(Diagonal{T, ROCVector{T}}, m, (DiagonalAlgorithm(),); test_trunc = false) + TestSuite.test_svd(Diagonal{T, ROCVector{T}}, m) + TestSuite.test_svd_algs(Diagonal{T, ROCVector{T}}, m, (DiagonalAlgorithm(),)) end end end diff --git a/test/testsuite/svd.jl b/test/testsuite/svd.jl index d1d8ca33..3b96ecd0 100644 --- a/test/testsuite/svd.jl +++ b/test/testsuite/svd.jl @@ -2,21 +2,21 @@ using TestExtras using GenericLinearAlgebra using LinearAlgebra: opnorm -function test_svd(T::Type, sz; test_trunc = true, kwargs...) +function test_svd(T::Type, sz; kwargs...) summary_str = testargs_summary(T, sz) return @testset "svd $summary_str" begin test_svd_compact(T, sz; kwargs...) test_svd_full(T, sz; kwargs...) - test_trunc && test_svd_trunc(T, sz; kwargs...) + test_svd_trunc(T, sz; kwargs...) end end -function test_svd_algs(T::Type, sz, algs; test_trunc = true, kwargs...) +function test_svd_algs(T::Type, sz, algs; kwargs...) summary_str = testargs_summary(T, sz) return @testset "svd algorithms $summary_str" begin test_svd_compact_algs(T, sz, algs; kwargs...) test_svd_full_algs(T, sz, algs; kwargs...) - test_trunc && test_svd_trunc_algs(T, sz, algs; kwargs...) + test_svd_trunc_algs(T, sz, algs; kwargs...) end end From 9a88b6c734cc17fdc908717d0c2e2af16bf36431 Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Wed, 7 Jan 2026 13:20:16 +0100 Subject: [PATCH 02/10] scalar indexing in tests --- Project.toml | 5 ++++- test/testsuite/TestSuite.jl | 1 + test/testsuite/svd.jl | 12 ++++++------ 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/Project.toml b/Project.toml index 635ea305..e6a30fae 100644 --- a/Project.toml +++ b/Project.toml @@ -30,6 +30,7 @@ ChainRulesTestUtils = "1" CUDA = "5" GenericLinearAlgebra = "0.3.19" GenericSchur = "0.5.6" +GPUArrays = "11" JET = "0.9, 0.10" LinearAlgebra = "1" Mooncake = "0.4.183" @@ -46,6 +47,7 @@ AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b" Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" @@ -56,4 +58,5 @@ TestExtras = "5ed8adda-3752-4e41-b88a-e8b09835ee3a" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [targets] -test = ["Aqua", "JET", "SafeTestsets", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "Random", "StableRNGs", "Zygote", "CUDA", "AMDGPU", "GenericLinearAlgebra", "GenericSchur", "Mooncake"] +test = ["Aqua", "JET", "SafeTestsets", "Test", "TestExtras", "ChainRulesCore", + "ChainRulesTestUtils", "Random", "StableRNGs", "Zygote", "CUDA", "AMDGPU", "GPUArrays", "GenericLinearAlgebra", "GenericSchur", "Mooncake"] diff --git a/test/testsuite/TestSuite.jl b/test/testsuite/TestSuite.jl index a0763c7f..f0fb604f 100644 --- a/test/testsuite/TestSuite.jl +++ b/test/testsuite/TestSuite.jl @@ -14,6 +14,7 @@ using MatrixAlgebraKit: diagview using LinearAlgebra: Diagonal, norm, istriu, istril, I using Random, StableRNGs using AMDGPU, CUDA +using GPUArrays: @allowscalar const tests = Dict() diff --git a/test/testsuite/svd.jl b/test/testsuite/svd.jl index 3b96ecd0..e559c3c4 100644 --- a/test/testsuite/svd.jl +++ b/test/testsuite/svd.jl @@ -167,12 +167,12 @@ function test_svd_trunc( U1, S1, V1ᴴ, ϵ1 = @testinferred svd_trunc(A; trunc = truncrank(r)) @test length(diagview(S1)) == r @test diagview(S1) ≈ S₀[1:r] - @test opnorm(A - U1 * S1 * V1ᴴ) ≈ S₀[r + 1] + @test opnorm(A - U1 * S1 * V1ᴴ) ≈ @allowscalar S₀[r + 1] # Test truncation error @test ϵ1 ≈ norm(view(S₀, (r + 1):minmn)) atol = atol s = 1 + sqrt(eps(real(eltype(T)))) - trunc = trunctol(; atol = s * S₀[r + 1]) + trunc = trunctol(; atol = s * @allowscalar(S₀[r + 1])) U2, S2, V2ᴴ, ϵ2 = @testinferred svd_trunc(A; trunc) @test length(diagview(S2)) == r @@ -253,7 +253,7 @@ function test_svd_trunc_algs( @test ϵ1 ≈ norm(view(S₀, (r + 1):minmn)) atol = atol s = 1 + sqrt(eps(real(eltype(T)))) - trunc = trunctol(; atol = s * S₀[r + 1]) + trunc = trunctol(; atol = s * @allowscalar(S₀[r + 1])) U2, S2, V2ᴴ, ϵ2 = @testinferred svd_trunc(A; trunc, alg) @test length(diagview(S2)) == r @@ -285,11 +285,11 @@ function test_svd_trunc_algs( ) U1, S1, V1ᴴ, ϵ1 = svd_trunc(A; trunc = trunc_fun(0.2, 1), alg) @test length(diagview(S1)) == 1 - @test diagview(S1) ≈ diagview(S)[1:1] + @test collect(diagview(S1)) ≈ collect(diagview(S)[1:1]) U2, S2, V2ᴴ, ϵ2 = svd_trunc(A; trunc = trunc_fun(0.2, 3), alg) @test length(diagview(S2)) == 2 - @test diagview(S2) ≈ diagview(S)[1:2] + @test collect(diagview(S2)) ≈ collect(diagview(S)[1:2]) end end @testset "specify truncation algorithm" begin @@ -303,7 +303,7 @@ function test_svd_trunc_algs( A = U * S * Vᴴ truncalg = TruncatedAlgorithm(alg, trunctol(; atol = 0.2)) U2, S2, V2ᴴ, ϵ2 = @testinferred svd_trunc(A; alg = truncalg) - @test diagview(S2) ≈ diagview(S)[1:2] + @test collect(diagview(S2)) ≈ collect(diagview(S)[1:2]) @test ϵ2 ≈ norm(diagview(S)[3:4]) atol = atol @test_throws ArgumentError svd_trunc(A; alg = truncalg, trunc = (; maxrank = 2)) @test_throws ArgumentError svd_trunc_no_error(A; alg = truncalg, trunc = (; maxrank = 2)) From 33857b253b90bfcd5c2701dc55064596c667db3b Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Wed, 7 Jan 2026 14:05:05 +0100 Subject: [PATCH 03/10] bypass intersect on the GPU --- ext/MatrixAlgebraKitAMDGPUExt/MatrixAlgebraKitAMDGPUExt.jl | 5 +++++ ext/MatrixAlgebraKitCUDAExt/MatrixAlgebraKitCUDAExt.jl | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/ext/MatrixAlgebraKitAMDGPUExt/MatrixAlgebraKitAMDGPUExt.jl b/ext/MatrixAlgebraKitAMDGPUExt/MatrixAlgebraKitAMDGPUExt.jl index abfa6353..befb4e0b 100644 --- a/ext/MatrixAlgebraKitAMDGPUExt/MatrixAlgebraKitAMDGPUExt.jl +++ b/ext/MatrixAlgebraKitAMDGPUExt/MatrixAlgebraKitAMDGPUExt.jl @@ -167,4 +167,9 @@ function MatrixAlgebraKit._mul_herm!(C::StridedROCMatrix{T}, A::StridedROCMatrix return C end +# TODO: intersect on GPU arrays is not working +MatrixAlgebraKit._ind_intersect(A::ROCVector{Int}, B::AbstractVector) = MatrixAlgebraKit._ind_intersect(collect(A), B) +MatrixAlgebraKit._ind_intersect(A::AbstractVector, B::ROCVector{Int}) = MatrixAlgebraKit._ind_intersect(A, collect(B)) +MatrixAlgebraKit._ind_intersect(A::ROCVector{Int}, B::ROCVector{Int}) = MatrixAlgebraKit._ind_intersect(collect(A), collect(B)) + end diff --git a/ext/MatrixAlgebraKitCUDAExt/MatrixAlgebraKitCUDAExt.jl b/ext/MatrixAlgebraKitCUDAExt/MatrixAlgebraKitCUDAExt.jl index e3acb553..432f176a 100644 --- a/ext/MatrixAlgebraKitCUDAExt/MatrixAlgebraKitCUDAExt.jl +++ b/ext/MatrixAlgebraKitCUDAExt/MatrixAlgebraKitCUDAExt.jl @@ -191,4 +191,9 @@ function MatrixAlgebraKit._mul_herm!(C::StridedCuMatrix{T}, A::StridedCuMatrix{T return C end +# TODO: intersect on GPU arrays is not working +MatrixAlgebraKit._ind_intersect(A::CuVector{Int}, B::AbstractVector) = MatrixAlgebraKit._ind_intersect(collect(A), B) +MatrixAlgebraKit._ind_intersect(A::AbstractVector, B::CuVector{Int}) = MatrixAlgebraKit._ind_intersect(A, collect(B)) +MatrixAlgebraKit._ind_intersect(A::CuVector{Int}, B::CuVector{Int}) = MatrixAlgebraKit._ind_intersect(collect(A), collect(B)) + end From 80169d8769b8b284bc73a644d8afbc5f1f64e5d0 Mon Sep 17 00:00:00 2001 From: Lukas Devos Date: Wed, 7 Jan 2026 14:16:46 +0100 Subject: [PATCH 04/10] more scalar indexing in tests --- test/testsuite/svd.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/testsuite/svd.jl b/test/testsuite/svd.jl index e559c3c4..32f31762 100644 --- a/test/testsuite/svd.jl +++ b/test/testsuite/svd.jl @@ -248,7 +248,7 @@ function test_svd_trunc_algs( U1, S1, V1ᴴ, ϵ1 = @testinferred svd_trunc(A; trunc = truncrank(r), alg) @test length(diagview(S1)) == r @test diagview(S1) ≈ S₀[1:r] - @test opnorm(A - U1 * S1 * V1ᴴ) ≈ S₀[r + 1] + @test opnorm(A - U1 * S1 * V1ᴴ) ≈ @allowscalar S₀[r + 1] # Test truncation error @test ϵ1 ≈ norm(view(S₀, (r + 1):minmn)) atol = atol From cf91ebdc27e181408d261f0f63f7977a5c1aa5de Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Wed, 7 Jan 2026 09:58:53 -0500 Subject: [PATCH 05/10] Get rid of GPUArrays --- Project.toml | 4 +--- test/svd.jl | 1 + test/testsuite/TestSuite.jl | 1 - test/testsuite/svd.jl | 16 ++++++++-------- 4 files changed, 10 insertions(+), 12 deletions(-) diff --git a/Project.toml b/Project.toml index e6a30fae..cc694527 100644 --- a/Project.toml +++ b/Project.toml @@ -30,7 +30,6 @@ ChainRulesTestUtils = "1" CUDA = "5" GenericLinearAlgebra = "0.3.19" GenericSchur = "0.5.6" -GPUArrays = "11" JET = "0.9, 0.10" LinearAlgebra = "1" Mooncake = "0.4.183" @@ -47,7 +46,6 @@ AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e" Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" ChainRulesTestUtils = "cdddcdb0-9152-4a09-a978-84456f9df70a" CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" -GPUArrays = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b" Mooncake = "da2b9cff-9c12-43a0-ae48-6db2b0edb7d6" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" @@ -59,4 +57,4 @@ Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [targets] test = ["Aqua", "JET", "SafeTestsets", "Test", "TestExtras", "ChainRulesCore", - "ChainRulesTestUtils", "Random", "StableRNGs", "Zygote", "CUDA", "AMDGPU", "GPUArrays", "GenericLinearAlgebra", "GenericSchur", "Mooncake"] + "ChainRulesTestUtils", "Random", "StableRNGs", "Zygote", "CUDA", "AMDGPU", "GenericLinearAlgebra", "GenericSchur", "Mooncake"] diff --git a/test/svd.jl b/test/svd.jl index 1593ff3d..affe2942 100644 --- a/test/svd.jl +++ b/test/svd.jl @@ -4,6 +4,7 @@ using TestExtras using StableRNGs using LinearAlgebra: Diagonal using CUDA, AMDGPU +using CUDA.CUSOLVER # pull in opnorm binding BLASFloats = (Float32, Float64, ComplexF32, ComplexF64) GenericFloats = (BigFloat, Complex{BigFloat}) diff --git a/test/testsuite/TestSuite.jl b/test/testsuite/TestSuite.jl index f0fb604f..a0763c7f 100644 --- a/test/testsuite/TestSuite.jl +++ b/test/testsuite/TestSuite.jl @@ -14,7 +14,6 @@ using MatrixAlgebraKit: diagview using LinearAlgebra: Diagonal, norm, istriu, istril, I using Random, StableRNGs using AMDGPU, CUDA -using GPUArrays: @allowscalar const tests = Dict() diff --git a/test/testsuite/svd.jl b/test/testsuite/svd.jl index 32f31762..e8af9d68 100644 --- a/test/testsuite/svd.jl +++ b/test/testsuite/svd.jl @@ -160,19 +160,19 @@ function test_svd_trunc( Ac = deepcopy(A) m, n = size(A) minmn = min(m, n) - S₀ = svd_vals(A) + S₀ = collect(svd_vals(A)) r = minmn - 2 if m > 0 && n > 0 U1, S1, V1ᴴ, ϵ1 = @testinferred svd_trunc(A; trunc = truncrank(r)) @test length(diagview(S1)) == r - @test diagview(S1) ≈ S₀[1:r] - @test opnorm(A - U1 * S1 * V1ᴴ) ≈ @allowscalar S₀[r + 1] + @test collect(diagview(S1)) ≈ S₀[1:r] + @test opnorm(A - U1 * S1 * V1ᴴ) ≈ S₀[r + 1] # Test truncation error @test ϵ1 ≈ norm(view(S₀, (r + 1):minmn)) atol = atol s = 1 + sqrt(eps(real(eltype(T)))) - trunc = trunctol(; atol = s * @allowscalar(S₀[r + 1])) + trunc = trunctol(; atol = s * S₀[r + 1]) U2, S2, V2ᴴ, ϵ2 = @testinferred svd_trunc(A; trunc) @test length(diagview(S2)) == r @@ -241,19 +241,19 @@ function test_svd_trunc_algs( Ac = deepcopy(A) m, n = size(A) minmn = min(m, n) - S₀ = svd_vals(A) + S₀ = collect(svd_vals(A)) r = minmn - 2 if m > 0 && n > 0 U1, S1, V1ᴴ, ϵ1 = @testinferred svd_trunc(A; trunc = truncrank(r), alg) @test length(diagview(S1)) == r - @test diagview(S1) ≈ S₀[1:r] - @test opnorm(A - U1 * S1 * V1ᴴ) ≈ @allowscalar S₀[r + 1] + @test collect(diagview(S1)) ≈ S₀[1:r] + @test opnorm(A - U1 * S1 * V1ᴴ) ≈ S₀[r + 1] # Test truncation error @test ϵ1 ≈ norm(view(S₀, (r + 1):minmn)) atol = atol s = 1 + sqrt(eps(real(eltype(T)))) - trunc = trunctol(; atol = s * @allowscalar(S₀[r + 1])) + trunc = trunctol(; atol = s * S₀[r + 1]) U2, S2, V2ᴴ, ϵ2 = @testinferred svd_trunc(A; trunc, alg) @test length(diagview(S2)) == r From 01b3d0d076810797f8a0b44420af04504df4cc06 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Wed, 7 Jan 2026 10:19:11 -0500 Subject: [PATCH 06/10] Try to unbreak AMDGPU --- test/testsuite/svd.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/testsuite/svd.jl b/test/testsuite/svd.jl index e8af9d68..7b0c1396 100644 --- a/test/testsuite/svd.jl +++ b/test/testsuite/svd.jl @@ -167,7 +167,8 @@ function test_svd_trunc( U1, S1, V1ᴴ, ϵ1 = @testinferred svd_trunc(A; trunc = truncrank(r)) @test length(diagview(S1)) == r @test collect(diagview(S1)) ≈ S₀[1:r] - @test opnorm(A - U1 * S1 * V1ᴴ) ≈ S₀[r + 1] + AUSV_vals = svd_vals(A - U1 * S1 * V1ᴴ) # bypass broken svdvals on AMDGPU + @test opnorm(Diagonal(AUSV_vals)) ≈ S₀[r + 1] # Test truncation error @test ϵ1 ≈ norm(view(S₀, (r + 1):minmn)) atol = atol @@ -248,7 +249,8 @@ function test_svd_trunc_algs( U1, S1, V1ᴴ, ϵ1 = @testinferred svd_trunc(A; trunc = truncrank(r), alg) @test length(diagview(S1)) == r @test collect(diagview(S1)) ≈ S₀[1:r] - @test opnorm(A - U1 * S1 * V1ᴴ) ≈ S₀[r + 1] + AUSV_vals = svd_vals(A - U1 * S1 * V1ᴴ) # bypass broken svdvals on AMDGPU + @test opnorm(Diagonal(AUSV_vals)) ≈ S₀[r + 1] # Test truncation error @test ϵ1 ≈ norm(view(S₀, (r + 1):minmn)) atol = atol From 5eab7f6b5ed694e81d5ae5e488e8e62cdba27c18 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Wed, 7 Jan 2026 10:47:14 -0500 Subject: [PATCH 07/10] Actually fix --- test/testsuite/svd.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/testsuite/svd.jl b/test/testsuite/svd.jl index 7b0c1396..ac7a58e6 100644 --- a/test/testsuite/svd.jl +++ b/test/testsuite/svd.jl @@ -168,7 +168,7 @@ function test_svd_trunc( @test length(diagview(S1)) == r @test collect(diagview(S1)) ≈ S₀[1:r] AUSV_vals = svd_vals(A - U1 * S1 * V1ᴴ) # bypass broken svdvals on AMDGPU - @test opnorm(Diagonal(AUSV_vals)) ≈ S₀[r + 1] + @test mapreduce(sv->opnorm(sv, 2), max, AUSV_vals) ≈ S₀[r + 1] # Test truncation error @test ϵ1 ≈ norm(view(S₀, (r + 1):minmn)) atol = atol @@ -250,7 +250,7 @@ function test_svd_trunc_algs( @test length(diagview(S1)) == r @test collect(diagview(S1)) ≈ S₀[1:r] AUSV_vals = svd_vals(A - U1 * S1 * V1ᴴ) # bypass broken svdvals on AMDGPU - @test opnorm(Diagonal(AUSV_vals)) ≈ S₀[r + 1] + @test mapreduce(sv->opnorm(sv, 2), max, AUSV_vals) ≈ S₀[r + 1] # Test truncation error @test ϵ1 ≈ norm(view(S₀, (r + 1):minmn)) atol = atol From a460df5a506ff582e640524d77c3e9ed626968c2 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Wed, 7 Jan 2026 10:52:00 -0500 Subject: [PATCH 08/10] Format --- test/testsuite/svd.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/testsuite/svd.jl b/test/testsuite/svd.jl index ac7a58e6..800018b2 100644 --- a/test/testsuite/svd.jl +++ b/test/testsuite/svd.jl @@ -168,7 +168,7 @@ function test_svd_trunc( @test length(diagview(S1)) == r @test collect(diagview(S1)) ≈ S₀[1:r] AUSV_vals = svd_vals(A - U1 * S1 * V1ᴴ) # bypass broken svdvals on AMDGPU - @test mapreduce(sv->opnorm(sv, 2), max, AUSV_vals) ≈ S₀[r + 1] + @test mapreduce(sv -> opnorm(sv, 2), max, AUSV_vals) ≈ S₀[r + 1] # Test truncation error @test ϵ1 ≈ norm(view(S₀, (r + 1):minmn)) atol = atol @@ -250,7 +250,7 @@ function test_svd_trunc_algs( @test length(diagview(S1)) == r @test collect(diagview(S1)) ≈ S₀[1:r] AUSV_vals = svd_vals(A - U1 * S1 * V1ᴴ) # bypass broken svdvals on AMDGPU - @test mapreduce(sv->opnorm(sv, 2), max, AUSV_vals) ≈ S₀[r + 1] + @test mapreduce(sv -> opnorm(sv, 2), max, AUSV_vals) ≈ S₀[r + 1] # Test truncation error @test ϵ1 ≈ norm(view(S₀, (r + 1):minmn)) atol = atol From 49dd07aefab026339815bf1f9c3352cca47bcd2b Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Wed, 7 Jan 2026 11:44:39 -0500 Subject: [PATCH 09/10] Try generating AMD unitary special-case --- test/testsuite/TestSuite.jl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/testsuite/TestSuite.jl b/test/testsuite/TestSuite.jl index a0763c7f..ed33f1c7 100644 --- a/test/testsuite/TestSuite.jl +++ b/test/testsuite/TestSuite.jl @@ -77,6 +77,11 @@ isrightcomplete(V::AnyCuMatrix, N::AnyCuMatrix) = isrightcomplete(collect(V), co isrightcomplete(V::AnyROCMatrix, N::AnyROCMatrix) = isrightcomplete(collect(V), collect(N)) instantiate_unitary(T, A, sz) = qr_compact(randn!(similar(A, eltype(T), sz, sz)))[1] +# AMDGPU can't generate ComplexF32 random numbers +function instantiate_unitary(T, A::ROCMatrix{ComplexF32}, sz) + sqA = randn!(similar(A, real(eltype(T)), sz, sz)) .+ im .* randn!(similar(A, real(eltype(T)), sz, sz)) + return qr_compact(sqA)[1] +end instantiate_unitary(::Type{<:Diagonal}, A, sz) = Diagonal(fill!(similar(parent(A), eltype(A), sz), one(eltype(A)))) include("qr.jl") From c78f557c20303020e85843a95ccc019216ad3aa5 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Wed, 7 Jan 2026 12:11:33 -0500 Subject: [PATCH 10/10] AMDGPU hates complex rand --- test/testsuite/TestSuite.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/testsuite/TestSuite.jl b/test/testsuite/TestSuite.jl index ed33f1c7..2f3fde50 100644 --- a/test/testsuite/TestSuite.jl +++ b/test/testsuite/TestSuite.jl @@ -78,7 +78,7 @@ isrightcomplete(V::AnyROCMatrix, N::AnyROCMatrix) = isrightcomplete(collect(V), instantiate_unitary(T, A, sz) = qr_compact(randn!(similar(A, eltype(T), sz, sz)))[1] # AMDGPU can't generate ComplexF32 random numbers -function instantiate_unitary(T, A::ROCMatrix{ComplexF32}, sz) +function instantiate_unitary(T, A::ROCMatrix{<:Complex}, sz) sqA = randn!(similar(A, real(eltype(T)), sz, sz)) .+ im .* randn!(similar(A, real(eltype(T)), sz, sz)) return qr_compact(sqA)[1] end