From afc461e1e57c611ac1103cf21d877a523144f765 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Fri, 2 Jan 2026 07:47:19 -0500 Subject: [PATCH 1/7] Initial support for CUDA + factorizations --- test/cuda/factorizations.jl | 499 ++++++++++++++++++++++++++++++++++++ test/runtests.jl | 1 + 2 files changed, 500 insertions(+) create mode 100644 test/cuda/factorizations.jl diff --git a/test/cuda/factorizations.jl b/test/cuda/factorizations.jl new file mode 100644 index 000000000..d44bb878a --- /dev/null +++ b/test/cuda/factorizations.jl @@ -0,0 +1,499 @@ +using Adapt, CUDA, cuTENSOR +using Test, TestExtras +using TensorKit +using LinearAlgebra: LinearAlgebra +using MatrixAlgebraKit: diagview +const CUDAExt = Base.get_extension(TensorKit, :TensorKitCUDAExt) +@assert !isnothing(CUDAExt) +const CuTensorMap = getglobal(CUDAExt, :CuTensorMap) +const curand = getglobal(CUDAExt, :curand) +const curandn = getglobal(CUDAExt, :curandn) +const curand! = getglobal(CUDAExt, :curand!) +using CUDA: rand as curand, rand! as curand!, randn as curandn, randn! as curandn! + +@isdefined(TestSetup) || include("../setup.jl") +using .TestSetup + +spacelist = try + if ENV["CI"] == "true" + println("Detected running on CI") + if Sys.iswindows() + (Vtr, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VIB_diag) + elseif Sys.isapple() + (Vtr, Vℤ₃, VfU₁, VfSU₂, VIB_M) + else + (Vtr, VU₁, VCU₁, VSU₂, VfSU₂, VIB_diag, VIB_M) + end + else + (Vtr, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂, VIB_diag, VIB_M) + end +catch + (Vtr, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂, VIB_diag, VIB_M) +end + +eltypes = (Float32, ComplexF64) + +for V in spacelist + I = sectortype(first(V)) + Istr = TensorKit.type_repr(I) + println("---------------------------------------") + println("Factorizations with symmetry: $Istr") + println("---------------------------------------") + @timedtestset "Factorizations with symmetry: $Istr" verbose = true begin + V1, V2, V3, V4, V5 = V + W = V1 ⊗ V2 + @assert !isempty(blocksectors(W)) + @assert !isempty(intersect(blocksectors(V4), blocksectors(W))) + + @testset "QR decomposition" begin + for T in eltypes, + t in ( + CUDA.rand(T, W, W), + #CUDA.rand(T, W, W)', + CUDA.rand(T, W, V4), + #CUDA.rand(T, V4, W)', + DiagonalTensorMap(CUDA.rand(T, reduceddim(V1)), V1), + ) + + Q, R = @constinferred qr_full(t) + @test Q * R ≈ t + @test isunitary(Q) + + Q, R = @constinferred qr_compact(t) + @test Q * R ≈ t + @test isisometric(Q) + + Q, R = @constinferred left_orth(t) + @test Q * R ≈ t + @test isisometric(Q) + + N = @constinferred qr_null(t) + @test isisometric(N) + @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) + + N = @constinferred left_null(t) + @test isisometric(N) + @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) + end + + # empty tensor + for T in eltypes + t = CUDA.rand(T, V1 ⊗ V2, zerospace(V1)) + + Q, R = @constinferred qr_full(t) + @test Q * R ≈ t + @test isunitary(Q) + @test dim(R) == dim(t) == 0 + + Q, R = @constinferred qr_compact(t) + @test Q * R ≈ t + @test isisometric(Q) + @test dim(Q) == dim(R) == dim(t) + + Q, R = @constinferred left_orth(t) + @test Q * R ≈ t + @test isisometric(Q) + @test dim(Q) == dim(R) == dim(t) + + N = @constinferred qr_null(t) + @test isunitary(N) + @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) + end + end + + @testset "LQ decomposition" begin + for T in eltypes, + t in ( + CUDA.rand(T, W, W), + #CUDA.rand(T, W, W)', + CUDA.rand(T, W, V4), + #CUDA.rand(T, V4, W)', + DiagonalTensorMap(CUDA.rand(T, reduceddim(V1)), V1), + ) + + L, Q = @constinferred lq_full(t) + @test L * Q ≈ t + @test isunitary(Q) + + L, Q = @constinferred lq_compact(t) + @test L * Q ≈ t + @test isisometric(Q; side = :right) + + L, Q = @constinferred right_orth(t) + @test L * Q ≈ t + @test isisometric(Q; side = :right) + + Nᴴ = @constinferred lq_null(t) + @test isisometric(Nᴴ; side = :right) + @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t)) + end + + for T in eltypes + # empty tensor + t = CUDA.rand(T, zerospace(V1), V1 ⊗ V2) + + L, Q = @constinferred lq_full(t) + @test L * Q ≈ t + @test isunitary(Q) + @test dim(L) == dim(t) == 0 + + L, Q = @constinferred lq_compact(t) + @test L * Q ≈ t + @test isisometric(Q; side = :right) + @test dim(Q) == dim(L) == dim(t) + + L, Q = @constinferred right_orth(t) + @test L * Q ≈ t + @test isisometric(Q; side = :right) + @test dim(Q) == dim(L) == dim(t) + + Nᴴ = @constinferred lq_null(t) + @test isunitary(Nᴴ) + @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t)) + end + end + + @testset "Polar decomposition" begin + for T in eltypes, + t in ( + CUDA.rand(T, W, W), + #CUDA.rand(T, W, W)', + CUDA.rand(T, W, V4), + #CUDA.rand(T, V4, W)', + DiagonalTensorMap(CUDA.rand(T, reduceddim(V1)), V1), + ) + + @assert domain(t) ≾ codomain(t) + w, p = @constinferred left_polar(t) + @test w * p ≈ t + @test isisometric(w) + @test isposdef(p) + + w, p = @constinferred left_orth(t; alg = :polar) + @test w * p ≈ t + @test isisometric(w) + end + + for T in eltypes, + t in ( + CUDA.rand(T, W, W), + #CUDA.rand(T, W, W)', + CUDA.rand(T, V4, W), + #CUDA.rand(T, W, V4)', + ) + + @assert codomain(t) ≾ domain(t) + p, wᴴ = @constinferred right_polar(t) + @test p * wᴴ ≈ t + @test isisometric(wᴴ; side = :right) + @test isposdef(p) + + p, wᴴ = @constinferred right_orth(t; alg = :polar) + @test p * wᴴ ≈ t + @test isisometric(wᴴ; side = :right) + end + end + + @testset "SVD" begin + for T in eltypes, + t in ( + CUDA.rand(T, W, W), + #CUDA.rand(T, W, W)', + CUDA.rand(T, W, V4), + CUDA.rand(T, V4, W), + #CUDA.rand(T, W, V4)', + #CUDA.rand(T, V4, W)', + DiagonalTensorMap(CUDA.rand(T, reduceddim(V1)), V1), + ) + + u, s, vᴴ = @constinferred svd_full(t) + @test u * s * vᴴ ≈ t + @test isunitary(u) + @test isunitary(vᴴ) + + u, s, vᴴ = @constinferred svd_compact(t) + @test u * s * vᴴ ≈ t + @test isisometric(u) + @test isposdef(s) + @test isisometric(vᴴ; side = :right) + + s′ = @constinferred svd_vals(t) + @test parent(s′) ≈ parent(diagview(s)) + @test s′ isa TensorKit.SectorVector + + v, c = @constinferred left_orth(t; alg = :svd) + @test v * c ≈ t + @test isisometric(v) + + c, vᴴ = @constinferred right_orth(t; alg = :svd) + @test c * vᴴ ≈ t + @test isisometric(vᴴ; side = :right) + + N = @constinferred left_null(t; alg = :svd) + @test isisometric(N) + @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) + + #N = @constinferred left_null(t; trunc = (; atol = 100 * eps(norm(t)))) + #@test isisometric(N) + #@test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) + + Nᴴ = @constinferred right_null(t; alg = :svd) + @test isisometric(Nᴴ; side = :right) + @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t)) + + #Nᴴ = @constinferred right_null(t; trunc = (; atol = 100 * eps(norm(t)))) + #@test isisometric(Nᴴ; side = :right) + #@test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t)) + end + + # empty tensor + for T in eltypes, t in ( + CUDA.rand(T, W, zerospace(V1)), + CUDA.rand(T, zerospace(V1), W), + ) + U, S, Vᴴ = @constinferred svd_full(t) + @test U * S * Vᴴ ≈ t + @test isunitary(U) + @test isunitary(Vᴴ) + + U, S, Vᴴ = @constinferred svd_compact(t) + @test U * S * Vᴴ ≈ t + @test dim(U) == dim(S) == dim(Vᴴ) == dim(t) == 0 + end + end + + #=@testset "truncated SVD" begin + for T in eltypes, + t in ( + CUDA.randn(T, W, W), + #CUDA.randn(T, W, W)', + CUDA.randn(T, W, V4), + CUDA.randn(T, V4, W), + #CUDA.randn(T, W, V4)', + #CUDA.randn(T, V4, W)', + DiagonalTensorMap(CUDA.randn(T, reduceddim(V1)), V1), + ) + + @constinferred normalize!(t) + + U, S, Vᴴ, ϵ = @constinferred svd_trunc(t; trunc = notrunc()) + @test U * S * Vᴴ ≈ t + @test ϵ ≈ 0 + @test isisometric(U) + @test isisometric(Vᴴ; side = :right) + + # dimension of S is a float for IsingBimodule + nvals = round(Int, dim(domain(S)) / 2) + trunc = truncrank(nvals) + U1, S1, Vᴴ1, ϵ1 = @constinferred svd_trunc(t; trunc) + @test t * Vᴴ1' ≈ U1 * S1 + @test isisometric(U1) + @test isisometric(Vᴴ1; side = :right) + @test norm(t - U1 * S1 * Vᴴ1) ≈ ϵ1 atol = eps(real(T))^(4 / 5) + @test dim(domain(S1)) <= nvals + + λ = minimum(diagview(S1)) + trunc = trunctol(; atol = λ - 10eps(λ)) + U2, S2, Vᴴ2, ϵ2 = @constinferred svd_trunc(t; trunc) + @test t * Vᴴ2' ≈ U2 * S2 + @test isisometric(U2) + @test isisometric(Vᴴ2; side = :right) + @test norm(t - U2 * S2 * Vᴴ2) ≈ ϵ2 atol = eps(real(T))^(4 / 5) + @test minimum(diagview(S1)) >= λ + @test U2 ≈ U1 + @test S2 ≈ S1 + @test Vᴴ2 ≈ Vᴴ1 + @test ϵ1 ≈ ϵ2 + + trunc = truncspace(space(S2, 1)) + U3, S3, Vᴴ3, ϵ3 = @constinferred svd_trunc(t; trunc) + @test t * Vᴴ3' ≈ U3 * S3 + @test isisometric(U3) + @test isisometric(Vᴴ3; side = :right) + @test norm(t - U3 * S3 * Vᴴ3) ≈ ϵ3 atol = eps(real(T))^(4 / 5) + @test space(S3, 1) ≾ space(S2, 1) + + for trunc in (truncerror(; atol = ϵ2), truncerror(; rtol = ϵ2 / norm(t))) + U4, S4, Vᴴ4, ϵ4 = @constinferred svd_trunc(t; trunc) + @test t * Vᴴ4' ≈ U4 * S4 + @test isisometric(U4) + @test isisometric(Vᴴ4; side = :right) + @test norm(t - U4 * S4 * Vᴴ4) ≈ ϵ4 atol = eps(real(T))^(4 / 5) + @test ϵ4 ≤ ϵ2 + end + + trunc = truncrank(nvals) & trunctol(; atol = λ - 10eps(λ)) + U5, S5, Vᴴ5, ϵ5 = @constinferred svd_trunc(t; trunc) + @test t * Vᴴ5' ≈ U5 * S5 + @test isisometric(U5) + @test isisometric(Vᴴ5; side = :right) + @test norm(t - U5 * S5 * Vᴴ5) ≈ ϵ5 atol = eps(real(T))^(4 / 5) + @test minimum(diagview(S5)) >= λ + @test dim(domain(S5)) ≤ nvals + end + end=# # TODO + + @testset "Eigenvalue decomposition" begin + for T in eltypes, + t in ( + CUDA.rand(T, V1, V1), + CUDA.rand(T, W, W), + #CUDA.rand(T, W, W)', + DiagonalTensorMap(CUDA.rand(T, reduceddim(V1)), V1), + ) + + d, v = @constinferred eig_full(t) + @test t * v ≈ v * d + + d′ = @constinferred eig_vals(t) + @test parent(d′) ≈ parent(diagview(d)) + @test d′ isa TensorKit.SectorVector + + vdv = project_hermitian!(v' * v) + @test @constinferred isposdef(vdv) + t isa DiagonalTensorMap || @test !isposdef(t) # unlikely for non-hermitian map + + #=nvals = round(Int, dim(domain(t)) / 2) + d, v = @constinferred eig_trunc(t; trunc = truncrank(nvals)) + @test t * v ≈ v * d + @test dim(domain(d)) ≤ nvals=# + + t2 = @constinferred project_hermitian(t) + D, V = eigen(t2) + @test isisometric(V) + D̃, Ṽ = @constinferred eigh_full(t2) + @test D ≈ D̃ + @test V ≈ Ṽ + λ = minimum(real, parent(diagview(D))) + @test cond(Ṽ) ≈ one(real(T)) + @test isposdef(t2) == isposdef(λ) + @test isposdef(t2 - λ * one(t2) + 0.1 * one(t2)) + @test !isposdef(t2 - λ * one(t2) - 0.1 * one(t2)) + + d, v = @constinferred eigh_full(t2) + @test t2 * v ≈ v * d + @test isunitary(v) + + d′ = @constinferred eigh_vals(t2) + @test parent(d′) ≈ parent(diagview(d)) + @test d′ isa TensorKit.SectorVector + + λ = minimum(real, parent(diagview(d))) + @test cond(v) ≈ one(real(T)) + @test isposdef(t2) == isposdef(λ) + @test isposdef(t2 - λ * one(t) + 0.1 * one(t2)) + @test !isposdef(t2 - λ * one(t) - 0.1 * one(t2)) + + # TODO + #=d, v = @constinferred eigh_trunc(t2; trunc = truncrank(nvals)) + @test t2 * v ≈ v * d + @test dim(domain(d)) ≤ nvals=# + end + end + + @testset "Condition number and rank" begin + for T in eltypes, + t in ( + CUDA.rand(T, W, W), + #CUDA.rand(T, W, W)', + CUDA.rand(T, W, V4), + CUDA.rand(T, V4, W), + #CUDA.rand(T, W, V4)', + #CUDA.rand(T, V4, W)', + DiagonalTensorMap(CUDA.rand(T, reduceddim(V1)), V1), + ) + + d1, d2 = dim(codomain(t)), dim(domain(t)) + r = rank(t) + @test r == min(d1, d2) + @test typeof(r) == typeof(d1) + M = left_null(t) + @test @constinferred(rank(M)) + r ≈ d1 + Mᴴ = right_null(t) + @test rank(Mᴴ) + r ≈ d2 + end + for T in eltypes + u = unitary(T, V1 ⊗ V2, V1 ⊗ V2) + @test @constinferred(cond(u)) ≈ one(real(T)) + @test @constinferred(rank(u)) == dim(V1 ⊗ V2) + + t = CUDA.rand(T, zerospace(V1), W) + @test rank(t) == 0 + t2 = CUDA.rand(T, zerospace(V1) * zerospace(V2), zerospace(V1) * zerospace(V2)) + @test rank(t2) == 0 + @test cond(t2) == 0.0 + end + for T in eltypes, t in ( + CUDA.rand(T, W, W), + #CUDA.rand(T, W, W)', + ) + project_hermitian!(t) + vals = @constinferred LinearAlgebra.eigvals(t) + λmax = maximum(s -> maximum(abs, s), values(vals)) + λmin = minimum(s -> minimum(abs, s), values(vals)) + @test cond(t) ≈ λmax / λmin + end + end + + @testset "Hermitian projections" begin + for T in eltypes, + t in ( + CUDA.rand(T, V1, V1), + CUDA.rand(T, W, W), + #CUDA.rand(T, W, W)', + DiagonalTensorMap(CUDA.rand(T, reduceddim(V1)), V1), + ) + normalize!(t) + noisefactor = eps(real(T))^(3 / 4) + + th = (t + t') / 2 + ta = (t - t') / 2 + tc = copy(t) + + th′ = @constinferred project_hermitian(t) + @test ishermitian(th′) + @test th′ ≈ th + @test t == tc + th_approx = th + noisefactor * ta + @test !ishermitian(th_approx) || (T <: Real && t isa DiagonalTensorMap) + @test ishermitian(th_approx; atol = 10 * noisefactor) + + ta′ = project_antihermitian(t) + @test isantihermitian(ta′) + @test ta′ ≈ ta + @test t == tc + ta_approx = ta + noisefactor * th + @test !isantihermitian(ta_approx) + @test isantihermitian(ta_approx; atol = 10 * noisefactor) || (T <: Real && t isa DiagonalTensorMap) + end + end + + @testset "Isometric projections" begin + for T in eltypes, + t in ( + CUDA.randn(T, W, W), + #CUDA.randn(T, W, W)', + CUDA.randn(T, W, V4), + #CUDA.randn(T, V4, W)', + ) + t2 = project_isometric(t) + @test isisometric(t2) + t3 = project_isometric(t2) + @test t3 ≈ t2 # stability of the projection + @test t2 * (t2' * t) ≈ t + + tc = similar(t) + t3 = @constinferred project_isometric!(copy!(tc, t), t2) + @test t3 === t2 + @test isisometric(t2) + + # test that t2 is closer to A then any other isometry + for k in 1:10 + δt = CUDA.randn!(similar(t)) + t3 = project_isometric(t + δt / 100) + @test norm(t - t3) > norm(t - t2) + end + end + end + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 9513378db..3b0bfe8b0 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -50,6 +50,7 @@ istestfile(fn) = endswith(fn, ".jl") && !contains(fn, "setup") using CUDA CUDA.functional() || continue @time include("cuda/tensors.jl") + @time include("cuda/factorizations.jl") elseif is_buildkite continue end From ed8f52b7a05734e0f4bc7f0ab945351abdc8857d Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Fri, 2 Jan 2026 13:56:36 -0500 Subject: [PATCH 2/7] Working adjoint --- ext/TensorKitCUDAExt/cutensormap.jl | 8 ++++++ src/factorizations/adjoint.jl | 3 +++ test/cuda/factorizations.jl | 42 ++++++++++++++--------------- 3 files changed, 32 insertions(+), 21 deletions(-) diff --git a/ext/TensorKitCUDAExt/cutensormap.jl b/ext/TensorKitCUDAExt/cutensormap.jl index b4aa7f7a2..28927687b 100644 --- a/ext/TensorKitCUDAExt/cutensormap.jl +++ b/ext/TensorKitCUDAExt/cutensormap.jl @@ -164,3 +164,11 @@ for f in (:sqrt, :log, :asin, :acos, :acosh, :atanh, :acoth) return tf end end + +function Base.copy!(tdst::CuTensorMap, tsrc::AdjointTensorMap) + space(tdst) == space(tsrc) || throw(SpaceMismatch("$(space(tdst)) ≠ $(space(tsrc))")) + for ((c, bdst), (_, bsrc)) in zip(blocks(tdst), blocks(tsrc)) + copy!(bdst, bsrc) + end + return tdst +end diff --git a/src/factorizations/adjoint.jl b/src/factorizations/adjoint.jl index 29f27203c..8a75fc2fd 100644 --- a/src/factorizations/adjoint.jl +++ b/src/factorizations/adjoint.jl @@ -9,6 +9,9 @@ _adjoint(alg::MAK.LAPACK_HouseholderRQ) = MAK.LAPACK_HouseholderQL(; alg.kwargs. _adjoint(alg::MAK.PolarViaSVD) = MAK.PolarViaSVD(_adjoint(alg.svd_alg)) _adjoint(alg::AbstractAlgorithm) = alg +_adjoint(alg::MAK.CUSOLVER_HouseholderQR) = MAK.LQViaTransposedQR(alg) +_adjoint(alg::MAK.LQViaTransposedQR) = alg.qr_alg + for f in [ :svd_compact, :svd_full, :svd_vals, diff --git a/test/cuda/factorizations.jl b/test/cuda/factorizations.jl index d44bb878a..8e03368ab 100644 --- a/test/cuda/factorizations.jl +++ b/test/cuda/factorizations.jl @@ -49,9 +49,9 @@ for V in spacelist for T in eltypes, t in ( CUDA.rand(T, W, W), - #CUDA.rand(T, W, W)', + CUDA.rand(T, W, W)', CUDA.rand(T, W, V4), - #CUDA.rand(T, V4, W)', + CUDA.rand(T, V4, W)', DiagonalTensorMap(CUDA.rand(T, reduceddim(V1)), V1), ) @@ -105,9 +105,9 @@ for V in spacelist for T in eltypes, t in ( CUDA.rand(T, W, W), - #CUDA.rand(T, W, W)', + CUDA.rand(T, W, W)', CUDA.rand(T, W, V4), - #CUDA.rand(T, V4, W)', + CUDA.rand(T, V4, W)', DiagonalTensorMap(CUDA.rand(T, reduceddim(V1)), V1), ) @@ -157,9 +157,9 @@ for V in spacelist for T in eltypes, t in ( CUDA.rand(T, W, W), - #CUDA.rand(T, W, W)', + CUDA.rand(T, W, W)', CUDA.rand(T, W, V4), - #CUDA.rand(T, V4, W)', + CUDA.rand(T, V4, W)', DiagonalTensorMap(CUDA.rand(T, reduceddim(V1)), V1), ) @@ -167,7 +167,7 @@ for V in spacelist w, p = @constinferred left_polar(t) @test w * p ≈ t @test isisometric(w) - @test isposdef(p) + @test isposdef(project_hermitian!(p)) w, p = @constinferred left_orth(t; alg = :polar) @test w * p ≈ t @@ -177,16 +177,16 @@ for V in spacelist for T in eltypes, t in ( CUDA.rand(T, W, W), - #CUDA.rand(T, W, W)', + CUDA.rand(T, W, W)', CUDA.rand(T, V4, W), - #CUDA.rand(T, W, V4)', + CUDA.rand(T, W, V4)', ) @assert codomain(t) ≾ domain(t) p, wᴴ = @constinferred right_polar(t) @test p * wᴴ ≈ t @test isisometric(wᴴ; side = :right) - @test isposdef(p) + @test isposdef(project_hermitian!(p)) p, wᴴ = @constinferred right_orth(t; alg = :polar) @test p * wᴴ ≈ t @@ -198,11 +198,11 @@ for V in spacelist for T in eltypes, t in ( CUDA.rand(T, W, W), - #CUDA.rand(T, W, W)', + CUDA.rand(T, W, W)', CUDA.rand(T, W, V4), CUDA.rand(T, V4, W), - #CUDA.rand(T, W, V4)', - #CUDA.rand(T, V4, W)', + CUDA.rand(T, W, V4)', + CUDA.rand(T, V4, W)', DiagonalTensorMap(CUDA.rand(T, reduceddim(V1)), V1), ) @@ -338,7 +338,7 @@ for V in spacelist t in ( CUDA.rand(T, V1, V1), CUDA.rand(T, W, W), - #CUDA.rand(T, W, W)', + CUDA.rand(T, W, W)', DiagonalTensorMap(CUDA.rand(T, reduceddim(V1)), V1), ) @@ -395,11 +395,11 @@ for V in spacelist for T in eltypes, t in ( CUDA.rand(T, W, W), - #CUDA.rand(T, W, W)', + CUDA.rand(T, W, W)', CUDA.rand(T, W, V4), CUDA.rand(T, V4, W), - #CUDA.rand(T, W, V4)', - #CUDA.rand(T, V4, W)', + CUDA.rand(T, W, V4)', + CUDA.rand(T, V4, W)', DiagonalTensorMap(CUDA.rand(T, reduceddim(V1)), V1), ) @@ -425,7 +425,7 @@ for V in spacelist end for T in eltypes, t in ( CUDA.rand(T, W, W), - #CUDA.rand(T, W, W)', + CUDA.rand(T, W, W)', ) project_hermitian!(t) vals = @constinferred LinearAlgebra.eigvals(t) @@ -440,7 +440,7 @@ for V in spacelist t in ( CUDA.rand(T, V1, V1), CUDA.rand(T, W, W), - #CUDA.rand(T, W, W)', + CUDA.rand(T, W, W)', DiagonalTensorMap(CUDA.rand(T, reduceddim(V1)), V1), ) normalize!(t) @@ -472,9 +472,9 @@ for V in spacelist for T in eltypes, t in ( CUDA.randn(T, W, W), - #CUDA.randn(T, W, W)', + CUDA.randn(T, W, W)', CUDA.randn(T, W, V4), - #CUDA.randn(T, V4, W)', + CUDA.randn(T, V4, W)', ) t2 = project_isometric(t) @test isisometric(t2) From 54259b15abf9474e10d70a5f189fb2ca7e2a33e3 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Mon, 5 Jan 2026 07:13:33 +0100 Subject: [PATCH 3/7] Update test/cuda/factorizations.jl Co-authored-by: Lukas Devos --- test/cuda/factorizations.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/cuda/factorizations.jl b/test/cuda/factorizations.jl index 8e03368ab..9155e5212 100644 --- a/test/cuda/factorizations.jl +++ b/test/cuda/factorizations.jl @@ -429,8 +429,8 @@ for V in spacelist ) project_hermitian!(t) vals = @constinferred LinearAlgebra.eigvals(t) - λmax = maximum(s -> maximum(abs, s), values(vals)) - λmin = minimum(s -> minimum(abs, s), values(vals)) + λmax = maximum(abs, parent(vals)) + λmin = minimum(abs, parent(vals)) @test cond(t) ≈ λmax / λmin end end From 07a89b5867d7d75cfe79c9b66e34cef519376921 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Mon, 5 Jan 2026 07:13:47 +0100 Subject: [PATCH 4/7] Update test/cuda/factorizations.jl Co-authored-by: Jutho --- test/cuda/factorizations.jl | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/test/cuda/factorizations.jl b/test/cuda/factorizations.jl index 9155e5212..aa7936bd4 100644 --- a/test/cuda/factorizations.jl +++ b/test/cuda/factorizations.jl @@ -14,20 +14,16 @@ using CUDA: rand as curand, rand! as curand!, randn as curandn, randn! as curand @isdefined(TestSetup) || include("../setup.jl") using .TestSetup -spacelist = try - if ENV["CI"] == "true" - println("Detected running on CI") - if Sys.iswindows() - (Vtr, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VIB_diag) - elseif Sys.isapple() - (Vtr, Vℤ₃, VfU₁, VfSU₂, VIB_M) - else - (Vtr, VU₁, VCU₁, VSU₂, VfSU₂, VIB_diag, VIB_M) - end +spacelist = if get(ENV, "CI", "false") == "true" + println("Detected running on CI") + if Sys.iswindows() + (Vtr, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VIB_diag) + elseif Sys.isapple() + (Vtr, Vℤ₃, VfU₁, VfSU₂, VIB_M) else - (Vtr, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂, VIB_diag, VIB_M) + (Vtr, VU₁, VCU₁, VSU₂, VfSU₂, VIB_diag, VIB_M) end -catch +else (Vtr, Vℤ₃, VU₁, VfU₁, VCU₁, VSU₂, VfSU₂, VIB_diag, VIB_M) end From 02266d80e45a180657729636d3c1fc097c9ab047 Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Mon, 5 Jan 2026 04:31:23 -0500 Subject: [PATCH 5/7] Fix posdef and copy --- ext/TensorKitCUDAExt/cutensormap.jl | 14 +++++--------- src/tensors/linalg.jl | 4 +++- test/cuda/factorizations.jl | 8 ++++---- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/ext/TensorKitCUDAExt/cutensormap.jl b/ext/TensorKitCUDAExt/cutensormap.jl index 28927687b..09032c3b0 100644 --- a/ext/TensorKitCUDAExt/cutensormap.jl +++ b/ext/TensorKitCUDAExt/cutensormap.jl @@ -121,11 +121,15 @@ function LinearAlgebra.isposdef(t::CuTensorMap) # do our own hermitian check isherm = MatrixAlgebraKit.ishermitian(b; atol = eps(real(eltype(b))), rtol = eps(real(eltype(b)))) isherm || return false - isposdef(Hermitian(b)) || return false + isposdef(project_hermitian!(b)) || return false end return true end +function LinearAlgebra.isposdef(t::TensorKit.AdjointTensorMap{T, S, N₁, N₂, <:CuTensorMap}) where {T, S, N₁, N₂} + return isposdef(adjoint(t)) +end + function Base.promote_rule( ::Type{<:TT₁}, ::Type{<:TT₂} @@ -164,11 +168,3 @@ for f in (:sqrt, :log, :asin, :acos, :acosh, :atanh, :acoth) return tf end end - -function Base.copy!(tdst::CuTensorMap, tsrc::AdjointTensorMap) - space(tdst) == space(tsrc) || throw(SpaceMismatch("$(space(tdst)) ≠ $(space(tsrc))")) - for ((c, bdst), (_, bsrc)) in zip(blocks(tdst), blocks(tsrc)) - copy!(bdst, bsrc) - end - return tdst -end diff --git a/src/tensors/linalg.jl b/src/tensors/linalg.jl index 4b2c7d4ee..f37d2d0e9 100644 --- a/src/tensors/linalg.jl +++ b/src/tensors/linalg.jl @@ -197,13 +197,15 @@ LinearAlgebra.isdiag(t::AbstractTensorMap) = all(LinearAlgebra.isdiag ∘ last, # In-place methods #------------------ # Wrapping the blocks in a StridedView enables multithreading if JULIA_NUM_THREADS > 1 +# but can cause problems with underlying array types (like CuArray) that don't yet play +# nicely with StridedViews # TODO: reconsider this strategy, consider spawning different threads for different blocks # Copy, adjoint and fill: function Base.copy!(tdst::AbstractTensorMap, tsrc::AbstractTensorMap) space(tdst) == space(tsrc) || throw(SpaceMismatch("$(space(tdst)) ≠ $(space(tsrc))")) for ((c, bdst), (_, bsrc)) in zip(blocks(tdst), blocks(tsrc)) - copy!(StridedView(bdst), StridedView(bsrc)) + copy!(bdst, bsrc) end return tdst end diff --git a/test/cuda/factorizations.jl b/test/cuda/factorizations.jl index aa7936bd4..f7b6ad6d6 100644 --- a/test/cuda/factorizations.jl +++ b/test/cuda/factorizations.jl @@ -150,7 +150,7 @@ for V in spacelist end @testset "Polar decomposition" begin - for T in eltypes, + @testset for T in eltypes, t in ( CUDA.rand(T, W, W), CUDA.rand(T, W, W)', @@ -163,14 +163,14 @@ for V in spacelist w, p = @constinferred left_polar(t) @test w * p ≈ t @test isisometric(w) - @test isposdef(project_hermitian!(p)) + @test isposdef(p) w, p = @constinferred left_orth(t; alg = :polar) @test w * p ≈ t @test isisometric(w) end - for T in eltypes, + @testset for T in eltypes, t in ( CUDA.rand(T, W, W), CUDA.rand(T, W, W)', @@ -182,7 +182,7 @@ for V in spacelist p, wᴴ = @constinferred right_polar(t) @test p * wᴴ ≈ t @test isisometric(wᴴ; side = :right) - @test isposdef(project_hermitian!(p)) + @test isposdef(p) p, wᴴ = @constinferred right_orth(t; alg = :polar) @test p * wᴴ ≈ t From 788fed186c61052c0f890fd5cfa34db4d164e29d Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Mon, 5 Jan 2026 09:57:54 -0500 Subject: [PATCH 6/7] Comments --- ext/TensorKitCUDAExt/cutensormap.jl | 8 ++------ src/factorizations/adjoint.jl | 4 ++++ src/tensors/linalg.jl | 4 ---- 3 files changed, 6 insertions(+), 10 deletions(-) diff --git a/ext/TensorKitCUDAExt/cutensormap.jl b/ext/TensorKitCUDAExt/cutensormap.jl index 09032c3b0..3274e654a 100644 --- a/ext/TensorKitCUDAExt/cutensormap.jl +++ b/ext/TensorKitCUDAExt/cutensormap.jl @@ -119,17 +119,13 @@ function LinearAlgebra.isposdef(t::CuTensorMap) InnerProductStyle(spacetype(t)) === EuclideanInnerProduct() || return false for (c, b) in blocks(t) # do our own hermitian check - isherm = MatrixAlgebraKit.ishermitian(b; atol = eps(real(eltype(b))), rtol = eps(real(eltype(b)))) + isherm = MatrixAlgebraKit.ishermitian(b) isherm || return false - isposdef(project_hermitian!(b)) || return false + isposdef(Hermitian(b)) || return false end return true end -function LinearAlgebra.isposdef(t::TensorKit.AdjointTensorMap{T, S, N₁, N₂, <:CuTensorMap}) where {T, S, N₁, N₂} - return isposdef(adjoint(t)) -end - function Base.promote_rule( ::Type{<:TT₁}, ::Type{<:TT₂} diff --git a/src/factorizations/adjoint.jl b/src/factorizations/adjoint.jl index 8a75fc2fd..20d6d5986 100644 --- a/src/factorizations/adjoint.jl +++ b/src/factorizations/adjoint.jl @@ -111,3 +111,7 @@ function MAK.svd_compact!(t::AdjointTensorMap, F, alg::DiagonalAlgorithm) F′ = svd_compact!(adjoint(t), reverse(adjoint.(F)), _adjoint(alg)) return reverse(adjoint.(F′)) end + +function LinearAlgebra.isposdef(t::AdjointTensorMap) + return isposdef(adjoint(t)) +end diff --git a/src/tensors/linalg.jl b/src/tensors/linalg.jl index f37d2d0e9..38c45c1f9 100644 --- a/src/tensors/linalg.jl +++ b/src/tensors/linalg.jl @@ -196,10 +196,6 @@ LinearAlgebra.isdiag(t::AbstractTensorMap) = all(LinearAlgebra.isdiag ∘ last, # In-place methods #------------------ -# Wrapping the blocks in a StridedView enables multithreading if JULIA_NUM_THREADS > 1 -# but can cause problems with underlying array types (like CuArray) that don't yet play -# nicely with StridedViews -# TODO: reconsider this strategy, consider spawning different threads for different blocks # Copy, adjoint and fill: function Base.copy!(tdst::AbstractTensorMap, tsrc::AbstractTensorMap) From cf0418eee19340aba868fba47c553aeac66f437e Mon Sep 17 00:00:00 2001 From: Katharine Hyatt Date: Wed, 7 Jan 2026 10:24:45 -0500 Subject: [PATCH 7/7] Try using truncation on CUDA --- Project.toml | 4 ++++ test/cuda/factorizations.jl | 31 +++++++++++++++---------------- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/Project.toml b/Project.toml index 8ca217db8..02195559d 100644 --- a/Project.toml +++ b/Project.toml @@ -79,3 +79,7 @@ cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1" [targets] test = ["ArgParse", "Adapt", "Aqua", "Combinatorics", "CUDA", "cuTENSOR", "GPUArrays", "LinearAlgebra", "SafeTestsets", "TensorOperations", "Test", "TestExtras", "ChainRulesCore", "ChainRulesTestUtils", "FiniteDifferences", "Zygote"] + +[sources] +GPUArrays = {rev = "master", url = "https://github.com/JuliaGPU/GPUArrays.jl"} +MatrixAlgebraKit = {rev = "main", url = "https://github.com/QuantumKitHub/MatrixAlgebraKit.jl"} diff --git a/test/cuda/factorizations.jl b/test/cuda/factorizations.jl index f7b6ad6d6..2f8cf876e 100644 --- a/test/cuda/factorizations.jl +++ b/test/cuda/factorizations.jl @@ -1,4 +1,4 @@ -using Adapt, CUDA, cuTENSOR +using Adapt, GPUArrays, CUDA, cuTENSOR using Test, TestExtras using TensorKit using LinearAlgebra: LinearAlgebra @@ -229,17 +229,17 @@ for V in spacelist @test isisometric(N) @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) - #N = @constinferred left_null(t; trunc = (; atol = 100 * eps(norm(t)))) - #@test isisometric(N) - #@test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t)) + #=N = @constinferred left_null(t; trunc = (; atol = 100 * eps(norm(t)))) + @test isisometric(N) + @test norm(N' * t) ≈ 0 atol = 100 * eps(norm(t))=# Nᴴ = @constinferred right_null(t; alg = :svd) @test isisometric(Nᴴ; side = :right) @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t)) - #Nᴴ = @constinferred right_null(t; trunc = (; atol = 100 * eps(norm(t)))) - #@test isisometric(Nᴴ; side = :right) - #@test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t)) + #=Nᴴ = @constinferred right_null(t; trunc = (; atol = 100 * eps(norm(t)))) + @test isisometric(Nᴴ; side = :right) + @test norm(t * Nᴴ') ≈ 0 atol = 100 * eps(norm(t))=# end # empty tensor @@ -262,11 +262,11 @@ for V in spacelist for T in eltypes, t in ( CUDA.randn(T, W, W), - #CUDA.randn(T, W, W)', + CUDA.randn(T, W, W)', CUDA.randn(T, W, V4), CUDA.randn(T, V4, W), - #CUDA.randn(T, W, V4)', - #CUDA.randn(T, V4, W)', + CUDA.randn(T, W, V4)', + CUDA.randn(T, V4, W)', DiagonalTensorMap(CUDA.randn(T, reduceddim(V1)), V1), ) @@ -327,7 +327,7 @@ for V in spacelist @test minimum(diagview(S5)) >= λ @test dim(domain(S5)) ≤ nvals end - end=# # TODO + end=# @testset "Eigenvalue decomposition" begin for T in eltypes, @@ -349,10 +349,10 @@ for V in spacelist @test @constinferred isposdef(vdv) t isa DiagonalTensorMap || @test !isposdef(t) # unlikely for non-hermitian map - #=nvals = round(Int, dim(domain(t)) / 2) + nvals = round(Int, dim(domain(t)) / 2) d, v = @constinferred eig_trunc(t; trunc = truncrank(nvals)) @test t * v ≈ v * d - @test dim(domain(d)) ≤ nvals=# + @test dim(domain(d)) ≤ nvals t2 = @constinferred project_hermitian(t) D, V = eigen(t2) @@ -380,10 +380,9 @@ for V in spacelist @test isposdef(t2 - λ * one(t) + 0.1 * one(t2)) @test !isposdef(t2 - λ * one(t) - 0.1 * one(t2)) - # TODO - #=d, v = @constinferred eigh_trunc(t2; trunc = truncrank(nvals)) + d, v = @constinferred eigh_trunc(t2; trunc = truncrank(nvals)) @test t2 * v ≈ v * d - @test dim(domain(d)) ≤ nvals=# + @test dim(domain(d)) ≤ nvals end end