
Conversation

@eschnett

Julia 1.13 added a diag::Bool argument to copytri!. We need to mimic that so that the GPUArrays overload matches the new signature.
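
Concretely, the overload ends up version-gated along these lines (a minimal sketch only; the real kernel bodies are in the diff below, and AbstractGPUMatrix is GPUArrays' usual matrix alias):

    using LinearAlgebra, GPUArrays

    if VERSION >= v"1.13-a"
        # Julia 1.13+: match the new four-argument signature; diag controls
        # whether the diagonal is copied (and conjugated) as well.
        function LinearAlgebra.copytri!(A::AbstractGPUMatrix, uplo::AbstractChar,
                                        conjugate::Bool = false, diag::Bool = false)
            # ... launch the triangle-copy kernels (see src/host/linalg.jl) ...
            return A
        end
    else
        # pre-1.13: keep the three-argument signature
        function LinearAlgebra.copytri!(A::AbstractGPUMatrix, uplo::AbstractChar,
                                        conjugate::Bool = false)
            # ... same kernels, without the diag offset ...
            return A
        end
    end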

@github-actions
Contributor

github-actions bot commented Jan 19, 2026

Your PR requires formatting changes to meet the project's style guidelines.
Please consider running Runic (git runic master) to apply these changes.

Suggested changes:
diff --git a/src/host/linalg.jl b/src/host/linalg.jl
index 5f3b67a..fc8d200 100644
--- a/src/host/linalg.jl
+++ b/src/host/linalg.jl
@@ -116,8 +116,8 @@ if VERSION >= v"1.13-a"
                 I = @index(Global, Cartesian)
                 i, j = Tuple(I)
                 if j + diag > i
-                    @inbounds _A[j,i] = conj(_A[i,j])
-              end
+                    @inbounds _A[j, i] = conj(_A[i, j])
+                end
             end
             U_conj!(get_backend(A))(A; ndrange = size(A))
         elseif uplo == 'U' && !conjugate
@@ -125,8 +125,8 @@ if VERSION >= v"1.13-a"
                 I = @index(Global, Cartesian)
                 i, j = Tuple(I)
                 if j + diag > i
-                    @inbounds _A[j,i] = _A[i,j]
-              end
+                    @inbounds _A[j, i] = _A[i, j]
+                end
             end
             U_noconj!(get_backend(A))(A; ndrange = size(A))
         elseif uplo == 'L' && conjugate
@@ -134,7 +134,7 @@ if VERSION >= v"1.13-a"
                 I = @index(Global, Cartesian)
                 i, j = Tuple(I)
                 if j + diag > i
-                    @inbounds _A[i,j] = conj(_A[j,i])
+                    @inbounds _A[i, j] = conj(_A[j, i])
                 end
             end
             L_conj!(get_backend(A))(A; ndrange = size(A))
@@ -143,14 +143,14 @@ if VERSION >= v"1.13-a"
                 I = @index(Global, Cartesian)
                 i, j = Tuple(I)
                 if j + diag > i
-                    @inbounds _A[i,j] = _A[j,i]
+                    @inbounds _A[i, j] = _A[j, i]
                 end
             end
             L_noconj!(get_backend(A))(A; ndrange = size(A))
         else
             throw(ArgumentError("uplo argument must be 'U' (upper) or 'L' (lower), got $uplo"))
         end
-        A
+        return A
     end
 
 else
@@ -163,8 +163,8 @@ else
                 I = @index(Global, Cartesian)
                 i, j = Tuple(I)
                 if j > i
-                    @inbounds _A[j,i] = conj(_A[i,j])
-              end
+                    @inbounds _A[j, i] = conj(_A[i, j])
+                end
             end
             U_conj!(get_backend(A))(A; ndrange = size(A))
         elseif uplo == 'U' && !conjugate
@@ -172,8 +172,8 @@ else
                 I = @index(Global, Cartesian)
                 i, j = Tuple(I)
                 if j > i
-                    @inbounds _A[j,i] = _A[i,j]
-              end
+                    @inbounds _A[j, i] = _A[i, j]
+                end
             end
             U_noconj!(get_backend(A))(A; ndrange = size(A))
         elseif uplo == 'L' && conjugate
@@ -181,7 +181,7 @@ else
                 I = @index(Global, Cartesian)
                 i, j = Tuple(I)
                 if j > i
-                    @inbounds _A[i,j] = conj(_A[j,i])
+                    @inbounds _A[i, j] = conj(_A[j, i])
                 end
             end
             L_conj!(get_backend(A))(A; ndrange = size(A))
@@ -190,14 +190,14 @@ else
                 I = @index(Global, Cartesian)
                 i, j = Tuple(I)
                 if j > i
-                    @inbounds _A[i,j] = _A[j,i]
+                    @inbounds _A[i, j] = _A[j, i]
                 end
             end
             L_noconj!(get_backend(A))(A; ndrange = size(A))
         else
             throw(ArgumentError("uplo argument must be 'U' (upper) or 'L' (lower), got $uplo"))
         end
-        A
+        return A
     end
 
 end
diff --git a/test/testsuite/linalg.jl b/test/testsuite/linalg.jl
index 8bd98c7..a1459a4 100644
--- a/test/testsuite/linalg.jl
+++ b/test/testsuite/linalg.jl
@@ -81,8 +81,8 @@
                         continue
                     end
                     n = 128
-                    areal = randn(n,n)/2
-                    aimg  = randn(n,n)/2
+                    areal = randn(n, n) / 2
+                    aimg = randn(n, n) / 2
                     a = convert(Matrix{eltya}, eltya <: Complex ? complex.(areal, aimg) : areal)
                     @test compare(x -> LinearAlgebra.copytri!(x, uplo, conjugate, diag), AT, a)
                 end
@@ -93,8 +93,8 @@
                         continue
                     end
                     n = 128
-                    areal = randn(n,n)/2
-                    aimg  = randn(n,n)/2
+                    areal = randn(n, n) / 2
+                    aimg = randn(n, n) / 2
                     a = convert(Matrix{eltya}, eltya <: Complex ? complex.(areal, aimg) : areal)
                     @test compare(x -> LinearAlgebra.copytri!(x, uplo, conjugate), AT, a)
                 end
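
For reference, the if j + diag > i condition in the kernels above works because Julia promotes Bool to Int in arithmetic: diag = false keeps the strict triangle test and leaves the diagonal alone, while diag = true widens it to include the diagonal. A quick REPL check:

    julia> i = j = 2;   # a diagonal entry

    julia> j + false > i   # diag = false: diagonal excluded
    false

    julia> j + true > i    # diag = true: true promotes to 1, so the diagonal is included
    true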

@eschnett
Author

I think Runic's indentation is wrong. Look at these lines:

        I = @index(Global, Cartesian)
        i, j = Tuple(I)
            if j + diag > i
          @inbounds _A[j,i] = conj(_A[i,j])
        end

The if statement is spuriously indented too far.

@maleadt
Member

maleadt commented Jan 20, 2026

Yeah feel free to ignore Runic...

@maleadt
Member

maleadt commented Jan 20, 2026

Can you add 1.13-nightly to the GitHub Actions configuration too?

Also, this seems to break CUDA.jl etc tests on <1.13:

Testing finished in 40 minutes, 23 seconds, 298 milliseconds
2026-01-19 22:24:29 CEST
	Worker 3 failed running test gpuarrays/linalg/core:
2026-01-19 22:24:29 CEST
	Some tests did not pass: 420 passed, 0 failed, 8 errored, 0 broken.
2026-01-19 22:24:29 CEST
	gpuarrays/linalg/core: Error During Test at /var/lib/buildkite-agent/builds/gpuci-17/julialang/gpuarrays-dot-jl/test/testsuite/linalg.jl:86
2026-01-19 22:24:30 CEST
	  Test threw exception
2026-01-19 22:24:30 CEST
	  Expression: compare((x->begin
2026-01-19 22:24:30 CEST
	            #= /var/lib/buildkite-agent/builds/gpuci-17/julialang/gpuarrays-dot-jl/test/testsuite/linalg.jl:86 =#
2026-01-19 22:24:30 CEST
	            LinearAlgebra.copytri!(x, uplo, conjugate, diag)
2026-01-19 22:24:30 CEST
	        end), AT, a)
2026-01-19 22:24:30 CEST
	  KernelException: exception thrown during kernel execution on device Quadro RTX 5000

@eschnett
Author

We're going to need to ignore the Buildkite error at the moment. It fails because CUDA.jl isn't working with Julia 1.13. That's a known problem, and a prerequisite for making it work is making GPUArrays.jl work with Julia 1.13...

@eschnett
Author

This is running tests on the macos-latest image, which recently changed from x86_64 to aarch64. Are you aware of this? Would this be using an aarch64 Julia, or running an x86_64 Julia on aarch64 via Rosetta? It's running very slowly, that's for sure.

@eschnett
Author

... it's using julia/1.12.4/aarch64, all is fine.

@maleadt
Member

maleadt commented Jan 21, 2026

CUDA.jl failing on 1.12 here but not on master (https://buildkite.com/julialang/gpuarrays-dot-jl/builds/1565) is interesting. I thought I fixed JuliaGPU/CUDA.jl#2946 in JuliaGPU/CUDA.jl#3016; maybe we need another quirk?

@eschnett
Author

The Julia 1.12 error is:

Worker 4 failed running test gpuarrays/linalg/norm:
Some tests did not pass: 664 passed, 0 failed, 32 errored, 0 broken.
gpuarrays/linalg/norm: Error During Test at /var/lib/buildkite-agent/builds/gpuci-15/julialang/gpuarrays-dot-jl/test/testsuite/linalg.jl:525
  Test threw exception
  Expression: compare(norm, AT, arr, Ref(p))
  Failed to compile PTX code (ptxas exited with code 255)
  Invocation arguments: --generate-line-info --verbose --gpu-name sm_75 --output-file /tmp/jl_7apmpGDCDA.cubin /tmp/jl_nE3ul05szU.ptx
  ptxas /tmp/jl_nE3ul05szU.ptx, line 671; error   : Modifier '.NaN' requires .target sm_80 or higher
  ptxas /tmp/jl_nE3ul05szU.ptx, line 671; error   : Feature 'max.f16 or max.f16x2' requires .target sm_80 or higher

It seems that some part of the infrastructure generates code for a newer GPU than it should.

@eschnett
Author

I think I see what is happening. The CI machines have different GPUs:

  • NVIDIA A100-PCIE-40GB MIG 1g.5gb (sm_80, 4.713 GiB / 4.750 GiB available)
  • Quadro RTX 5000 (sm_75, 15.550 GiB / 16.000 GiB available)

The first supports sm_80 and its tests succeed; the second doesn't, and its tests fail. This appears unrelated to my changes.

The problem might be in CUDA/src/device/intrinsics/math.jl, in the method defined by @device_override @inline function Base.max(x::Float32, y::Float32).
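
If that's the culprit, the fix would presumably be a compute-capability gate along these lines (purely a sketch, not CUDA.jl's actual code: hardware_max_nan is a hypothetical wrapper around the max.NaN.f32 PTX instruction, and the compile-time compute_capability() query is assumed to behave as it does for CUDA.jl's other version-gated intrinsics):

    # Hypothetical sketch only: use the NaN-propagating hardware max on sm_80+,
    # and fall back to an explicit NaN check on older targets.
    @device_override @inline function Base.max(x::Float32, y::Float32)
        if compute_capability() >= v"8.0"
            # sm_80+ provides max.NaN.f32, which returns NaN if either input is NaN
            hardware_max_nan(x, y)  # hypothetical wrapper around the PTX instruction
        else
            # pre-sm_80: plain max.f32 has no NaN-propagating form, so
            # handle NaN in software before taking the maximum
            ifelse(isnan(x) | isnan(y), NaN32, ifelse(x > y, x, y))
        end
    end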
