From 31347daa9276f46ea76a95a5be6a16d8d7cdef78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 18 Dec 2025 09:44:41 +0100 Subject: [PATCH 01/19] GH-48582: [CI][GPU][C++][Python] Add new cuda jobs using the new self-hosted runners --- .github/workflows/cpp_extra.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/cpp_extra.yml b/.github/workflows/cpp_extra.yml index 7ab4c73270d..ca16f530f90 100644 --- a/.github/workflows/cpp_extra.yml +++ b/.github/workflows/cpp_extra.yml @@ -110,6 +110,16 @@ jobs: -e ARROW_USE_MESON=ON runs-on: ubuntu-latest title: AMD64 Ubuntu Meson + - envs: + - CUDA=13.0.2 + image: ubuntu-cuda-cpp + runs-on: "runs-on=${{ github.run_id }}/family=g4dn.xlarge/image=ubuntu24-gpu-x64" + title: AMD64 Ubuntu 24 CUDA 13.0.2 + - envs: + - CUDA=11.7.1 + image: ubuntu-cuda-cpp + runs-on: "runs-on=${{ github.run_id }}/family=g4dn.xlarge/image=ubuntu22-gpu-x64" + title: AMD64 Ubuntu 22 CUDA 11.7.1 # TODO: We should remove this "continue-on-error: true" once GH-47207 is resolved - continue-on-error: true envs: From 0fd6459f38827a88689394c73b0ec9e5375876e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 18 Dec 2025 10:37:29 +0100 Subject: [PATCH 02/19] Set Ubuntu for archery via env --- .github/workflows/cpp_extra.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cpp_extra.yml b/.github/workflows/cpp_extra.yml index ca16f530f90..09c1059bd82 100644 --- a/.github/workflows/cpp_extra.yml +++ b/.github/workflows/cpp_extra.yml @@ -112,13 +112,15 @@ jobs: title: AMD64 Ubuntu Meson - envs: - CUDA=13.0.2 + - UBUNTU=24.04 image: ubuntu-cuda-cpp runs-on: "runs-on=${{ github.run_id }}/family=g4dn.xlarge/image=ubuntu24-gpu-x64" title: AMD64 Ubuntu 24 CUDA 13.0.2 - envs: - CUDA=11.7.1 + - UBUNTU=22.04 image: ubuntu-cuda-cpp - runs-on: "runs-on=${{ github.run_id }}/family=g4dn.xlarge/image=ubuntu22-gpu-x64" + runs-on: "runs-on=${{ github.run_id }}/family=g4dn.xlarge/image=ubuntu24-gpu-x64" title: AMD64 Ubuntu 22 CUDA 11.7.1 # TODO: We should remove this "continue-on-error: true" once GH-47207 is resolved - continue-on-error: true From 9abdba1e4b6b7a8e9128f24904b2699293afce58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 18 Dec 2025 18:27:46 +0100 Subject: [PATCH 03/19] Try using g6f.large instances --- .github/workflows/cpp_extra.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cpp_extra.yml b/.github/workflows/cpp_extra.yml index 09c1059bd82..50ea9efd420 100644 --- a/.github/workflows/cpp_extra.yml +++ b/.github/workflows/cpp_extra.yml @@ -114,13 +114,13 @@ jobs: - CUDA=13.0.2 - UBUNTU=24.04 image: ubuntu-cuda-cpp - runs-on: "runs-on=${{ github.run_id }}/family=g4dn.xlarge/image=ubuntu24-gpu-x64" + runs-on: "runs-on=${{ github.run_id }}/family=g6f.large/image=ubuntu24-gpu-x64" title: AMD64 Ubuntu 24 CUDA 13.0.2 - envs: - CUDA=11.7.1 - UBUNTU=22.04 image: ubuntu-cuda-cpp - runs-on: "runs-on=${{ github.run_id }}/family=g4dn.xlarge/image=ubuntu24-gpu-x64" + runs-on: "runs-on=${{ github.run_id }}/family=g6f.large/image=ubuntu24-gpu-x64" title: AMD64 Ubuntu 22 CUDA 11.7.1 # TODO: We should remove this "continue-on-error: true" once GH-47207 is resolved - continue-on-error: true From 0e015b8dcfa8e76cbb038704d539f8e8ba00189c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 18 Dec 2025 18:32:41 +0100 Subject: [PATCH 04/19] Back to g4dn.xlarge to test --- .github/workflows/cpp_extra.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cpp_extra.yml b/.github/workflows/cpp_extra.yml index 50ea9efd420..09c1059bd82 100644 --- a/.github/workflows/cpp_extra.yml +++ b/.github/workflows/cpp_extra.yml @@ -114,13 +114,13 @@ jobs: - CUDA=13.0.2 - UBUNTU=24.04 image: ubuntu-cuda-cpp - runs-on: "runs-on=${{ github.run_id }}/family=g6f.large/image=ubuntu24-gpu-x64" + runs-on: "runs-on=${{ github.run_id }}/family=g4dn.xlarge/image=ubuntu24-gpu-x64" title: AMD64 Ubuntu 24 CUDA 13.0.2 - envs: - CUDA=11.7.1 - UBUNTU=22.04 image: ubuntu-cuda-cpp - runs-on: "runs-on=${{ github.run_id }}/family=g6f.large/image=ubuntu24-gpu-x64" + runs-on: "runs-on=${{ github.run_id }}/family=g4dn.xlarge/image=ubuntu24-gpu-x64" title: AMD64 Ubuntu 22 CUDA 11.7.1 # TODO: We should remove this "continue-on-error: true" once GH-47207 is resolved - continue-on-error: true From 795704c8077aa8c9ed635e8694b3e1670214ad8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 18 Dec 2025 18:39:04 +0100 Subject: [PATCH 05/19] Try removing runs-on from matrix --- .github/workflows/cpp_extra.yml | 84 ++++++++++++++++++++++++++++----- 1 file changed, 73 insertions(+), 11 deletions(-) diff --git a/.github/workflows/cpp_extra.yml b/.github/workflows/cpp_extra.yml index 09c1059bd82..6e01c8d3063 100644 --- a/.github/workflows/cpp_extra.yml +++ b/.github/workflows/cpp_extra.yml @@ -110,27 +110,88 @@ jobs: -e ARROW_USE_MESON=ON runs-on: ubuntu-latest title: AMD64 Ubuntu Meson + # TODO: We should remove this "continue-on-error: true" once GH-47207 is resolved + - continue-on-error: true + envs: + - DEBIAN=13 + image: debian-cpp + run-options: >- + -e CMAKE_CXX_STANDARD=23 + runs-on: ubuntu-latest + title: AMD64 Debian C++23 + env: + ARCHERY_DEBUG: 1 + ARROW_ENABLE_TIMING_TESTS: OFF + DOCKER_VOLUME_PREFIX: ".docker/" + steps: + - name: Checkout Arrow + uses: actions/checkout@v6 + with: + fetch-depth: 0 + submodules: recursive + - name: Cache Docker Volumes + uses: actions/cache@v5 + with: + path: .docker + key: extra-${{ matrix.image }}-${{ hashFiles('cpp/**') }} + restore-keys: extra-${{ matrix.image }}- + - name: Setup Python + uses: actions/setup-python@v6 + with: + python-version: 3 + - name: Setup Archery + run: python3 -m pip install -e dev/archery[docker] + - name: Execute Docker Build + continue-on-error: ${{ matrix.continue-on-error || false }} + env: + ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} + ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} + ENVS: ${{ toJSON(matrix.envs) }} + run: | + # GH-40558: reduce ASLR to avoid ASAN/LSAN crashes + sudo sysctl -w vm.mmap_rnd_bits=28 + source ci/scripts/util_enable_core_dumps.sh + if [ "${ENVS}" != "null" ]; then + echo "${ENVS}" | jq -r '.[]' | while read env; do + echo "${env}" >> .env + done + fi + archery docker run ${{ matrix.run-options || '' }} ${{ matrix.image }} + - name: Docker Push + if: >- + success() && + github.event_name == 'push' && + github.repository == 'apache/arrow' && + github.ref_name == 'main' + env: + ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} + ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} + continue-on-error: true + run: archery docker push ${{ matrix.image }} + + cuda: + needs: check-labels + name: ${{ matrix.title }} + runs-on: "runs-on=${{ github.run_id }}/family=g4dn.xlarge/image=ubuntu24-gpu-x64" + if: >- + needs.check-labels.outputs.force == 'true' || + contains(fromJSON(needs.check-labels.outputs.ci-extra-labels || '[]'), 'CI: Extra') || + contains(fromJSON(needs.check-labels.outputs.ci-extra-labels || '[]'), 'CI: Extra: C++') + timeout-minutes: 75 + strategy: + fail-fast: false + matrix: + include: - envs: - CUDA=13.0.2 - UBUNTU=24.04 image: ubuntu-cuda-cpp - runs-on: "runs-on=${{ github.run_id }}/family=g4dn.xlarge/image=ubuntu24-gpu-x64" title: AMD64 Ubuntu 24 CUDA 13.0.2 - envs: - CUDA=11.7.1 - UBUNTU=22.04 image: ubuntu-cuda-cpp - runs-on: "runs-on=${{ github.run_id }}/family=g4dn.xlarge/image=ubuntu24-gpu-x64" title: AMD64 Ubuntu 22 CUDA 11.7.1 - # TODO: We should remove this "continue-on-error: true" once GH-47207 is resolved - - continue-on-error: true - envs: - - DEBIAN=13 - image: debian-cpp - run-options: >- - -e CMAKE_CXX_STANDARD=23 - runs-on: ubuntu-latest - title: AMD64 Debian C++23 env: ARCHERY_DEBUG: 1 ARROW_ENABLE_TIMING_TESTS: OFF @@ -473,6 +534,7 @@ jobs: report-extra-cpp: if: github.event_name == 'schedule' && always() needs: + - cuda - docker - jni-linux - jni-macos From cf8c88bfd98630d3eab8f354f5c79c8682b9dc86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 7 Jan 2026 12:28:58 +0100 Subject: [PATCH 06/19] Add Python cuda jobs --- .github/workflows/cpp_extra.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/cpp_extra.yml b/.github/workflows/cpp_extra.yml index 6e01c8d3063..10ee9e32f74 100644 --- a/.github/workflows/cpp_extra.yml +++ b/.github/workflows/cpp_extra.yml @@ -192,6 +192,16 @@ jobs: - UBUNTU=22.04 image: ubuntu-cuda-cpp title: AMD64 Ubuntu 22 CUDA 11.7.1 + - envs: + - CUDA=13.0.2 + - UBUNTU=24.04 + image: ubuntu-cuda-python + title: AMD64 Ubuntu 24 CUDA Python 13.0.2 + - envs: + - CUDA=11.7.1 + - UBUNTU=22.04 + image: ubuntu-cuda-python + title: AMD64 Ubuntu 22 CUDA Python 11.7.1 env: ARCHERY_DEBUG: 1 ARROW_ENABLE_TIMING_TESTS: OFF From e6c72b8f5423af24aaf4b24711cec25a14086f9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 7 Jan 2026 13:05:24 +0100 Subject: [PATCH 07/19] Try spot instances --- .github/workflows/cpp_extra.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cpp_extra.yml b/.github/workflows/cpp_extra.yml index 10ee9e32f74..651e7f50594 100644 --- a/.github/workflows/cpp_extra.yml +++ b/.github/workflows/cpp_extra.yml @@ -172,7 +172,7 @@ jobs: cuda: needs: check-labels name: ${{ matrix.title }} - runs-on: "runs-on=${{ github.run_id }}/family=g4dn.xlarge/image=ubuntu24-gpu-x64" + runs-on: "runs-on=${{ github.run_id }}/family=g4dn.xlarge/image=ubuntu24-gpu-x64/spot=capacity-optimized" if: >- needs.check-labels.outputs.force == 'true' || contains(fromJSON(needs.check-labels.outputs.ci-extra-labels || '[]'), 'CI: Extra') || From 51962862259c9b646288a5ffb8a25a6384ac7ab1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 7 Jan 2026 13:20:11 +0100 Subject: [PATCH 08/19] Try updating CUDA to 13.1.80 for numba Python compatibility --- .github/workflows/cpp_extra.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cpp_extra.yml b/.github/workflows/cpp_extra.yml index 651e7f50594..8321329679f 100644 --- a/.github/workflows/cpp_extra.yml +++ b/.github/workflows/cpp_extra.yml @@ -193,10 +193,10 @@ jobs: image: ubuntu-cuda-cpp title: AMD64 Ubuntu 22 CUDA 11.7.1 - envs: - - CUDA=13.0.2 + - CUDA=13.1.80 - UBUNTU=24.04 image: ubuntu-cuda-python - title: AMD64 Ubuntu 24 CUDA Python 13.0.2 + title: AMD64 Ubuntu 24 CUDA Python 13.1.80 - envs: - CUDA=11.7.1 - UBUNTU=22.04 From 996443a7244af13367760f66b8d049191319d816 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 7 Jan 2026 13:53:53 +0100 Subject: [PATCH 09/19] Revert "Try updating CUDA to 13.1.80 for numba Python compatibility" This reverts commit f5766b75963377c5a7f562a6944bf20d7460a218. --- .github/workflows/cpp_extra.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cpp_extra.yml b/.github/workflows/cpp_extra.yml index 8321329679f..651e7f50594 100644 --- a/.github/workflows/cpp_extra.yml +++ b/.github/workflows/cpp_extra.yml @@ -193,10 +193,10 @@ jobs: image: ubuntu-cuda-cpp title: AMD64 Ubuntu 22 CUDA 11.7.1 - envs: - - CUDA=13.1.80 + - CUDA=13.0.2 - UBUNTU=24.04 image: ubuntu-cuda-python - title: AMD64 Ubuntu 24 CUDA Python 13.1.80 + title: AMD64 Ubuntu 24 CUDA Python 13.0.2 - envs: - CUDA=11.7.1 - UBUNTU=22.04 From bd77345406c185a948fbad42d267eace45145066 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 7 Jan 2026 14:02:07 +0100 Subject: [PATCH 10/19] Remove unnecessary cuda jobs from archery tasks --- dev/tasks/docker-tests/github.cuda.yml | 52 -------------------------- dev/tasks/tasks.yml | 23 ------------ 2 files changed, 75 deletions(-) delete mode 100644 dev/tasks/docker-tests/github.cuda.yml diff --git a/dev/tasks/docker-tests/github.cuda.yml b/dev/tasks/docker-tests/github.cuda.yml deleted file mode 100644 index e65ac457b2e..00000000000 --- a/dev/tasks/docker-tests/github.cuda.yml +++ /dev/null @@ -1,52 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -{% import 'macros.jinja' as macros with context %} - -{{ macros.github_header() }} - -jobs: - test: - name: | - Docker Test {{ flags|default("") }} {{ image }} {{ command|default("") }} - runs-on: ['self-hosted', 'cuda'] -{{ macros.github_set_env(env) }} - timeout-minutes: {{ timeout|default(60) }} - steps: - {{ macros.github_checkout_arrow(fetch_depth=fetch_depth|default(1))|indent }} - # python 3.10 is installed on the runner, no need to install - - name: Install pip - run: sudo apt update && sudo apt install python3-pip -y - - name: Install archery - run: python3 -m pip install -e arrow/dev/archery[docker] - - name: Execute Docker Build - shell: bash - env: - {{ macros.github_set_sccache_envvars()|indent(8) }} - run: | - source arrow/ci/scripts/util_enable_core_dumps.sh - archery docker run \ - -e SETUPTOOLS_SCM_PRETEND_VERSION="{{ arrow.no_rc_version }}" \ - {{ flags|default("") }} \ - {{ image }} \ - {{ command|default("") }} - {% if arrow.is_default_branch() %} - {{ macros.github_login_dockerhub()|indent }} - - name: Push Docker Image - shell: bash - run: archery docker push {{ image }} - {% endif %} diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 266073daff6..ca82983a22b 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -762,35 +762,12 @@ tasks: artifacts: - docs.tar.gz - ############################## CUDA tests ################################# - -{% for ubuntu, cuda in [("22.04", "11.7.1"), ("24.04", "13.0.2")] %} - test-cuda-cpp-ubuntu-{{ ubuntu }}-cuda-{{ cuda }}: - ci: github - template: docker-tests/github.cuda.yml - params: - env: - CUDA: {{ cuda }} - UBUNTU: {{ ubuntu }} - image: ubuntu-cuda-cpp - - test-cuda-python-ubuntu-{{ ubuntu }}-cuda-{{ cuda }}: - ci: github - template: docker-tests/github.cuda.yml - params: - env: - CUDA: {{ cuda }} - UBUNTU: {{ ubuntu }} - image: ubuntu-cuda-python -{% endfor %} - ############################## Fuzz tests ################################# test-build-cpp-fuzz: ci: github template: fuzz-tests/github.oss-fuzz.yml - ############################## vcpkg tests ################################## test-build-vcpkg-win: From 499cfeb39ac7aa09644c28059d21f80da1862142 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 7 Jan 2026 15:32:36 +0100 Subject: [PATCH 11/19] Check nvidia-smi output --- .github/workflows/cpp_extra.yml | 5 +++++ compose.yaml | 1 + 2 files changed, 6 insertions(+) diff --git a/.github/workflows/cpp_extra.yml b/.github/workflows/cpp_extra.yml index 651e7f50594..e582aec31d8 100644 --- a/.github/workflows/cpp_extra.yml +++ b/.github/workflows/cpp_extra.yml @@ -224,6 +224,11 @@ jobs: python-version: 3 - name: Setup Archery run: python3 -m pip install -e dev/archery[docker] + - name: Display NVIDIA SMI details + run: | + nvidia-smi + nvidia-smi -L + nvidia-smi -q -d Memory - name: Execute Docker Build continue-on-error: ${{ matrix.continue-on-error || false }} env: diff --git a/compose.yaml b/compose.yaml index 84481e1af76..05d0c9085f7 100644 --- a/compose.yaml +++ b/compose.yaml @@ -1006,6 +1006,7 @@ services: deploy: *cuda-deploy command: &python-command > /bin/bash -c " + nvidia-smi && /arrow/ci/scripts/cpp_build.sh /arrow /build && /arrow/ci/scripts/python_build.sh /arrow /build && /arrow/ci/scripts/python_test.sh /arrow" From d79e0f44dc8cf443c88e9fa6dff6d0203f2355df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 7 Jan 2026 16:09:28 +0100 Subject: [PATCH 12/19] Try installing cuda-compat-13-0 --- ci/scripts/install_numba.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ci/scripts/install_numba.sh b/ci/scripts/install_numba.sh index 91c36ca5713..ee750f91b1a 100755 --- a/ci/scripts/install_numba.sh +++ b/ci/scripts/install_numba.sh @@ -57,6 +57,12 @@ else cuda_version="${DEFAULT_CUDA_VERSION}" fi +# Install CUDA compatibility package for CUDA 13 +if [ "${cuda_version}" = "13" ]; then + apt update + apt install -y cuda-compat-13-0 +fi + if [ "${numba_cuda}" = "master" ]; then pip install "numba-cuda[cu${cuda_version}] @ https://github.com/NVIDIA/numba-cuda/archive/main.tar.gz" elif [ "${numba_cuda}" = "latest" ]; then From 8acc7c71ac352736f63edc8ab02191dd615c8a9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 7 Jan 2026 16:23:43 +0100 Subject: [PATCH 13/19] Remove stray cuda tasks --- dev/tasks/tasks.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index ca82983a22b..2667aa1fb5e 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -41,9 +41,6 @@ groups: {############################# Testing tasks #################################} - cuda: - - test-cuda-* - test: - test-* From 6cba3c1d4984dadda679bf1d05d2062778baef48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 8 Jan 2026 11:08:51 +0100 Subject: [PATCH 14/19] Move Cuda jobs to its own extra workflow --- .github/workflows/cpp_extra.yml | 89 ---------------- .github/workflows/cuda_extra.yml | 173 +++++++++++++++++++++++++++++++ 2 files changed, 173 insertions(+), 89 deletions(-) create mode 100644 .github/workflows/cuda_extra.yml diff --git a/.github/workflows/cpp_extra.yml b/.github/workflows/cpp_extra.yml index e582aec31d8..7ab4c73270d 100644 --- a/.github/workflows/cpp_extra.yml +++ b/.github/workflows/cpp_extra.yml @@ -169,94 +169,6 @@ jobs: continue-on-error: true run: archery docker push ${{ matrix.image }} - cuda: - needs: check-labels - name: ${{ matrix.title }} - runs-on: "runs-on=${{ github.run_id }}/family=g4dn.xlarge/image=ubuntu24-gpu-x64/spot=capacity-optimized" - if: >- - needs.check-labels.outputs.force == 'true' || - contains(fromJSON(needs.check-labels.outputs.ci-extra-labels || '[]'), 'CI: Extra') || - contains(fromJSON(needs.check-labels.outputs.ci-extra-labels || '[]'), 'CI: Extra: C++') - timeout-minutes: 75 - strategy: - fail-fast: false - matrix: - include: - - envs: - - CUDA=13.0.2 - - UBUNTU=24.04 - image: ubuntu-cuda-cpp - title: AMD64 Ubuntu 24 CUDA 13.0.2 - - envs: - - CUDA=11.7.1 - - UBUNTU=22.04 - image: ubuntu-cuda-cpp - title: AMD64 Ubuntu 22 CUDA 11.7.1 - - envs: - - CUDA=13.0.2 - - UBUNTU=24.04 - image: ubuntu-cuda-python - title: AMD64 Ubuntu 24 CUDA Python 13.0.2 - - envs: - - CUDA=11.7.1 - - UBUNTU=22.04 - image: ubuntu-cuda-python - title: AMD64 Ubuntu 22 CUDA Python 11.7.1 - env: - ARCHERY_DEBUG: 1 - ARROW_ENABLE_TIMING_TESTS: OFF - DOCKER_VOLUME_PREFIX: ".docker/" - steps: - - name: Checkout Arrow - uses: actions/checkout@v6 - with: - fetch-depth: 0 - submodules: recursive - - name: Cache Docker Volumes - uses: actions/cache@v5 - with: - path: .docker - key: extra-${{ matrix.image }}-${{ hashFiles('cpp/**') }} - restore-keys: extra-${{ matrix.image }}- - - name: Setup Python - uses: actions/setup-python@v6 - with: - python-version: 3 - - name: Setup Archery - run: python3 -m pip install -e dev/archery[docker] - - name: Display NVIDIA SMI details - run: | - nvidia-smi - nvidia-smi -L - nvidia-smi -q -d Memory - - name: Execute Docker Build - continue-on-error: ${{ matrix.continue-on-error || false }} - env: - ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} - ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} - ENVS: ${{ toJSON(matrix.envs) }} - run: | - # GH-40558: reduce ASLR to avoid ASAN/LSAN crashes - sudo sysctl -w vm.mmap_rnd_bits=28 - source ci/scripts/util_enable_core_dumps.sh - if [ "${ENVS}" != "null" ]; then - echo "${ENVS}" | jq -r '.[]' | while read env; do - echo "${env}" >> .env - done - fi - archery docker run ${{ matrix.run-options || '' }} ${{ matrix.image }} - - name: Docker Push - if: >- - success() && - github.event_name == 'push' && - github.repository == 'apache/arrow' && - github.ref_name == 'main' - env: - ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} - ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} - continue-on-error: true - run: archery docker push ${{ matrix.image }} - msvc-arm64: needs: check-labels if: >- @@ -549,7 +461,6 @@ jobs: report-extra-cpp: if: github.event_name == 'schedule' && always() needs: - - cuda - docker - jni-linux - jni-macos diff --git a/.github/workflows/cuda_extra.yml b/.github/workflows/cuda_extra.yml new file mode 100644 index 00000000000..1c2c98bfe07 --- /dev/null +++ b/.github/workflows/cuda_extra.yml @@ -0,0 +1,173 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: CUDA Extra + +on: + push: + branches: + - '**' + - '!dependabot/**' + paths: + - '.dockerignore' + - '.github/workflows/check_labels.yml' + - '.github/workflows/cuda_extra.yml' + - '.github/workflows/report_ci.yml' + - 'ci/docker/linux-apt-python-3.dockerfile' + - 'ci/scripts/ccache_setup.sh' + - 'ci/scripts/cpp_*' + - 'ci/scripts/install_numba.sh' + - 'cpp/src/arrow/gpu/**' + - 'compose.yaml' + - 'format/Flight.proto' + - 'python/**' + tags: + - '**' + pull_request: + paths: + - '.dockerignore' + - '.github/workflows/check_labels.yml' + - '.github/workflows/cuda_extra.yml' + - '.github/workflows/report_ci.yml' + - 'ci/docker/linux-apt-python-3.dockerfile' + - 'ci/scripts/ccache_setup.sh' + - 'ci/scripts/cpp_*' + - 'ci/scripts/install_numba.sh' + - 'cpp/src/arrow/gpu/**' + - 'compose.yaml' + - 'format/Flight.proto' + - 'python/**' + types: + - labeled + - opened + - reopened + - synchronize + schedule: + - cron: | + 0 6 * * * + +concurrency: + group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} + cancel-in-progress: true + +permissions: + actions: read + contents: read + pull-requests: read + +jobs: + check-labels: + if: github.event_name != 'schedule' || github.repository == 'apache/arrow' + uses: ./.github/workflows/check_labels.yml + secrets: inherit + with: + parent-workflow: cuda_extra + + docker: + needs: check-labels + name: ${{ matrix.title }} + runs-on: "runs-on=${{ github.run_id }}/family=g4dn.xlarge/image=ubuntu24-gpu-x64/spot=capacity-optimized" + if: >- + needs.check-labels.outputs.force == 'true' || + contains(fromJSON(needs.check-labels.outputs.ci-extra-labels || '[]'), 'CI: Extra') || + contains(fromJSON(needs.check-labels.outputs.ci-extra-labels || '[]'), 'CI: Extra: CUDA') + timeout-minutes: 75 + strategy: + fail-fast: false + matrix: + include: + - envs: + - CUDA=13.0.2 + - UBUNTU=24.04 + image: ubuntu-cuda-cpp + title: AMD64 Ubuntu 24 CUDA 13.0.2 + - envs: + - CUDA=11.7.1 + - UBUNTU=22.04 + image: ubuntu-cuda-cpp + title: AMD64 Ubuntu 22 CUDA 11.7.1 + - envs: + - CUDA=13.0.2 + - UBUNTU=24.04 + image: ubuntu-cuda-python + title: AMD64 Ubuntu 24 CUDA 13.0.2 Python + - envs: + - CUDA=11.7.1 + - UBUNTU=22.04 + image: ubuntu-cuda-python + title: AMD64 Ubuntu 22 CUDA 11.7.1 Python + env: + ARCHERY_DEBUG: 1 + ARROW_ENABLE_TIMING_TESTS: OFF + DOCKER_VOLUME_PREFIX: ".docker/" + steps: + - name: Checkout Arrow + uses: actions/checkout@v6 + with: + fetch-depth: 0 + submodules: recursive + - name: Cache Docker Volumes + uses: actions/cache@v5 + with: + path: .docker + key: extra-${{ matrix.image }}-${{ hashFiles('cpp/**') }} + restore-keys: extra-${{ matrix.image }}- + - name: Setup Python + uses: actions/setup-python@v6 + with: + python-version: 3 + - name: Setup Archery + run: python3 -m pip install -e dev/archery[docker] + - name: Display NVIDIA SMI details + run: | + nvidia-smi + nvidia-smi -L + nvidia-smi -q -d Memory + - name: Execute Docker Build + continue-on-error: ${{ matrix.continue-on-error || false }} + env: + ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} + ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} + ENVS: ${{ toJSON(matrix.envs) }} + run: | + # GH-40558: reduce ASLR to avoid ASAN/LSAN crashes + sudo sysctl -w vm.mmap_rnd_bits=28 + source ci/scripts/util_enable_core_dumps.sh + if [ "${ENVS}" != "null" ]; then + echo "${ENVS}" | jq -r '.[]' | while read env; do + echo "${env}" >> .env + done + fi + archery docker run ${{ matrix.run-options || '' }} ${{ matrix.image }} + - name: Docker Push + if: >- + success() && + github.event_name == 'push' && + github.repository == 'apache/arrow' && + github.ref_name == 'main' + env: + ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} + ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} + continue-on-error: true + run: archery docker push ${{ matrix.image }} + + report-extra-cpp: + if: github.event_name == 'schedule' && always() + needs: + - docker + uses: ./.github/workflows/report_ci.yml + secrets: inherit From 74df3f51b2d46c270a46c92d29291707d58a6f69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 8 Jan 2026 11:49:39 +0100 Subject: [PATCH 15/19] Use Cuda 12.9.0 as is the last one from the instances --- .github/workflows/cuda_extra.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/cuda_extra.yml b/.github/workflows/cuda_extra.yml index 1c2c98bfe07..c9cc19582b8 100644 --- a/.github/workflows/cuda_extra.yml +++ b/.github/workflows/cuda_extra.yml @@ -91,20 +91,20 @@ jobs: matrix: include: - envs: - - CUDA=13.0.2 + - CUDA=12.9.0 - UBUNTU=24.04 image: ubuntu-cuda-cpp - title: AMD64 Ubuntu 24 CUDA 13.0.2 + title: AMD64 Ubuntu 24 CUDA 12.9.0 - envs: - CUDA=11.7.1 - UBUNTU=22.04 image: ubuntu-cuda-cpp title: AMD64 Ubuntu 22 CUDA 11.7.1 - envs: - - CUDA=13.0.2 + - CUDA=12.9.0 - UBUNTU=24.04 image: ubuntu-cuda-python - title: AMD64 Ubuntu 24 CUDA 13.0.2 Python + title: AMD64 Ubuntu 24 CUDA 12.9.0 Python - envs: - CUDA=11.7.1 - UBUNTU=22.04 From cacc1a483990b93c4260da394d8847db09250037 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 8 Jan 2026 11:52:13 +0100 Subject: [PATCH 16/19] Remove unnecessary cuda-compat-13-0 installation --- ci/scripts/install_numba.sh | 6 ------ compose.yaml | 1 - 2 files changed, 7 deletions(-) diff --git a/ci/scripts/install_numba.sh b/ci/scripts/install_numba.sh index ee750f91b1a..91c36ca5713 100755 --- a/ci/scripts/install_numba.sh +++ b/ci/scripts/install_numba.sh @@ -57,12 +57,6 @@ else cuda_version="${DEFAULT_CUDA_VERSION}" fi -# Install CUDA compatibility package for CUDA 13 -if [ "${cuda_version}" = "13" ]; then - apt update - apt install -y cuda-compat-13-0 -fi - if [ "${numba_cuda}" = "master" ]; then pip install "numba-cuda[cu${cuda_version}] @ https://github.com/NVIDIA/numba-cuda/archive/main.tar.gz" elif [ "${numba_cuda}" = "latest" ]; then diff --git a/compose.yaml b/compose.yaml index 05d0c9085f7..84481e1af76 100644 --- a/compose.yaml +++ b/compose.yaml @@ -1006,7 +1006,6 @@ services: deploy: *cuda-deploy command: &python-command > /bin/bash -c " - nvidia-smi && /arrow/ci/scripts/cpp_build.sh /arrow /build && /arrow/ci/scripts/python_build.sh /arrow /build && /arrow/ci/scripts/python_test.sh /arrow" From 452b575434564198939731cc46cd6cd50b747386 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 8 Jan 2026 14:24:29 +0100 Subject: [PATCH 17/19] Use cpp/** instead of cpp/src/arrow/gpu/** --- .github/workflows/cuda_extra.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/cuda_extra.yml b/.github/workflows/cuda_extra.yml index c9cc19582b8..d5222ba8eba 100644 --- a/.github/workflows/cuda_extra.yml +++ b/.github/workflows/cuda_extra.yml @@ -31,7 +31,7 @@ on: - 'ci/scripts/ccache_setup.sh' - 'ci/scripts/cpp_*' - 'ci/scripts/install_numba.sh' - - 'cpp/src/arrow/gpu/**' + - 'cpp/**' - 'compose.yaml' - 'format/Flight.proto' - 'python/**' @@ -47,7 +47,7 @@ on: - 'ci/scripts/ccache_setup.sh' - 'ci/scripts/cpp_*' - 'ci/scripts/install_numba.sh' - - 'cpp/src/arrow/gpu/**' + - 'cpp/**' - 'compose.yaml' - 'format/Flight.proto' - 'python/**' From 683247c3924714becdfd37020fcaabc44b20d9da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 8 Jan 2026 15:19:59 +0100 Subject: [PATCH 18/19] Remove trigger on push to branches and pull_requests paths --- .github/workflows/cuda_extra.yml | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/.github/workflows/cuda_extra.yml b/.github/workflows/cuda_extra.yml index d5222ba8eba..4ceb8bf99df 100644 --- a/.github/workflows/cuda_extra.yml +++ b/.github/workflows/cuda_extra.yml @@ -19,38 +19,9 @@ name: CUDA Extra on: push: - branches: - - '**' - - '!dependabot/**' - paths: - - '.dockerignore' - - '.github/workflows/check_labels.yml' - - '.github/workflows/cuda_extra.yml' - - '.github/workflows/report_ci.yml' - - 'ci/docker/linux-apt-python-3.dockerfile' - - 'ci/scripts/ccache_setup.sh' - - 'ci/scripts/cpp_*' - - 'ci/scripts/install_numba.sh' - - 'cpp/**' - - 'compose.yaml' - - 'format/Flight.proto' - - 'python/**' tags: - '**' pull_request: - paths: - - '.dockerignore' - - '.github/workflows/check_labels.yml' - - '.github/workflows/cuda_extra.yml' - - '.github/workflows/report_ci.yml' - - 'ci/docker/linux-apt-python-3.dockerfile' - - 'ci/scripts/ccache_setup.sh' - - 'ci/scripts/cpp_*' - - 'ci/scripts/install_numba.sh' - - 'cpp/**' - - 'compose.yaml' - - 'format/Flight.proto' - - 'python/**' types: - labeled - opened From c1350224749b05518841e768c292da36375ddb0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 12 Jan 2026 10:36:34 +0100 Subject: [PATCH 19/19] Simplify environment variables for cuda and ubuntu now that we are not reusing the CPP workflow --- .github/workflows/cuda_extra.yml | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/.github/workflows/cuda_extra.yml b/.github/workflows/cuda_extra.yml index 4ceb8bf99df..1700d6a8456 100644 --- a/.github/workflows/cuda_extra.yml +++ b/.github/workflows/cuda_extra.yml @@ -61,24 +61,20 @@ jobs: fail-fast: false matrix: include: - - envs: - - CUDA=12.9.0 - - UBUNTU=24.04 + - cuda: 12.9.0 + ubuntu: 24.04 image: ubuntu-cuda-cpp title: AMD64 Ubuntu 24 CUDA 12.9.0 - - envs: - - CUDA=11.7.1 - - UBUNTU=22.04 + - cuda: 11.7.1 + ubuntu: 22.04 image: ubuntu-cuda-cpp title: AMD64 Ubuntu 22 CUDA 11.7.1 - - envs: - - CUDA=12.9.0 - - UBUNTU=24.04 + - cuda: 12.9.0 + ubuntu: 24.04 image: ubuntu-cuda-python title: AMD64 Ubuntu 24 CUDA 12.9.0 Python - - envs: - - CUDA=11.7.1 - - UBUNTU=22.04 + - cuda: 11.7.1 + ubuntu: 22.04 image: ubuntu-cuda-python title: AMD64 Ubuntu 22 CUDA 11.7.1 Python env: @@ -113,16 +109,12 @@ jobs: env: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} - ENVS: ${{ toJSON(matrix.envs) }} + CUDA: ${{ matrix.cuda }} + UBUNTU: ${{ matrix.ubuntu }} run: | # GH-40558: reduce ASLR to avoid ASAN/LSAN crashes sudo sysctl -w vm.mmap_rnd_bits=28 source ci/scripts/util_enable_core_dumps.sh - if [ "${ENVS}" != "null" ]; then - echo "${ENVS}" | jq -r '.[]' | while read env; do - echo "${env}" >> .env - done - fi archery docker run ${{ matrix.run-options || '' }} ${{ matrix.image }} - name: Docker Push if: >-