diff --git a/.github/workflows/tests-nightly.yml b/.github/workflows/tests-nightly.yml
index 60ed4fe3f..09858a7c3 100644
--- a/.github/workflows/tests-nightly.yml
+++ b/.github/workflows/tests-nightly.yml
@@ -20,7 +20,7 @@ jobs:
         platform: [linux-x64, linux-aarch64, macos, windows]
         # default runners don't have AVX-512 support, but icelake does
         cpu_type: ["", icelake]
-        torch_version: ["2.3.1", "2.8.0", "2.9.1"]
+        torch_version: ["2.3.1", "2.9.1", "2.10.0"]
 
         exclude:
           # aarch64 minimum torch version is 2.5.1
@@ -65,13 +65,13 @@ jobs:
             torch_version: "2.3.1"
             pypi_index: "https://download.pytorch.org/whl/cu118"
           - cuda_version: "12.6.3"
-            torch_version: "2.7.1"
+            torch_version: "2.8.0"
             pypi_index: "https://download.pytorch.org/whl/cu126"
           - cuda_version: "12.8.1"
-            torch_version: "2.8.0"
+            torch_version: "2.9.1"
             pypi_index: "https://download.pytorch.org/whl/cu128"
           - cuda_version: "13.0.2"
-            torch_version: "2.9.1"
+            torch_version: "2.10.0"
             pypi_index: "https://download.pytorch.org/whl/cu130"
 
         # Windows CUDA Tests - T4 GPU (CUDA 11.8 only, multiple torch versions)
diff --git a/.github/workflows/tests-pr.yml b/.github/workflows/tests-pr.yml
index 7766089f6..539576bcd 100644
--- a/.github/workflows/tests-pr.yml
+++ b/.github/workflows/tests-pr.yml
@@ -31,7 +31,7 @@ jobs:
         platform: [linux-x64, linux-aarch64, macos]
         # default runners don't have AVX-512 support, but icelake does
         cpu_type: ["", icelake]
-        torch_version: ["2.3.1", "2.9.1"]
+        torch_version: ["2.3.1", "2.10.0"]
 
         exclude:
           # aarch64 minimum torch version is 2.5.1
@@ -73,10 +73,10 @@ jobs:
             torch_version: "2.3.1"
             pypi_index: "https://download.pytorch.org/whl/cu118"
           - cuda_version: "12.8.1"
-            torch_version: "2.8.0"
+            torch_version: "2.9.1"
             pypi_index: "https://download.pytorch.org/whl/cu128"
           - cuda_version: "13.0.2"
-            torch_version: "2.9.1"
+            torch_version: "2.10.0"
             pypi_index: "https://download.pytorch.org/whl/cu130"
 
         # Windows CUDA test - single configuration
diff --git a/tests/test_functional.py b/tests/test_functional.py
index d2e3f0847..a7ab4db6f 100644
--- a/tests/test_functional.py
+++ b/tests/test_functional.py
@@ -430,6 +430,15 @@ def test_approx_igemm(self, dim1, dim2, quant_methods, batched):
     @pytest.mark.parametrize("seq_dim", [16, 256], ids=id_formatter("seq_dim"))
     @pytest.mark.parametrize("transpose", BOOLEAN_TUPLES, ids=id_formatter("transpose"))
     def test_igemm(self, hidden_dim, batch_dim, transpose, seq_dim):
+        if (
+            torch.version.cuda == "13.0"
+            and torch.__version__ >= (2, 10)
+            and not any(transpose)
+            and batch_dim == 256
+            and seq_dim == 256
+        ):
+            pytest.xfail("Failure due to regression in cuBLAS for CUDA Toolkit 13.0.2.")
+
         hidden_dim = hidden_dim - (hidden_dim % 32)
         batch_dim = batch_dim - (batch_dim % 16)
         seq_dim = seq_dim - (seq_dim % 16)
@@ -570,6 +579,9 @@ def min_max(x):
     @pytest.mark.parametrize("dim4", [32, 256], ids=id_formatter("dim4"))
     @pytest.mark.parametrize("transpose", BOOLEAN_TUPLES, ids=id_formatter("transpose"))
     def test_ibmm(self, dim1, dim2, dim3, dim4, transpose):
+        if torch.version.cuda == "13.0" and torch.__version__ >= (2, 10) and dim1 == 64:
+            pytest.xfail("Failure due to regression in cuBLAS for CUDA Toolkit 13.0.2.")
+
         dim2 = dim2 - (dim2 % 16)
         dim3 = dim3 - (dim3 % 16)
         dim4 = dim4 - (dim4 % 16)
diff --git a/tests/test_linear8bitlt.py b/tests/test_linear8bitlt.py
index 83f207d42..2460099ae 100644
--- a/tests/test_linear8bitlt.py
+++ b/tests/test_linear8bitlt.py
@@ -243,6 +243,9 @@ def test_linear8bitlt_torch_compile(device, threshold, bias, fullgraph, mode):
     if device == "cuda" and platform.system() == "Windows":
         pytest.skip("Triton is not officially supported on Windows")
 
+    if device == "cuda" and mode == "reduce-overhead" and fullgraph and threshold > 0 and torch.__version__ >= (2, 10):
+        pytest.xfail("Failure due to regression in torch 2.10 related to reduce-overhead mode and CUDA.")
+
     dim = 256
     batch_size = 16