8 changes: 4 additions & 4 deletions .github/workflows/tests-nightly.yml
@@ -20,7 +20,7 @@ jobs:
   platform: [linux-x64, linux-aarch64, macos, windows]
   # default runners don't have AVX-512 support, but icelake does
   cpu_type: ["", icelake]
-  torch_version: ["2.3.1", "2.8.0", "2.9.1"]
+  torch_version: ["2.3.1", "2.9.1", "2.10.0"]
 
   exclude:
     # aarch64 minimum torch version is 2.5.1
@@ -65,13 +65,13 @@ jobs:
torch_version: "2.3.1"
pypi_index: "https://download.pytorch.org/whl/cu118"
- cuda_version: "12.6.3"
torch_version: "2.7.1"
torch_version: "2.8.0"
pypi_index: "https://download.pytorch.org/whl/cu126"
- cuda_version: "12.8.1"
torch_version: "2.8.0"
torch_version: "2.9.1"
pypi_index: "https://download.pytorch.org/whl/cu128"
- cuda_version: "13.0.2"
torch_version: "2.9.1"
torch_version: "2.10.0"
pypi_index: "https://download.pytorch.org/whl/cu130"

# Windows CUDA Tests - T4 GPU (CUDA 11.8 only, multiple torch versions)
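For readers reproducing these CI pairings locally, here is a minimal sketch, not part of the PR, that checks whether the installed torch build matches one of the tested CUDA/torch combinations. The TESTED table is transcribed from the Linux CUDA include entries above (CUDA toolkit major.minor mapped to torch version); everything else is illustrative:

    import torch

    # CUDA major.minor -> torch version, transcribed from the nightly matrix above.
    TESTED = {"11.8": "2.3.1", "12.6": "2.8.0", "12.8": "2.9.1", "13.0": "2.10.0"}

    cuda = torch.version.cuda               # e.g. "12.8"; None on CPU-only builds
    base = torch.__version__.split("+")[0]  # strip local suffixes like "+cu128"
    if cuda is not None and TESTED.get(cuda) == base:
        print(f"torch {base} / CUDA {cuda}: a tested pairing")
    else:
        print(f"torch {base} / CUDA {cuda}: outside the nightly matrix")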
6 changes: 3 additions & 3 deletions .github/workflows/tests-pr.yml
@@ -31,7 +31,7 @@ jobs:
   platform: [linux-x64, linux-aarch64, macos]
   # default runners don't have AVX-512 support, but icelake does
   cpu_type: ["", icelake]
-  torch_version: ["2.3.1", "2.9.1"]
+  torch_version: ["2.3.1", "2.10.0"]
 
   exclude:
     # aarch64 minimum torch version is 2.5.1
@@ -73,10 +73,10 @@ jobs:
   torch_version: "2.3.1"
   pypi_index: "https://download.pytorch.org/whl/cu118"
 - cuda_version: "12.8.1"
-  torch_version: "2.8.0"
+  torch_version: "2.9.1"
   pypi_index: "https://download.pytorch.org/whl/cu128"
 - cuda_version: "13.0.2"
-  torch_version: "2.9.1"
+  torch_version: "2.10.0"
   pypi_index: "https://download.pytorch.org/whl/cu130"
 
 # Windows CUDA test - single configuration
12 changes: 12 additions & 0 deletions tests/test_functional.py
@@ -430,6 +430,15 @@ def test_approx_igemm(self, dim1, dim2, quant_methods, batched):
@pytest.mark.parametrize("seq_dim", [16, 256], ids=id_formatter("seq_dim"))
@pytest.mark.parametrize("transpose", BOOLEAN_TUPLES, ids=id_formatter("transpose"))
def test_igemm(self, hidden_dim, batch_dim, transpose, seq_dim):
if (
torch.version.cuda == "13.0"
and torch.__version__ >= (2, 10)
and not any(transpose)
and batch_dim == 256
and seq_dim == 256
):
pytest.xfail("Failure due to regression in cuBLAS for CUDA Toolkit 13.0.2.")

hidden_dim = hidden_dim - (hidden_dim % 32)
batch_dim = batch_dim - (batch_dim % 16)
seq_dim = seq_dim - (seq_dim % 16)
@@ -570,6 +579,9 @@ def min_max(x):
 @pytest.mark.parametrize("dim4", [32, 256], ids=id_formatter("dim4"))
 @pytest.mark.parametrize("transpose", BOOLEAN_TUPLES, ids=id_formatter("transpose"))
 def test_ibmm(self, dim1, dim2, dim3, dim4, transpose):
+    if torch.version.cuda == "13.0" and torch.__version__ >= (2, 10) and dim1 == 64:
+        pytest.xfail("Failure due to regression in cuBLAS for CUDA Toolkit 13.0.2.")
+
     dim2 = dim2 - (dim2 % 16)
     dim3 = dim3 - (dim3 % 16)
     dim4 = dim4 - (dim4 % 16)
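Both guards lean on the fact that torch.__version__ is a TorchVersion, a str subclass that also compares correctly against version tuples, so torch.__version__ >= (2, 10) is a genuine version comparison rather than a lexicographic string one. A minimal standalone sketch of the same gate; the helper name is_cuda13_cublas_regression and the test body are illustrative, not from the PR:

    import pytest
    import torch

    def is_cuda13_cublas_regression() -> bool:
        # torch.version.cuda reports the toolkit as "major.minor" (e.g. "13.0")
        # and is None on CPU-only builds; TorchVersion compares against tuples.
        return torch.version.cuda == "13.0" and torch.__version__ >= (2, 10)

    def test_affected_shape():
        if is_cuda13_cublas_regression():
            pytest.xfail("Regression in cuBLAS for CUDA Toolkit 13.0.2.")
        ...  # the affected int8 matmul would run here

Note that the imperative pytest.xfail(...) stops the test immediately, unlike the @pytest.mark.xfail decorator, which still runs the body and reports XPASS if it unexpectedly passes.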
3 changes: 3 additions & 0 deletions tests/test_linear8bitlt.py
@@ -243,6 +243,9 @@ def test_linear8bitlt_torch_compile(device, threshold, bias, fullgraph, mode):
     if device == "cuda" and platform.system() == "Windows":
         pytest.skip("Triton is not officially supported on Windows")
 
+    if device == "cuda" and mode == "reduce-overhead" and fullgraph and threshold > 0 and torch.__version__ >= (2, 10):
+        pytest.xfail("Failure due to regression in torch 2.10 related to reduced overhead mode and CUDA.")
+
     dim = 256
     batch_size = 16
 
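For context, the guarded configuration compiles with CUDA graph capture enabled ("reduce-overhead") while forbidding graph breaks (fullgraph=True). A rough sketch of that compile setup on a plain nn.Linear; the dims and dtype are illustrative rather than the test's actual fixtures, and it requires a CUDA GPU:

    import torch

    # "reduce-overhead" turns on CUDA graph capture; fullgraph=True makes any
    # graph break a hard error. This combination is what the xfail above
    # guards against on torch >= 2.10.
    model = torch.nn.Linear(256, 256).cuda().half()
    compiled = torch.compile(model, mode="reduce-overhead", fullgraph=True)

    x = torch.randn(16, 256, device="cuda", dtype=torch.half)
    for _ in range(3):  # the first iterations warm up and capture the graph
        out = compiled(x)
    print(out.shape)    # torch.Size([16, 256])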