From 39400a6c3b1339fe7306d5e3ba823cf9ba0b05a6 Mon Sep 17 00:00:00 2001 From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com> Date: Fri, 23 May 2025 15:51:31 -0400 Subject: [PATCH 1/5] General cleanup & test improvements --- .github/workflows/tests.yml | 11 +- benchmarking/int8/row_scale_benchmark.py | 70 ------- deploy.sh | 237 ----------------------- environment-bnb.yml | 21 -- environment.yml | 46 ----- tests/test_autograd.py | 7 + tests/test_functional.py | 8 +- tests/test_ops.py | 33 ++-- tests/test_triton.py | 2 +- 9 files changed, 40 insertions(+), 395 deletions(-) delete mode 100644 benchmarking/int8/row_scale_benchmark.py delete mode 100644 deploy.sh delete mode 100644 environment-bnb.yml delete mode 100644 environment.yml diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 5d2a2708b..2d206b76a 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -93,14 +93,15 @@ jobs: path: output/${{ matrix.os }}/${{ matrix.arch }}/* retention-days: 7 - cpu-tests: + test-cpu: if: github.repository == 'bitsandbytes-foundation/bitsandbytes' needs: build-cpu strategy: fail-fast: false matrix: os: [ubuntu-22.04, ubuntu-22.04-arm, windows-2025, macos-15] - torch_version: ["2.6.0", "2.7.0"] + # Test with the oldest supported torch version and the two newest. + torch_version: ["2.2.2", "2.6.0", "2.7.0"] include: - os: ubuntu-22.04 arch: x86_64 @@ -144,7 +145,7 @@ jobs: - name: Run tests run: pytest --durations=100 - # cuda-aarch64-tests: + # test-cuda-aarch64: # if: github.repository == 'bitsandbytes-foundation/bitsandbytes' # needs: build-cuda # strategy: @@ -167,7 +168,7 @@ jobs: - cuda-tests: + test-cuda: if: github.repository == 'bitsandbytes-foundation/bitsandbytes' needs: build-cuda strategy: @@ -179,7 +180,7 @@ jobs: cuda_version: ["11.8.0", "12.6.3", "12.8.1"] include: - cuda_version: "11.8.0" - torch_version: "2.4.1" + torch_version: "2.2.2" pypi_index: "https://download.pytorch.org/whl/cu118" - cuda_version: "12.6.3" torch_version: "2.6.0" diff --git a/benchmarking/int8/row_scale_benchmark.py b/benchmarking/int8/row_scale_benchmark.py deleted file mode 100644 index 98d2496de..000000000 --- a/benchmarking/int8/row_scale_benchmark.py +++ /dev/null @@ -1,70 +0,0 @@ -""" -Extracted from tests/test_functional.py - -Note: This feature is currently unused! It is kept here for archival purposes. 
- -Usage: pytest benchmarking/int8/row_scale_benchmark.py -""" - -import time - -import pytest -import torch - -from bitsandbytes import functional as F - -k = 20 -torch.set_printoptions(precision=5, sci_mode=False, linewidth=120, edgeitems=20, threshold=10000) - - -@pytest.mark.parametrize( - ("dim1", "dim4", "inner"), - [ - pytest.param(1024, 12288 * 4, 12288, id="1024, 12288*4, 12288"), - pytest.param(2048, 4096 * 4, 4096, id="2048, 4096*4, 4096"), - ], -) -@pytest.mark.skip("Row scale has some bugs for ampere") -@pytest.mark.benchmark -def test_row_scale_bench(dim1, dim4, inner): - formatB = F.get_special_format_str() - err1, err2, err3 = [], [], [] - relerr1, relerr2 = [], [] - scale = 1 - A = torch.randn(dim1, inner, device="cuda").half() - B = torch.randn(dim4, inner, device="cuda").half() - torch.nn.init.xavier_uniform_(B) - # warmpup - for i in range(k): - C1 = torch.matmul(A, B.t()) - - torch.cuda.synchronize() - t0 = time.time() - for i in range(k): - C1 = torch.matmul(A, B.t()) - torch.cuda.synchronize() - print("16", time.time() - t0) - - C1a, C1b, stats1a, stats1b, coo_tensor = F.int8_double_quant(A) - CB, absmaxB = F.vectorwise_quant(B, quant_type="linear") - A2, SA = F.nvidia_transform(C1a, "col32") - B2, SB = F.nvidia_transform(CB, formatB) - A1, maxA = F.vectorwise_quant(A, dim=1) - - c = 10.0 * inner * scale - row_scale = maxA / c - torch.cuda.synchronize() - t0 = time.time() - for i in range(k): - outC32 = F.int8_linear_matmul(A2, B2, dtype=torch.int8, row_scale=row_scale) - torch.cuda.synchronize() - print("row-wise", time.time() - t0) - - C2a, C2b, stats2a, stats2b, coo_tensor = F.int8_double_quant(B) - B2, SB = F.nvidia_transform(C2a, formatB) - torch.cuda.synchronize() - t0 = time.time() - for i in range(k): - outC32 = F.int8_linear_matmul(A2, B2) - torch.cuda.synchronize() - print("vector-wise", time.time() - t0) diff --git a/deploy.sh b/deploy.sh deleted file mode 100644 index e60373627..000000000 --- a/deploy.sh +++ /dev/null @@ -1,237 +0,0 @@ -#!/bin/bash -BASE_PATH=$1 - -echo "MAKE SURE LD_LIBRARY_PATH IS EMPTY!" -echo $LD_LIBRARY_PATH - -if [[ ! -z "${LD_LIBRARY_PATH}" ]]; then - echo "Compilation unsuccessful!" 1>&2 - exit 64 -fi - - -module unload cuda && echo "no module function available. Probably not on a slurm cluster." -module unload gcc && echo "no module function available. Probably not on a slurm cluster." - -rm -rf dist build -make cleaneggs -make cleanlibs - -rm -rf build/* -export CUDA_HOME= -export CUDA_VERSION= -make cpuonly CUDA_VERSION="CPU" - -if [ ! -f "./bitsandbytes/libbitsandbytes_cpu.so" ]; then - # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessful!" 1>&2 - exit 64 -fi - -rm -rf build/* -export CUDA_HOME=$BASE_PATH/cuda-11.0 -make cuda110 CUDA_VERSION=110 - -if [ ! -f "./bitsandbytes/libbitsandbytes_cuda110.so" ]; then - # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessful!" 1>&2 - exit 64 -fi - -rm -rf build/* -export CUDA_HOME=$BASE_PATH/cuda-11.1 -make cuda11x CUDA_VERSION=111 - -if [ ! -f "./bitsandbytes/libbitsandbytes_cuda111.so" ]; then - # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessful!" 1>&2 - exit 64 -fi - -rm -rf build/* -export CUDA_HOME=$BASE_PATH/cuda-11.4 -make cuda11x CUDA_VERSION=114 - -if [ ! -f "./bitsandbytes/libbitsandbytes_cuda114.so" ]; then - # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessful!" 
1>&2 - exit 64 -fi - -rm -rf build/* -export CUDA_HOME=$BASE_PATH/cuda-11.5 -make cuda11x CUDA_VERSION=115 - -if [ ! -f "./bitsandbytes/libbitsandbytes_cuda115.so" ]; then - # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessful!" 1>&2 - exit 64 -fi - -rm -rf build/* -export CUDA_HOME=$BASE_PATH/cuda-11.7 -make cuda11x CUDA_VERSION=117 - -if [ ! -f "./bitsandbytes/libbitsandbytes_cuda117.so" ]; then - # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessful!" 1>&2 - exit 64 -fi - -rm -rf build/* -export CUDA_HOME=$BASE_PATH/cuda-11.8 -make cuda118 CUDA_VERSION=118 - -if [ ! -f "./bitsandbytes/libbitsandbytes_cuda118.so" ]; then - # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessful!" 1>&2 - exit 64 -fi - -rm -rf build/* -export CUDA_HOME=$BASE_PATH/cuda-12.0 -make cuda12x CUDA_VERSION=120 - -if [ ! -f "./bitsandbytes/libbitsandbytes_cuda120.so" ]; then - # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessful!" 1>&2 - exit 64 -fi - -rm -rf build/* -export CUDA_HOME=$BASE_PATH/cuda-12.1 -make cuda12x CUDA_VERSION=121 - -if [ ! -f "./bitsandbytes/libbitsandbytes_cuda121.so" ]; then - # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessful!" 1>&2 - exit 64 -fi - -rm -rf build/* -export CUDA_HOME=$BASE_PATH/cuda-12.2 -make cuda12x CUDA_VERSION=122 - -if [ ! -f "./bitsandbytes/libbitsandbytes_cuda122.so" ]; then - # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessful!" 1>&2 - exit 64 -fi - -rm -rf build/* -export CUDA_HOME=$BASE_PATH/cuda-12.3 -make cuda12x CUDA_VERSION=123 - -if [ ! -f "./bitsandbytes/libbitsandbytes_cuda123.so" ]; then - # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessful!" 1>&2 - exit 64 -fi - -############################# START NO CUBLASLT ############################################# -# binaries without 8-bit matmul support START HERE -# ########################################################################################### - -rm -rf build/* -export CUDA_HOME=$BASE_PATH/cuda-11.0 -make cuda110_nomatmul CUDA_VERSION=110 - -if [ ! -f "./bitsandbytes/libbitsandbytes_cuda110_nocublaslt.so" ]; then - # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessful!" 1>&2 - exit 64 -fi - - -rm -rf build/* -export CUDA_HOME=$BASE_PATH/cuda-11.1 -make cuda11x_nomatmul CUDA_VERSION=111 - -if [ ! -f "./bitsandbytes/libbitsandbytes_cuda111_nocublaslt.so" ]; then - # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessful!" 1>&2 - exit 64 -fi - -rm -rf build/* -export CUDA_HOME=$BASE_PATH/cuda-11.4 -make cuda11x_nomatmul CUDA_VERSION=114 - -if [ ! -f "./bitsandbytes/libbitsandbytes_cuda114_nocublaslt.so" ]; then - # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessful!" 1>&2 - exit 64 -fi - -rm -rf build/* -export CUDA_HOME=$BASE_PATH/cuda-11.5 -make cuda11x_nomatmul CUDA_VERSION=115 - -if [ ! -f "./bitsandbytes/libbitsandbytes_cuda115_nocublaslt.so" ]; then - # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessful!" 1>&2 - exit 64 -fi - -rm -rf build/* -export CUDA_HOME=$BASE_PATH/cuda-11.7 -make cuda11x_nomatmul CUDA_VERSION=117 - -if [ ! -f "./bitsandbytes/libbitsandbytes_cuda117_nocublaslt.so" ]; then - # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessful!" 
1>&2 - exit 64 -fi - -rm -rf build/* -export CUDA_HOME=$BASE_PATH/cuda-11.8 -make cuda118_nomatmul CUDA_VERSION=118 - -if [ ! -f "./bitsandbytes/libbitsandbytes_cuda118_nocublaslt.so" ]; then - # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessful!" 1>&2 - exit 64 -fi - -rm -rf build/* -export CUDA_HOME=$BASE_PATH/cuda-12.0 -make cuda12x_nomatmul CUDA_VERSION=120 - -if [ ! -f "./bitsandbytes/libbitsandbytes_cuda120_nocublaslt.so" ]; then - # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessful!" 1>&2 - exit 64 -fi - -rm -rf build/* -export CUDA_HOME=$BASE_PATH/cuda-12.1 -make cuda12x_nomatmul CUDA_VERSION=121 - -if [ ! -f "./bitsandbytes/libbitsandbytes_cuda121_nocublaslt.so" ]; then - # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessful!" 1>&2 - exit 64 -fi - -rm -rf build/* -export CUDA_HOME=$BASE_PATH/cuda-12.2 -make cuda12x_nomatmul CUDA_VERSION=122 - -if [ ! -f "./bitsandbytes/libbitsandbytes_cuda122_nocublaslt.so" ]; then - # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessful!" 1>&2 - exit 64 -fi - -rm -rf build/* -export CUDA_HOME=$BASE_PATH/cuda-12.3 -make cuda12x_nomatmul CUDA_VERSION=123 - -if [ ! -f "./bitsandbytes/libbitsandbytes_cuda123_nocublaslt.so" ]; then - # Control will enter here if $DIRECTORY doesn't exist. - echo "Compilation unsuccessful!" 1>&2 - exit 64 -fi - -python -m build -python -m twine upload dist/* --verbose diff --git a/environment-bnb.yml b/environment-bnb.yml deleted file mode 100644 index 1214f7930..000000000 --- a/environment-bnb.yml +++ /dev/null @@ -1,21 +0,0 @@ -# for cmake build -name: bnb -channels: - - pytorch - - nvidia - - conda-forge - -dependencies: - - python - #- accelerate - #- einops - - scipy - #- transformers - - pytest - - pytest-cases - - ipython - - debugpy - - yapf - - monkeytype - - rich - - pytest-sugar diff --git a/environment.yml b/environment.yml deleted file mode 100644 index af421b3c6..000000000 --- a/environment.yml +++ /dev/null @@ -1,46 +0,0 @@ -name: bnb -channels: - - pytorch - - nvidia - - conda-forge - -dependencies: - # Base - - conda-forge::python=3.8 - - pytorch::pytorch=>2.1 - - pytorch::pytorch-cuda=11.8 - - nvidia::cuda=11.8 - # Libraries - - conda-forge::accelerate - - conda-forge::einops - - conda-forge::scipy - - conda-forge::transformers - # Development - - conda-forge::pytest - - conda-forge::build # build Python packages - - conda-forge::twine # upload Python packages - - conda-forge::pytest-cases # more readable and composable parametrized tests - - conda-forge::ipython # better interactive shell - - conda-forge::debugpy # debugger-support for VSCode - - conda-forge::ruff # linting - - conda-forge::yapf # code formatting - - conda-forge::monkeytype # infer type annotations - - conda-forge::rich # better, colored tracebacks, etc - - conda-forge::pytest-sugar # better pytest output - # - conda-forge::nodejs # for `doc-builder preview` (optional) - -## ENV CREATION - steps to reproduce: -# mamba env remove -n bnb -# mamba create -y -n bnb python=3.8 # creating an empty env bypasses conda -# # and leads to much faster env resolution in the next step https://github.com/mamba-org/mamba/issues/633#issuecomment-812272143 -# mamba env update -n bnb -f environment.yml -# mamba activate bnb - -## PIP dependencies (install *after* ENV CREATION): -# pip install --no-cache-dir --no-deps lion_pytorch triton hf-doc-builder watchdog -## NOTE: conda peft is not up to date, so we install 
from pip -# cd pip install -e . ## installs bitsandbytes as editable development install from within repo root dir - -## ENV UPDATE: -# # add new packages to environment.yml, then: -# mamba env update -n bnb -f environment.yml diff --git a/tests/test_autograd.py b/tests/test_autograd.py index b6ba284c9..fc2e7aa6f 100644 --- a/tests/test_autograd.py +++ b/tests/test_autograd.py @@ -49,6 +49,10 @@ def test_matmullt( req_grad = list(req_grad) req_grad[2] = False + if device == "cpu" and dtype != torch.float32 and has_fp16_weights and any(req_grad): + if torch.__version__ < (2, 6): + pytest.xfail("mse_loss bf16/fp16 on CPU is not supported in torch < 2.6") + for i in range(3): # normal multiply if funcs[0] in [torch.mm, torch.matmul]: @@ -185,6 +189,9 @@ def test_matmul_4bit( req_grad = list(req_grad) req_grad[2] = False + if device == "cpu" and dtype != torch.float32 and any(req_grad) and torch.__version__ < (2, 6): + pytest.xfail("mse_loss fp16 on CPU is not supported in torch < 2.6") + for i in range(3): # normal multiply if funcs[0] in [torch.mm, torch.matmul]: diff --git a/tests/test_functional.py b/tests/test_functional.py index 0b9390aaa..8568d45f0 100644 --- a/tests/test_functional.py +++ b/tests/test_functional.py @@ -1342,8 +1342,12 @@ def test_gemv_4bit(self, device, dim, dtype, storage_type, quant_storage, double @pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16, torch.float32], ids=describe_dtype) @pytest.mark.parametrize("double_quant", [False], ids=["DQ_True"]) def test_gemv_eye_4bit(self, device, storage_type, dtype, double_quant): - if device == "cpu" and storage_type != "nf4": - pytest.xfail("fp4 quantization is not supported on CPU") + if device == "cpu": + if storage_type != "nf4": + pytest.xfail("fp4 quantization is not supported on CPU") + + if dtype == torch.bfloat16 and torch.__version__ < (2, 3): + pytest.xfail("eye doe not support bfloat16 on CPU in torch < 2.3") dims = 10 torch.random.manual_seed(np.random.randint(0, 412424242)) diff --git a/tests/test_ops.py b/tests/test_ops.py index 4da1663f0..e85bc0ef0 100644 --- a/tests/test_ops.py +++ b/tests/test_ops.py @@ -6,6 +6,13 @@ import bitsandbytes from tests.helpers import TRUE_FALSE, get_available_devices, id_formatter +# torch.library.opcheck is only available in torch 2.4 and later. +# When testing with older versions, we will skip it as a no-op. 
+if torch.__version__ >= (2, 4): + opcheck = torch.library.opcheck +else: + opcheck = lambda *args, **kwargs: None + class TestLLMInt8Ops: @pytest.mark.parametrize("device", get_available_devices()) @@ -18,7 +25,7 @@ def test_int8_linear_matmul(self, device): assert out.dtype == torch.int32 assert out.device == A.device - torch.library.opcheck(torch.ops.bitsandbytes.int8_linear_matmul.default, (A, B)) + opcheck(torch.ops.bitsandbytes.int8_linear_matmul.default, (A, B)) @pytest.mark.parametrize("device", get_available_devices()) def test_int8_linear_matmul_out(self, device): @@ -32,7 +39,7 @@ def test_int8_linear_matmul_out(self, device): assert out.dtype == torch.int32 assert out.device == A.device - torch.library.opcheck(torch.ops.bitsandbytes.int8_linear_matmul.out, (A, B, out)) + opcheck(torch.ops.bitsandbytes.int8_linear_matmul.out, (A, B, out)) @pytest.mark.parametrize("threshold", [0.0, 6.0]) @pytest.mark.parametrize("device", get_available_devices()) @@ -57,9 +64,8 @@ def test_int8_vectorwise_quant(self, threshold, device): else: assert outlier_cols is None - torch.library.opcheck(torch.ops.bitsandbytes.int8_vectorwise_quant, (A,)) - - torch.library.opcheck(torch.ops.bitsandbytes.int8_vectorwise_quant, (A, threshold)) + opcheck(torch.ops.bitsandbytes.int8_vectorwise_quant, (A,)) + opcheck(torch.ops.bitsandbytes.int8_vectorwise_quant, (A, threshold)) @pytest.mark.parametrize("device", get_available_devices()) def test_int8_mm_dequant(self, device): @@ -72,7 +78,7 @@ def test_int8_mm_dequant(self, device): assert out.dtype == torch.float16 assert out.device == A.device - torch.library.opcheck(torch.ops.bitsandbytes.int8_mm_dequant, (A, row_stats, col_stats)) + opcheck(torch.ops.bitsandbytes.int8_mm_dequant, (A, row_stats, col_stats)) @pytest.mark.parametrize("device", get_available_devices()) @pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16, torch.float32], ids=id_formatter("dtype")) @@ -89,7 +95,7 @@ def test_int8_scaled_mm(self, device, dtype, has_bias): assert out.dtype == dtype assert out.device == A.device - torch.library.opcheck(torch.ops.bitsandbytes.int8_scaled_mm, (A, B, row_stats, col_stats, bias, dtype)) + opcheck(torch.ops.bitsandbytes.int8_scaled_mm, (A, B, row_stats, col_stats, bias, dtype)) class TestInt8BlockwiseQuantOps: @@ -115,7 +121,7 @@ def test_quantize_blockwise(self, device, dtype, blocksize): assert absmax.device == A.device assert absmax.dtype == torch.float32 - torch.library.opcheck(torch.ops.bitsandbytes.quantize_blockwise, (A, code, blocksize)) + opcheck(torch.ops.bitsandbytes.quantize_blockwise, (A, code, blocksize)) @pytest.mark.parametrize("device", get_available_devices()) @pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16, torch.float32], ids=id_formatter("dtype")) @@ -137,7 +143,7 @@ def test_dequantize_blockwise(self, device, dtype, blocksize): assert out.dtype == dtype assert out.device == A.device - torch.library.opcheck(torch.ops.bitsandbytes.dequantize_blockwise.default, (A, absmax, code, blocksize, dtype)) + opcheck(torch.ops.bitsandbytes.dequantize_blockwise.default, (A, absmax, code, blocksize, dtype)) class Test4bitBlockwiseQuantOps: @@ -163,7 +169,7 @@ def test_quantize_4bit(self, device, dtype, storage_dtype, quant_type, blocksize assert absmax.device == A.device assert absmax.dtype == torch.float32 - torch.library.opcheck(torch.ops.bitsandbytes.quantize_4bit, (A, blocksize, quant_type, storage_dtype)) + opcheck(torch.ops.bitsandbytes.quantize_4bit, (A, blocksize, quant_type, storage_dtype)) 
@pytest.mark.parametrize("device", get_available_devices()) @pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16, torch.float32], ids=id_formatter("dtype")) @@ -198,8 +204,9 @@ def test_dequantize_4bit(self, device, dtype, storage_dtype, quant_type, blocksi assert out.device == A.device assert out.shape == shape - torch.library.opcheck( - torch.ops.bitsandbytes.dequantize_4bit.default, (A, absmax, blocksize, quant_type, shape, dtype) + opcheck( + torch.ops.bitsandbytes.dequantize_4bit.default, + (A, absmax, blocksize, quant_type, shape, dtype), ) @pytest.mark.parametrize("device", get_available_devices()) @@ -226,4 +233,4 @@ def test_gemv_4bit(self, device, dtype, storage_dtype, quant_type, blocksize): assert out.shape == (1, 1, out_features) assert out.isreal().all() - torch.library.opcheck(torch.ops.bitsandbytes.gemv_4bit.default, (A, B_q, B.shape, absmax, code, blocksize)) + opcheck(torch.ops.bitsandbytes.gemv_4bit.default, (A, B_q, B.shape, absmax, code, blocksize)) diff --git a/tests/test_triton.py b/tests/test_triton.py index 70656a56f..b245e534a 100644 --- a/tests/test_triton.py +++ b/tests/test_triton.py @@ -11,7 +11,7 @@ not is_triton_available() or not torch.cuda.is_available() or not torch.cuda.get_device_capability()[0] >= 8, reason="This test requires triton and a GPU with compute capability 8.0 or higher.", ) -@pytest.mark.skip("No longer supported.") +@pytest.mark.deprecated @pytest.mark.parametrize("vector_wise_quantization", TRUE_FALSE) def test_switchback(vector_wise_quantization): for dim in [83]: From bd49a46e0dbcb9f04b89d14b8d4df2cea9a6ee41 Mon Sep 17 00:00:00 2001 From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com> Date: Fri, 23 May 2025 16:20:19 -0400 Subject: [PATCH 2/5] Tests: WA numpy 2 compat issue for torch<2.3 --- .github/workflows/tests.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 2d206b76a..df154335d 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -136,6 +136,11 @@ jobs: pip install -e ".[test]" pip install pytest-cov + # We need to downgrade to numpy<2 for torch<2.3 compatibility. + - name: Downgrade NumPy + if: startsWith(matrix.torch_version, '2.2.') + run: pip install "numpy<2" + - name: Show installed packages run: pip list @@ -239,6 +244,11 @@ jobs: pip install -e ".[test]" pip install pytest-cov + # We need to downgrade to numpy<2 for torch<2.3 compatibility. 
+ - name: Downgrade NumPy + if: startsWith(matrix.torch_version, '2.2.') + run: pip install "numpy<2" + - name: Show installed packages run: pip list From 3fa8e9234d3d0b5550b0f5b0633f18725f41a27f Mon Sep 17 00:00:00 2001 From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com> Date: Fri, 23 May 2025 17:18:40 -0400 Subject: [PATCH 3/5] Tests: update aarch64 cpu min torch version --- .github/workflows/tests.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index df154335d..a788c2bcb 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -108,10 +108,17 @@ jobs: runner: banb-aws-general-8-plus-use1-public-80 - os: ubuntu-22.04-arm arch: aarch64 + - os: ubuntu-22.04-arm + arch: aarch64 + torch_version: "2.3.1" - os: windows-2025 arch: x86_64 - os: macos-15 arch: arm64 + exclude: + - os: ubuntu-22.04-arm + torch_version: "2.2.2" + runs-on: ${{ matrix.runner || matrix.os }} env: BNB_TEST_DEVICE: cpu From 2a11fa8b5a0639e2308d9ff0d35355a99f82965d Mon Sep 17 00:00:00 2001 From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com> Date: Fri, 23 May 2025 17:31:20 -0400 Subject: [PATCH 4/5] Tests: update aarch64 cpu min torch version --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index a788c2bcb..1939b9961 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -110,7 +110,7 @@ jobs: arch: aarch64 - os: ubuntu-22.04-arm arch: aarch64 - torch_version: "2.3.1" + torch_version: "2.4.1" - os: windows-2025 arch: x86_64 - os: macos-15 From 6c085212b6216bd50e86b690689ae54b526f743a Mon Sep 17 00:00:00 2001 From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com> Date: Fri, 23 May 2025 18:06:39 -0400 Subject: [PATCH 5/5] Tests: update aarch64 cpu min torch version --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 1939b9961..f1a5dca69 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -110,7 +110,7 @@ jobs: arch: aarch64 - os: ubuntu-22.04-arm arch: aarch64 - torch_version: "2.4.1" + torch_version: "2.5.1" - os: windows-2025 arch: x86_64 - os: macos-15
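
For reference, a minimal standalone Python sketch (not part of any commit above) of the two version-gating idioms these test changes rely on: comparing torch.__version__ against a tuple, and shimming torch.library.opcheck to a no-op on torch < 2.4. The variable name needs_mse_loss_workaround is illustrative only and does not appear in the diffs.

import torch

# torch.__version__ is a TorchVersion (a str subclass) that supports ordered
# comparison against tuples of ints, which is what the new xfail guards in
# tests/test_autograd.py and tests/test_functional.py depend on.
needs_mse_loss_workaround = torch.__version__ < (2, 6)

# tests/test_ops.py aliases torch.library.opcheck, which only exists in
# torch >= 2.4, and falls back to a no-op so the same test bodies still run
# on older releases.
if torch.__version__ >= (2, 4):
    opcheck = torch.library.opcheck
else:
    opcheck = lambda *args, **kwargs: None

print(torch.__version__, needs_mse_loss_workaround)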