From d3d68406f6acd998f3c1efad456dbb35013faf84 Mon Sep 17 00:00:00 2001
From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com>
Date: Thu, 12 Jun 2025 12:10:14 -0400
Subject: [PATCH 01/22] Setup XPU CI

---
 .github/workflows/tests.yml | 44 +++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index c6423b1f8..d5c7c2382 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -222,6 +222,50 @@ jobs:
       # - name: Show pip packages
       #   run: pip list
 
+  test-xpu:
+    if: github.repository == 'bitsandbytes-foundation/bitsandbytes'
+    needs: build-cpu
+    runs-on:
+      group: bandb-itac-bmsprpvc1550-8-1gpu
+    env:
+      BNB_TEST_DEVICE: xpu
+    steps:
+      - name: Show system information
+        run: |
+          echo "OS: $(uname -a)"
+          echo "CPU: $(lscpu | grep 'Model name')"
+          echo "Memory: $(free -h)"
+
+      - name: Show XPU Information
+        run: xpu-smi
+
+      - uses: actions/checkout@v4
+
+      - name: Download build artifact
+        uses: actions/download-artifact@v4
+        with:
+          name: lib_cpu_ubuntu-22.04_x86_64
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: 3.9
+
+      - name: Install dependencies
+        run: |
+          pip install torch==2.7.1 --index-url https://download.pytorch.org/whl/xpu
+
+          pip install -e ".[test]"
+          pip install pytest-cov
+
+      - name: Show installed packages
+        run: pip list
+
+      - name: Show environment information
+        run: |
+          python -m torch.utils.collect_env
+          python -m bitsandbytes
+
   test-cuda:
     if: github.repository == 'bitsandbytes-foundation/bitsandbytes'
     needs: build-cuda

From 6e0622c2127d8f03aad69aea292f8040a31479fe Mon Sep 17 00:00:00 2001
From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com>
Date: Thu, 12 Jun 2025 12:38:10 -0400
Subject: [PATCH 02/22] CI: expand XPU matrix

---
 .github/workflows/tests.yml | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index d5c7c2382..d7a62eaae 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -225,6 +225,18 @@ jobs:
   test-xpu:
     if: github.repository == 'bitsandbytes-foundation/bitsandbytes'
     needs: build-cpu
+    strategy:
+      fail-fast: false
+      matrix:
+        torch_version: ["2.6.0", "2.7.1"]
+        ipex: [true, false]
+        include:
+          - torch_version: "2.6.0"
+            ipex: true
+            ipex_version: "2.6.10+xpu"
+          - torch_version: "2.7.1"
+            ipex: true
+            ipex_version: "2.7.10+xpu"
     runs-on:
       group: bandb-itac-bmsprpvc1550-8-1gpu
     env:
@@ -237,7 +249,7 @@ jobs:
           echo "Memory: $(free -h)"
 
       - name: Show XPU Information
-        run: xpu-smi
+        run: xpu-smi discovery
 
       - uses: actions/checkout@v4
 
@@ -245,16 +257,23 @@ jobs:
         uses: actions/download-artifact@v4
         with:
           name: lib_cpu_ubuntu-22.04_x86_64
+          path: bitsandbytes/
+          merge-multiple: true
 
       - name: Setup Python
         uses: actions/setup-python@v5
         with:
           python-version: 3.9
 
+      - name: Install PyTorch
+        run: pip install torch==${{ matrix.torch_version }} --index-url https://download.pytorch.org/whl/xpu
+
+      - name: Install IPEX
+        if: matrix.ipex == true
+        run: pip install intel_extension_for_pytorch==${{ matrix.ipex_version }} --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/
+
       - name: Install dependencies
         run: |
-          pip install torch==2.7.1 --index-url https://download.pytorch.org/whl/xpu
-
           pip install -e ".[test]"
           pip install pytest-cov
 
@@ -265,6 +284,8 @@ jobs:
         run: |
           python -m torch.utils.collect_env
           python -m bitsandbytes
+      - name: Run tests
+        run: pytest --durations=100
 
   test-cuda:
     if: github.repository == 'bitsandbytes-foundation/bitsandbytes'
     needs: build-cuda

From 0623333073ef01b104d0290a2d9483587b904289 Mon Sep 17 00:00:00 2001
From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com>
Date: Thu, 12 Jun 2025 12:52:40 -0400
Subject: [PATCH 03/22] test

---
 .github/workflows/tests.yml | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index d7a62eaae..8549a6ed9 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -228,15 +228,16 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        torch_version: ["2.6.0", "2.7.1"]
-        ipex: [true, false]
-        include:
-          - torch_version: "2.6.0"
-            ipex: true
-            ipex_version: "2.6.10+xpu"
-          - torch_version: "2.7.1"
-            ipex: true
-            ipex_version: "2.7.10+xpu"
+        torch_version: ["2.7.1"] #["2.6.0", "2.7.1"]
+        ipex: [false]
+        # ipex: [true, false]
+        # include:
+        #   - torch_version: "2.6.0"
+        #     ipex: true
+        #     ipex_version: "2.6.10+xpu"
+        #   - torch_version: "2.7.1"
+        #     ipex: true
+        #     ipex_version: "2.7.10+xpu"
     runs-on:
       group: bandb-itac-bmsprpvc1550-8-1gpu
     env:
@@ -249,7 +250,9 @@ jobs:
           echo "Memory: $(free -h)"
 
       - name: Show XPU Information
-        run: xpu-smi discovery
+        run: |
+          xpu-smi discovery
+          hwinfo --display
 
       - uses: actions/checkout@v4
 
@@ -284,8 +287,9 @@ jobs:
         run: |
          python -m torch.utils.collect_env
          python -m bitsandbytes
-      - name: Run tests
-        run: pytest --durations=100
+
+      # - name: Run tests
+      #   run: pytest --durations=100
 
   test-cuda:
     if: github.repository == 'bitsandbytes-foundation/bitsandbytes'

From 43b7d5185aae594a5acdf60a5a0cd385c98e82df Mon Sep 17 00:00:00 2001
From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com>
Date: Thu, 12 Jun 2025 13:38:45 -0400
Subject: [PATCH 04/22] test

---
 .github/workflows/tests.yml | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 8549a6ed9..8af930945 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -222,6 +222,28 @@ jobs:
       # - name: Show pip packages
       #   run: pip list
 
+  test-hpu:
+    if: github.repository == 'bitsandbytes-foundation/bitsandbytes'
+    needs: build-cpu
+    strategy:
+      fail-fast: false
+      matrix:
+        torch_version: ["2.6.0"]
+    runs-on:
+      group: bandb-itac-bmemr-gaudi3-1gaudi
+    env:
+      BNB_TEST_DEVICE: hpu
+    steps:
+      - name: Show system information
+        run: |
+          echo "OS: $(uname -a)"
+          echo "CPU: $(lscpu | grep 'Model name')"
+          echo "Memory: $(free -h)"
+
+      - name: Show HPU Information
+        run: |
+          hpu-smi
+
   test-xpu:
     if: github.repository == 'bitsandbytes-foundation/bitsandbytes'
     needs: build-cpu
@@ -252,6 +274,8 @@ jobs:
       - name: Show XPU Information
         run: |
           xpu-smi discovery
+          lspci
+          apt-get install -y hwinfo
           hwinfo --display
 
       - uses: actions/checkout@v4

From 16f5a88b493aeacd3e2cb1d874e6cd3314c5013f Mon Sep 17 00:00:00 2001
From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com>
Date: Thu, 12 Jun 2025 13:42:33 -0400
Subject: [PATCH 05/22] test

---
 .github/workflows/tests.yml | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 8af930945..f36df8c18 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -231,8 +231,12 @@ jobs:
         torch_version: ["2.6.0"]
     runs-on:
       group: bandb-itac-bmemr-gaudi3-1gaudi
-    env:
-      BNB_TEST_DEVICE: hpu
+    container:
+      image: vault.habana.ai/gaudi-docker/1.20.1/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+      options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
+      env:
+        OMPI_MCA_btl_vader_single_copy_mechanism: none
+        BNB_TEST_DEVICE: hpu
     steps:
       - name: Show system information
         run: |

From fc3746d08106ceb9b46a3a80edf006ae7c0e32ce Mon Sep 17 00:00:00 2001
From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com>
Date: Thu, 12 Jun 2025 13:43:15 -0400
Subject: [PATCH 06/22] test

---
 .github/workflows/tests.yml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index f36df8c18..f2be9a345 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -278,7 +278,6 @@ jobs:
       - name: Show XPU Information
         run: |
           xpu-smi discovery
-          lspci
           apt-get install -y hwinfo
           hwinfo --display
 

From 4c7b755fcfd83e15957d9d0132f6cf3b8aaf6d6d Mon Sep 17 00:00:00 2001
From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com>
Date: Thu, 12 Jun 2025 13:48:48 -0400
Subject: [PATCH 07/22] test

---
 .github/workflows/tests.yml | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index f2be9a345..539d330e4 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -231,12 +231,12 @@ jobs:
         torch_version: ["2.6.0"]
     runs-on:
       group: bandb-itac-bmemr-gaudi3-1gaudi
-    container:
-      image: vault.habana.ai/gaudi-docker/1.20.1/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
-      options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
-      env:
-        OMPI_MCA_btl_vader_single_copy_mechanism: none
-        BNB_TEST_DEVICE: hpu
+    # container:
+    #   image: vault.habana.ai/gaudi-docker/1.20.1/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+    #   options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
+    #   env:
+    #     OMPI_MCA_btl_vader_single_copy_mechanism: none
+    #     BNB_TEST_DEVICE: hpu
     steps:
       - name: Show system information
         run: |
@@ -246,7 +246,7 @@ jobs:
 
       - name: Show HPU Information
         run: |
-          hpu-smi
+          hl-smi
 
   test-xpu:
     if: github.repository == 'bitsandbytes-foundation/bitsandbytes'

From 69fbb636990a378956837b36173d0d3ee8bb5298 Mon Sep 17 00:00:00 2001
From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com>
Date: Thu, 12 Jun 2025 13:49:31 -0400
Subject: [PATCH 08/22] test

---
 .github/workflows/tests.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 539d330e4..5c3bc2742 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -278,7 +278,8 @@ jobs:
       - name: Show XPU Information
         run: |
           xpu-smi discovery
+          sudo xpu-smi discovery
-          apt-get install -y hwinfo
+          sudo apt-get install -y hwinfo
           hwinfo --display
 

From e82c4da1b8305211cada97c83f9591b4e3e86b8a Mon Sep 17 00:00:00 2001
From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com>
Date: Thu, 12 Jun 2025 14:02:59 -0400
Subject: [PATCH 09/22] test

---
 .github/workflows/tests.yml | 42 +++++++++++++++++++++++++++++++------
 1 file changed, 36 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 5c3bc2742..f85994e31 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -231,12 +231,14 @@ jobs:
         torch_version: ["2.6.0"]
     runs-on:
       group: bandb-itac-bmemr-gaudi3-1gaudi
-    # container:
-    #   image: vault.habana.ai/gaudi-docker/1.20.1/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
-    #   options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
-    #   env:
-    #     OMPI_MCA_btl_vader_single_copy_mechanism: none
-    #     BNB_TEST_DEVICE: hpu
+    env:
+      BNB_TEST_DEVICE: hpu
+    container:
+      image: vault.habana.ai/gaudi-docker/1.20.1/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+      options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
+      env:
+        OMPI_MCA_btl_vader_single_copy_mechanism: none
+        BNB_TEST_DEVICE: hpu
     steps:
       - name: Show system information
         run: |
@@ -248,6 +250,34 @@ jobs:
         run: |
           hl-smi
 
+      - uses: actions/checkout@v4
+
+      - name: Download build artifact
+        uses: actions/download-artifact@v4
+        with:
+          name: lib_cpu_ubuntu-22.04_x86_64
+          path: bitsandbytes/
+          merge-multiple: true
+
+      - name: Show installed packages
+        run: pip list
+
+      - name: Install dependencies
+        run: |
+          pip install -e ".[test]"
+          pip install pytest-cov
+
+      - name: Show installed packages
+        run: pip list
+
+      - name: Show environment information
+        run: |
+          python -m torch.utils.collect_env
+          python -m bitsandbytes
+
+      - name: Run tests
+        run: pytest --durations=100
+
   test-xpu:
     if: github.repository == 'bitsandbytes-foundation/bitsandbytes'
     needs: build-cpu

From be47d49a23ee695bf54ad427476cad79c4bad149 Mon Sep 17 00:00:00 2001
From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com>
Date: Thu, 12 Jun 2025 14:38:01 -0400
Subject: [PATCH 10/22] test

---
 bitsandbytes/__init__.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/bitsandbytes/__init__.py b/bitsandbytes/__init__.py
index 8fcf57a65..06b4bf2c7 100644
--- a/bitsandbytes/__init__.py
+++ b/bitsandbytes/__init__.py
@@ -4,6 +4,7 @@
 # LICENSE file in the root directory of this source tree.
 
+import importlib
 import sys
 
 import torch
@@ -37,8 +38,13 @@
 if hasattr(torch, "xpu") and torch.xpu.is_available():
     from .backends.xpu import ops as xpu_ops
 
-if hasattr(torch, "hpu") and torch.hpu.is_available():
-    from .backends.hpu import ops as hpu_ops
+
+if importlib.util.find_spec("habana_frameworks.torch"):
+    # In case not automatically imported
+    import habana_frameworks.torch  # type: ignore # noqa: I001
+
+    if hasattr(torch, "hpu") and torch.hpu.is_available():
+        from .backends.hpu import ops as hpu_ops
 
 
 def _import_backends():

From 4faa8e2cd83353f3ec64732f0649fef9b2cb3569 Mon Sep 17 00:00:00 2001
From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com>
Date: Thu, 12 Jun 2025 14:48:22 -0400
Subject: [PATCH 11/22] test

---
 bitsandbytes/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bitsandbytes/__init__.py b/bitsandbytes/__init__.py
index 06b4bf2c7..bd9b41e28 100644
--- a/bitsandbytes/__init__.py
+++ b/bitsandbytes/__init__.py
@@ -39,9 +39,9 @@
     from .backends.xpu import ops as xpu_ops
 
 
-if importlib.util.find_spec("habana_frameworks.torch"):
+if importlib.util.find_spec("habana_frameworks") and importlib.util.find_spec("habana_frameworks.torch"):
     # In case not automatically imported
-    import habana_frameworks.torch  # type: ignore # noqa: I001
+    import habana_frameworks.torch  # noqa: I001
 
     if hasattr(torch, "hpu") and torch.hpu.is_available():
         from .backends.hpu import ops as hpu_ops

From 5dae4a835cab3e0b9697d76217999061206d6e24 Mon Sep 17 00:00:00 2001
From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com>
Date: Thu, 12 Jun 2025 15:14:04 -0400
Subject: [PATCH 12/22] test

---
 .github/workflows/tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index f85994e31..0d3884593 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -234,7 +234,7 @@ jobs:
     env:
       BNB_TEST_DEVICE: hpu
     container:
-      image: vault.habana.ai/gaudi-docker/1.20.1/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
+      image: vault.habana.ai/gaudi-docker/1.21.1/ubuntu22.04/habanalabs/pytorch-installer-2.6.0:latest
       options: --runtime=habana --shm-size=64G --env HABANA_VISIBLE_DEVICES --env HABANA_VISIBLE_MODULES
       env:
         OMPI_MCA_btl_vader_single_copy_mechanism: none

From 5c736a7c04acd7c65db628442e56654b314f58cd Mon Sep 17 00:00:00 2001
From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com>
Date: Thu, 12 Jun 2025 16:23:19 -0400
Subject: [PATCH 13/22] skip some fp4 tests on hpu

---
 tests/test_autograd.py   | 3 +++
 tests/test_linear4bit.py | 3 +++
 tests/test_ops.py        | 6 ++++++
 3 files changed, 12 insertions(+)

diff --git a/tests/test_autograd.py b/tests/test_autograd.py
index 5fbe1065f..26cbab413 100644
--- a/tests/test_autograd.py
+++ b/tests/test_autograd.py
@@ -189,6 +189,9 @@ def test_matmul_4bit(
     if device == "cpu" and dtype != torch.float32 and any(req_grad) and torch.__version__ < (2, 6):
         pytest.xfail("mse_loss fp16 on CPU is not supported in torch < 2.6")
 
+    if device == "hpu" and quant_type != "nf4":
+        pytest.skip("HPU only supports nf4")
+
     for i in range(3):
         # normal multiply
         if funcs[0] in [torch.mm, torch.matmul]:
diff --git a/tests/test_linear4bit.py b/tests/test_linear4bit.py
index f28bfa29e..f433f5d87 100644
--- a/tests/test_linear4bit.py
+++ b/tests/test_linear4bit.py
@@ -276,6 +276,9 @@ def test_linear4bit_torch_compile(device, quant_type, compute_dtype, compress_st
     if device == "cuda" and platform.system() == "Windows":
         pytest.skip("Triton is not officially supported on Windows")
 
+    if device == "hpu" and quant_type != "nf4":
+        pytest.skip("fp4 dequantization is not supported on HPU")
+
     # Has a strange regression on Linux aarch64 CPU in torch==2.6.0 when fullgraph=False.
     if (
         not fullgraph
diff --git a/tests/test_ops.py b/tests/test_ops.py
index 60c47a250..c58d0d1ac 100644
--- a/tests/test_ops.py
+++ b/tests/test_ops.py
@@ -179,6 +179,9 @@ def test_quantize_4bit(self, device, dtype, storage_dtype, quant_type, blocksize
     @pytest.mark.parametrize("quant_type", ["fp4", "nf4"])
     @pytest.mark.parametrize("blocksize", [64, 128, 256, 512])
     def test_dequantize_4bit(self, device, dtype, storage_dtype, quant_type, blocksize):
+        if device == "hpu" and quant_type != "nf4":
+            pytest.skip("fp4 dequantization is not supported on HPU")
+
         shape = (128, 128)
 
         n = prod(shape)
@@ -210,6 +213,9 @@ def test_dequantize_4bit(self, device, dtype, storage_dtype, quant_type, blocksi
     @pytest.mark.parametrize("quant_type", ["fp4", "nf4"])
     @pytest.mark.parametrize("blocksize", [64, 128, 256, 512])
     def test_gemv_4bit(self, device, dtype, storage_dtype, quant_type, blocksize):
+        if device == "hpu" and quant_type != "nf4":
+            pytest.skip("fp4 dequantization is not supported on HPU")
+
         out_features = 1024
         in_features = 256

From bdd28f2b838c59f0feb48024c1c3790d1461b232 Mon Sep 17 00:00:00 2001
From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com>
Date: Thu, 12 Jun 2025 16:59:28 -0400
Subject: [PATCH 14/22] skip some fp4 tests on hpu

---
 tests/test_functional.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tests/test_functional.py b/tests/test_functional.py
index 2e2e898cc..7c7c0fec8 100644
--- a/tests/test_functional.py
+++ b/tests/test_functional.py
@@ -1101,6 +1101,9 @@ class TestQuantize4BitFunctional:
     @pytest.mark.parametrize("quant_type", ["fp4", "nf4"])
     @pytest.mark.parametrize("blocksize", [64, 128, 256, 512, 1024, 2048, 4096])
     def test_4bit_quant(self, device, dtype, quant_type, blocksize):
+        if device == "hpu" and quant_type != "nf4":
+            pytest.skip("fp4 dequantization is not supported on HPU")
+
         A1 = torch.randn(1024, 1024, device=device, dtype=dtype)
         qa, SA = F.quantize_4bit(A1, blocksize=blocksize, quant_type=quant_type)
         A2 = F.dequantize_4bit(qa, SA, blocksize=blocksize, quant_type=quant_type)
@@ -1133,6 +1136,9 @@ def test_4bit_quant(self, device, dtype, quant_type, blocksize):
     @pytest.mark.parametrize("quant_type", ["fp4", "nf4"])
     @pytest.mark.parametrize("blocksize", [64, 128], ids=id_formatter("blocksize"))
     def test_4bit_compressed_stats(self, device, quant_type, blocksize):
+        if device == "hpu" and quant_type != "nf4":
+            pytest.skip("fp4 dequantization is not supported on HPU")
+
         errs1 = []
         errs2 = []
         for i in range(10):
@@ -1205,6 +1211,9 @@ def test_bench_4bit_dequant(self, quant_type):
     )
     @pytest.mark.parametrize("dim", [128, 256, 512, 1024], ids=id_formatter("dim"))
     def test_gemv_4bit(self, device, dim, dtype, storage_type, quant_storage, double_quant, kind):
+        if device == "hpu" and storage_type != "nf4":
+            pytest.skip("fp4 dequantization is not supported on HPU")
+
         errs1 = []
         errs2 = []
         errs3 = []
@@ -1354,6 +1363,9 @@ def test_gemv_eye_4bit(self, device, storage_type, dtype, double_quant):
         if device == "cpu" and dtype == torch.bfloat16 and torch.__version__ < (2, 3):
             pytest.skip("eye doe not support bfloat16 on CPU in torch < 2.3")
 
+        if device == "hpu" and storage_type != "nf4":
+            pytest.skip("fp4 dequantization is not supported on HPU")
+
         dims = 10
         torch.random.manual_seed(np.random.randint(0, 412424242))
         dims = get_test_dims(0, 8192, n=dims)

From 55da7f397e91a1436352b07c6f8cec79047c99e2 Mon Sep 17 00:00:00 2001
From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com>
Date: Thu, 12 Jun 2025 17:36:30 -0400
Subject: [PATCH 15/22] skip gemv tests on hpu

---
 tests/test_functional.py | 4 ++--
 tests/test_ops.py        | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/test_functional.py b/tests/test_functional.py
index 7c7c0fec8..8ec43c244 100644
--- a/tests/test_functional.py
+++ b/tests/test_functional.py
@@ -1211,8 +1211,8 @@ def test_bench_4bit_dequant(self, quant_type):
     )
     @pytest.mark.parametrize("dim", [128, 256, 512, 1024], ids=id_formatter("dim"))
     def test_gemv_4bit(self, device, dim, dtype, storage_type, quant_storage, double_quant, kind):
-        if device == "hpu" and storage_type != "nf4":
-            pytest.skip("fp4 dequantization is not supported on HPU")
+        if device == "hpu":
+            pytest.skip("gemv not supported on HPU")
 
         errs1 = []
         errs2 = []
diff --git a/tests/test_ops.py b/tests/test_ops.py
index c58d0d1ac..fb5a399d5 100644
--- a/tests/test_ops.py
+++ b/tests/test_ops.py
@@ -213,8 +213,8 @@ def test_dequantize_4bit(self, device, dtype, storage_dtype, quant_type, blocksi
     @pytest.mark.parametrize("quant_type", ["fp4", "nf4"])
     @pytest.mark.parametrize("blocksize", [64, 128, 256, 512])
     def test_gemv_4bit(self, device, dtype, storage_dtype, quant_type, blocksize):
-        if device == "hpu" and quant_type != "nf4":
-            pytest.skip("fp4 dequantization is not supported on HPU")
+        if device == "hpu":
+            pytest.skip("gemv not supported on HPU")
 
         out_features = 1024
         in_features = 256

From 3f97860545950bf7148c3eb8e98b4f0105ad7cba Mon Sep 17 00:00:00 2001
From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com>
Date: Thu, 12 Jun 2025 18:22:01 -0400
Subject: [PATCH 16/22] test

---
 tests/test_autograd.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tests/test_autograd.py b/tests/test_autograd.py
index 26cbab413..0ea430220 100644
--- a/tests/test_autograd.py
+++ b/tests/test_autograd.py
@@ -233,6 +233,9 @@ def test_matmul_4bit(
             out_bnb.data.copy_(out_torch)
             if device == "cuda":
                 torch.cuda.synchronize()
+            elif device == "hpu":
+                torch.hpu.synchronize()
+
             loss_bnb = torch.nn.functional.mse_loss(out_bnb, target).mean()
             loss_bnb.backward()
             gradA1 = A.grad

From de6057be7672901c2c68ecfcce4ab854809ea6b3 Mon Sep 17 00:00:00 2001
From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com>
Date: Mon, 16 Jun 2025 14:18:54 -0400
Subject: [PATCH 17/22] Additional test patches for HPU

---
 tests/test_linear4bit.py   |  3 ---
 tests/test_linear8bitlt.py |  3 ++-
 tests/test_modules.py      | 20 +++++++++++++++++++-
 3 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/tests/test_linear4bit.py b/tests/test_linear4bit.py
index 28b04ded8..9fcde695d 100644
--- a/tests/test_linear4bit.py
+++ b/tests/test_linear4bit.py
@@ -294,9 +294,6 @@ def test_linear4bit_torch_compile(device, quant_type, compute_dtype, compress_st
     if device == "cuda" and platform.system() == "Windows":
         pytest.skip("Triton is not officially supported on Windows")
 
-    if device == "hpu" and quant_type != "nf4":
-        pytest.skip("fp4 dequantization is not supported on HPU")
-
     # Has a strange regression on Linux aarch64 CPU in torch==2.6.0 when fullgraph=False.
     if (
         not fullgraph
diff --git a/tests/test_linear8bitlt.py b/tests/test_linear8bitlt.py
index 271920b11..86726bd44 100644
--- a/tests/test_linear8bitlt.py
+++ b/tests/test_linear8bitlt.py
@@ -257,7 +257,8 @@ def test_linear8bitlt_torch_compile(device, threshold, bias, fullgraph, mode):
         ref_output = net(x)
 
     # Compile the model
-    compiled_net = torch.compile(net, fullgraph=fullgraph, mode=mode)
+    compile_backend = "hpu_backend" if device == "hpu" else "inductor"
+    compiled_net = torch.compile(net, fullgraph=fullgraph, mode=mode, backend=compile_backend)
 
     # Get output from compiled model
     with torch.no_grad():
diff --git a/tests/test_modules.py b/tests/test_modules.py
index 9eeb79f76..f996c45a1 100644
--- a/tests/test_modules.py
+++ b/tests/test_modules.py
@@ -5,7 +5,7 @@
 from torch import nn
 
 import bitsandbytes as bnb
-from tests.helpers import get_available_devices, id_formatter
+from tests.helpers import get_available_devices, id_formatter, is_supported_on_hpu
 
 
 class MockArgs:
@@ -295,7 +295,13 @@ def test_kbit_backprop(device, module):
     torch.nn.init.kaiming_normal_(ref[0].weight)
     torch.nn.init.kaiming_normal_(ref[1].weight)
     ref[1].weight.requires_grad_(False)
+
     kbit = nn.Sequential(*[torch.nn.Linear(dim1, dim2), module(dim2, 128)])
+
+    if device == "hpu":
+        if isinstance(module, bnb.nn.LinearFP4):
+            pytest.skip("FP4 is not supported on HPU")
+
     kbit[0].weight.detach().copy_(ref[0].weight)
     kbit[1].weight.detach().copy_(ref[1].weight)
     kbit[0].bias.detach().copy_(ref[0].bias)
@@ -358,6 +364,12 @@ def test_kbit_backprop(device, module):
     ids=lambda x: x.__name__ if inspect.isclass(x) else str(x),
 )
 def test_embedding_lossless(device, embedding_class, input_shape, embedding_dim, quant_storage):
+    if device == "hpu":
+        if embedding_class is bnb.nn.EmbeddingFP4:
+            pytest.skip("FP4 is not supported on HPU")
+        elif embedding_class is bnb.nn.EmbeddingNF4 and not is_supported_on_hpu("nf4", torch.float32, quant_storage):
+            pytest.skip("This configuration is not supported on HPU")
+
     num_embeddings = 128
 
     src_weight = (torch.randn((num_embeddings, embedding_dim), dtype=torch.float32) > 0).to(
@@ -403,6 +415,12 @@ def test_embedding_lossless(device, embedding_class, input_shape, embedding_dim,
     ids=lambda x: x.__name__ if inspect.isclass(x) else str(x),
 )
 def test_embedding_error(device, embedding_class, input_shape, embedding_dim, quant_storage):
+    if device == "hpu":
+        if embedding_class is bnb.nn.EmbeddingFP4:
+            pytest.skip("FP4 is not supported on HPU")
+        elif embedding_class is bnb.nn.EmbeddingNF4 and not is_supported_on_hpu("nf4", torch.float32, quant_storage):
+            pytest.skip("This configuration is not supported on HPU")
+
     is_8bit = embedding_class is bnb.nn.Embedding8bit
 
     num_embeddings = 128

From 214c3f3abaf77bd17a397d71a02dc2fdcb083f9f Mon Sep 17 00:00:00 2001
From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com>
Date: Mon, 16 Jun 2025 14:44:55 -0400
Subject: [PATCH 18/22] HPU test update

---
 tests/test_modules.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_modules.py b/tests/test_modules.py
index f996c45a1..52d187a18 100644
--- a/tests/test_modules.py
+++ b/tests/test_modules.py
@@ -299,7 +299,7 @@ def test_kbit_backprop(device, module):
     kbit = nn.Sequential(*[torch.nn.Linear(dim1, dim2), module(dim2, 128)])
 
     if device == "hpu":
-        if isinstance(module, bnb.nn.LinearFP4):
+        if isinstance(kbit, bnb.nn.LinearFP4):
             pytest.skip("FP4 is not supported on HPU")
 
     kbit[0].weight.detach().copy_(ref[0].weight)

From a1b333167d2093d20d0ca0c760b1c9533aa00e40 Mon Sep 17 00:00:00 2001
From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com>
Date: Mon, 16 Jun 2025 15:20:34 -0400
Subject: [PATCH 19/22] HPU test update

---
 tests/test_modules.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/test_modules.py b/tests/test_modules.py
index 52d187a18..b89dafc6d 100644
--- a/tests/test_modules.py
+++ b/tests/test_modules.py
@@ -298,9 +298,8 @@ def test_kbit_backprop(device, module):
 
     kbit = nn.Sequential(*[torch.nn.Linear(dim1, dim2), module(dim2, 128)])
 
-    if device == "hpu":
-        if isinstance(kbit, bnb.nn.LinearFP4):
-            pytest.skip("FP4 is not supported on HPU")
+    if device == "hpu" and isinstance(kbit[1], bnb.nn.LinearFP4):
+        pytest.skip("FP4 is not supported on HPU")
 
     kbit[0].weight.detach().copy_(ref[0].weight)
     kbit[1].weight.detach().copy_(ref[1].weight)

From 0a7f959ce1d04272ff40e3e22d8e51b48e8a5fd1 Mon Sep 17 00:00:00 2001
From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com>
Date: Mon, 16 Jun 2025 15:39:48 -0400
Subject: [PATCH 20/22] HPU test update

---
 tests/test_modules.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/test_modules.py b/tests/test_modules.py
index b89dafc6d..bdfa830f4 100644
--- a/tests/test_modules.py
+++ b/tests/test_modules.py
@@ -276,9 +276,9 @@ def test_linear_kbit_fp32_bias(device, module):
     "NF4": bnb.nn.LinearNF4,
     "FP4+C": lambda d1, d2: bnb.nn.LinearFP4(d1, d2, compress_statistics=True),
     "NF4+C": lambda d1, d2: bnb.nn.LinearNF4(d1, d2, compress_statistics=True),
-    "NF4+fp32": lambda d1, d2: bnb.nn.LinearFP4(d1, d2, compute_dtype=torch.float32),
-    "NF4+fp16": lambda d1, d2: bnb.nn.LinearFP4(d1, d2, compute_dtype=torch.float16),
-    "NF4+bf16": lambda d1, d2: bnb.nn.LinearFP4(d1, d2, compute_dtype=torch.bfloat16),
+    "NF4+fp32": lambda d1, d2: bnb.nn.LinearNF4(d1, d2, compute_dtype=torch.float32),
+    "NF4+fp16": lambda d1, d2: bnb.nn.LinearNF4(d1, d2, compute_dtype=torch.float16),
+    "NF4+bf16": lambda d1, d2: bnb.nn.LinearNF4(d1, d2, compute_dtype=torch.bfloat16),
 }
 
 

From 2ba4b8feb48b947ed7849f9cec2831434ef85b3b Mon Sep 17 00:00:00 2001
From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com>
Date: Mon, 16 Jun 2025 17:37:55 -0400
Subject: [PATCH 21/22] HPU test update

---
 tests/test_modules.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_modules.py b/tests/test_modules.py
index bdfa830f4..e35afb214 100644
--- a/tests/test_modules.py
+++ b/tests/test_modules.py
@@ -298,7 +298,7 @@ def test_kbit_backprop(device, module):
 
     kbit = nn.Sequential(*[torch.nn.Linear(dim1, dim2), module(dim2, 128)])
 
-    if device == "hpu" and isinstance(kbit[1], bnb.nn.LinearFP4):
+    if device == "hpu" and isinstance(kbit[1], bnb.nn.Linear4bit) and kbit[1].weight.quant_type == "fp4":
         pytest.skip("FP4 is not supported on HPU")
 
     kbit[0].weight.detach().copy_(ref[0].weight)

From 0c529a710117c6b047cc126faf18aeff88e01e03 Mon Sep 17 00:00:00 2001
From: Matthew Douglas <38992547+matthewdouglas@users.noreply.github.com>
Date: Tue, 17 Jun 2025 12:18:28 -0400
Subject: [PATCH 22/22] Format

---
 bitsandbytes/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bitsandbytes/__init__.py b/bitsandbytes/__init__.py
index bd9b41e28..516afa51f 100644
--- a/bitsandbytes/__init__.py
+++ b/bitsandbytes/__init__.py
@@ -41,7 +41,7 @@
 
 if importlib.util.find_spec("habana_frameworks") and importlib.util.find_spec("habana_frameworks.torch"):
     # In case not automatically imported
-    import habana_frameworks.torch  # noqa: I001
+    import habana_frameworks.torch
 
     if hasattr(torch, "hpu") and torch.hpu.is_available():
         from .backends.hpu import ops as hpu_ops