From 1f2a970ea5a4ad1748105af2f864046545704cd1 Mon Sep 17 00:00:00 2001
From: Titus von Koeller <9048635+Titus-von-Koeller@users.noreply.github.com>
Date: Mon, 28 Apr 2025 16:03:34 +0000
Subject: [PATCH 01/12] C lib loading: add fallback with sensible error msg

---
 bitsandbytes/cextension.py | 59 ++++++++++++++++++++++++++++++--------
 1 file changed, 47 insertions(+), 12 deletions(-)

diff --git a/bitsandbytes/cextension.py b/bitsandbytes/cextension.py
index e51ef7972..0060cd456 100644
--- a/bitsandbytes/cextension.py
+++ b/bitsandbytes/cextension.py
@@ -61,7 +61,37 @@ def __init__(self, lib: ct.CDLL):
         lib.cget_managed_ptr.restype = ct.c_void_p
 
 
+class MockBNBNativeLibrary(BNBNativeLibrary):
+    """
+    Mock BNBNativeLibrary that raises an error when trying to use native library functionality without successfully loading the library.
+
+    Any method or attribute access will raise a RuntimeError with a message that points to the original error and provides troubleshooting steps.
+    """
+
+    def __init__(self, error_msg: str):
+        self.error_msg = error_msg
+
+    def __getattr__(self, name):
+        base_msg = "Attempted to use bitsandbytes native library functionality but it's not available.\n\n"
+        original_error = f"Original error: {self.error_msg}\n\n" if self.error_msg else ""
+        troubleshooting = (
+            "This typically happens when:\n"
+            "1. BNB doesn't ship with a pre-compiled binary for your CUDA version\n"
+            "2. The library wasn't compiled properly during installation\n"
+            "3. Missing CUDA dependencies\n"
+            "4. PyTorch/bitsandbytes version mismatch\n\n"
+            "Run 'python -m bitsandbytes' for diagnostics."
+        )
+        raise RuntimeError(base_msg + original_error + troubleshooting)
+
+    def __getitem__(self, name):
+        return self.__getattr__(name)
+
+
 def get_native_library() -> BNBNativeLibrary:
+    """
+    Load CUDA library XOR CPU, as the latter contains a subset of symbols of the former.
+    """
     binary_path = PACKAGE_DIR / f"libbitsandbytes_cpu{DYNAMIC_LIBRARY_SUFFIX}"
     cuda_specs = get_cuda_specs()
     if cuda_specs:
@@ -86,17 +116,22 @@ def get_native_library() -> BNBNativeLibrary:
 try:
     lib = get_native_library()
 except Exception as e:
-    lib = None
-    logger.error(f"Could not load bitsandbytes native library: {e}", exc_info=True)
-    if torch.cuda.is_available():
-        logger.warning(
-            """
-CUDA Setup failed despite CUDA being available. Please run the following command to get more information:
-
-python -m bitsandbytes
-
-Inspect the output of the command and see if you can locate CUDA libraries. You might need to add them
-to your LD_LIBRARY_PATH. If you suspect a bug, please take the information from python -m bitsandbytes
-and open an issue at: https://github.com/bitsandbytes-foundation/bitsandbytes/issues
-""",
+    error_msg = f"Could not load bitsandbytes native library: {e}"
+    logger.error(error_msg, exc_info=True)
+
+    diagnostic_help = ""
+    if torch.cuda.is_available():
+        diagnostic_help = (
+            "CUDA Setup failed despite CUDA being available. "
+            "Please run the following command to get more information:\n\n"
+            "python -m bitsandbytes\n\n"
+            "Inspect the output of the command and see if you can locate CUDA libraries. "
+            "You might need to add them to your LD_LIBRARY_PATH. "
+            "If you suspect a bug, please take the information from the command and open an issue at:\n\n"
+            "https://github.com/bitsandbytes-foundation/bitsandbytes/issues\n\n"
+            "If you are using a custom CUDA version, you might need to set the BNB_CUDA_VERSION "
+            "environment variable to the correct version."
         )
+
+    # create a mock with error messaging as fallback
+    lib = MockBNBNativeLibrary(diagnostic_help)
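
Patch 01's key idea: instead of leaving `lib = None` (which made every later `lib.foo` fail with a cryptic AttributeError on NoneType), the import keeps succeeding and the stored load error resurfaces on first use. A minimal self-contained sketch of that deferred-error pattern (illustrative names, not the patch's API):

    class MissingLibProxy:
        """Stand-in for a native library that failed to load."""

        def __init__(self, reason: str):
            self._reason = reason  # remember why loading failed

        def __getattr__(self, name):
            # Called for any symbol lookup: surface the stored error.
            raise RuntimeError(f"native library unavailable ({self._reason}); accessed {name!r}")

    lib = MissingLibProxy("libbitsandbytes_cpu.so not found")
    try:
        lib.cquantize_blockwise
    except RuntimeError as err:
        print(err)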
From c2480e350ffc096eaad924ca26fb6f5ea3dd088f Mon Sep 17 00:00:00 2001
From: Titus von Koeller <9048635+Titus-von-Koeller@users.noreply.github.com>
Date: Tue, 29 Apr 2025 15:03:24 +0000
Subject: [PATCH 02/12] further improvements to C lib fallback

---
 bitsandbytes/cextension.py | 63 ++++++++++++++++++++++++++++++++------
 1 file changed, 53 insertions(+), 10 deletions(-)

diff --git a/bitsandbytes/cextension.py b/bitsandbytes/cextension.py
index 0060cd456..a83659cf1 100644
--- a/bitsandbytes/cextension.py
+++ b/bitsandbytes/cextension.py
@@ -7,7 +7,7 @@
 import torch
 
 from bitsandbytes.consts import DYNAMIC_LIBRARY_SUFFIX, PACKAGE_DIR
-from bitsandbytes.cuda_specs import CUDASpecs, get_cuda_specs
+from bitsandbytes.cuda_specs import CUDASpecs, get_cuda_specs, get_cuda_version_tuple
 
 logger = logging.getLogger(__name__)
 
@@ -61,28 +61,71 @@ def __init__(self, lib: ct.CDLL):
         lib.cget_managed_ptr.restype = ct.c_void_p
 
 
+def get_available_cuda_binaries() -> list[str]:
+    """Get formatted CUDA versions from existing library files using cuda_specs logic"""
+    lib_pattern = f"libbitsandbytes_cuda*{DYNAMIC_LIBRARY_SUFFIX}"
+    versions = []
+    for lib in Path(__file__).parent.glob(lib_pattern):
+        match = re.search(r"cuda(\d{3})", lib.name)
+        if match:
+            ver_code = int(match.group(1))
+            major = ver_code // 10
+            minor = ver_code % 10
+            versions.append(f"{major}.{minor}")
+    return sorted(versions)
+
+
 class MockBNBNativeLibrary(BNBNativeLibrary):
     """
-    Mock BNBNativeLibrary that raises an error when trying to use native library functionality without successfully loading the library.
-
-    Any method or attribute access will raise a RuntimeError with a message that points to the original error and provides troubleshooting steps.
+    Mock BNBNativeLibrary that raises an error when trying to use native library
+    functionality without successfully loading the library.
+    Any method or attribute access will raise a RuntimeError with a message that
+    points to the original error and provides troubleshooting steps.
     """
 
     def __init__(self, error_msg: str):
        self.error_msg = error_msg
+        self.user_cuda_version = get_cuda_version_tuple()
 
     def __getattr__(self, name):
+        available_versions = get_available_cuda_binaries()
+        version_list = ", ".join(available_versions) if available_versions else "none"
+
+        user_ver = "Not detected"
+        if self.user_cuda_version:
+            user_ver = f"{self.user_cuda_version[0]}.{self.user_cuda_version[1]}"
+
+        override_value = os.environ.get("BNB_CUDA_VERSION", None)
+        override_info = (
+            f"\nCUDA version overridden with BNB_CUDA_VERSION={override_value} environment variable"
+            if override_value
+            else ""
+        )
+
+        note = "To make bitsandbytes work, the compiled version of the library must match the corresponding linked CUDA version. If you are using a CUDA version that doesn't come with a pre-compiled binary, the only solution is to compile the library from source."
+
+        cuda_info = (
+            f"Detected PyTorch CUDA version: {user_ver}\n"
+            f"Available pre-compiled bitsandbytes binaries for CUDA versions: {version_list}"
+            + override_info
+            + "\n\n"
+            + note
+            + "\n\n"
+        )
+
         base_msg = "Attempted to use bitsandbytes native library functionality but it's not available.\n\n"
         original_error = f"Original error: {self.error_msg}\n\n" if self.error_msg else ""
         troubleshooting = (
             "This typically happens when:\n"
             "1. BNB doesn't ship with a pre-compiled binary for your CUDA version\n"
-            "2. The library wasn't compiled properly during installation\n"
-            "3. Missing CUDA dependencies\n"
-            "4. PyTorch/bitsandbytes version mismatch\n\n"
-            "Run 'python -m bitsandbytes' for diagnostics."
+            "2. The library wasn't compiled properly during installation from source\n"
+            "3. Missing CUDA dependencies\n\n"
         )
-        raise RuntimeError(base_msg + original_error + troubleshooting)
+        err_msg = (
+            base_msg + troubleshooting + cuda_info + original_error + ("Run 'python -m bitsandbytes' for diagnostics.")
+        )
+
+        raise RuntimeError(err_msg)
 
     def __getitem__(self, name):
         return self.__getattr__(name)
@@ -117,7 +160,7 @@ def get_native_library() -> BNBNativeLibrary:
     lib = get_native_library()
 except Exception as e:
     error_msg = f"Could not load bitsandbytes native library: {e}"
-    logger.error(error_msg, exc_info=True)
+    logger.error(error_msg, exc_info=False)
 
     diagnostic_help = ""
     if torch.cuda.is_available():
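
Patch 02's binary scan decodes the three-digit code in shipped filenames as major * 10 + minor. A quick standalone check of that arithmetic against the libbitsandbytes_cuda### naming the glob targets (example filenames only):

    import re

    for name in ["libbitsandbytes_cuda118.so", "libbitsandbytes_cuda124.so"]:
        code = int(re.search(r"cuda(\d{3})", name).group(1))
        print(name, "->", f"{code // 10}.{code % 10}")  # 11.8 and 12.4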
From 1c1b2576eb298d3e249da3186ae0382a00c0ec30 Mon Sep 17 00:00:00 2001
From: Titus von Koeller <9048635+Titus-von-Koeller@users.noreply.github.com>
Date: Tue, 29 Apr 2025 17:37:57 +0000
Subject: [PATCH 03/12] further tweaks to reporting

---
 bitsandbytes/cextension.py | 157 ++++++++++++++++++++++++++++---------
 1 file changed, 122 insertions(+), 35 deletions(-)

diff --git a/bitsandbytes/cextension.py b/bitsandbytes/cextension.py
index a83659cf1..0b68c581d 100644
--- a/bitsandbytes/cextension.py
+++ b/bitsandbytes/cextension.py
@@ -3,6 +3,7 @@
 import os
 from pathlib import Path
 import re
+from typing import Optional
 
 import torch
 
@@ -75,6 +76,91 @@ def get_available_cuda_binaries() -> list[str]:
     return sorted(versions)
 
 
+def parse_cuda_version(version_str: str) -> str:
+    """Convert raw version string (e.g. '118' from env var) to formatted version (e.g. '11.8')"""
+    if version_str.isdigit() and len(version_str) == 3:
+        return f"{version_str[:2]}.{version_str[2]}"
+    return version_str  # fallback as safety net
+
+
+def _format_cuda_error_message(
+    available_versions: list[str],
+    user_cuda_version: str,
+    override_info: str,
+    original_error: str = "",
+    include_diagnostics: bool = False,
+    include_override_notes: bool = False,
+    required_version: Optional[str] = None,
+    version_missing: bool = False,
+) -> str:
+    version_list = ", ".join(available_versions) if available_versions else "none"
+    base_msg = "Attempted to use bitsandbytes native library functionality but it's not available.\n\n"
+
+    # Explicit version availability check
+    version_alert = ""
+    if version_missing and required_version:
+        version_list_str = "\n- " + "\n- ".join(available_versions) if available_versions else "NONE"
+        version_alert = (
+            f"🚨 CUDA VERSION MISMATCH 🚨\n"
+            f"Requested CUDA version: {required_version}\n"
+            f"Available pre-compiled versions: {version_list_str}\n\n"
+            "This means:\n"
+            "1. The version you're trying to use is NOT distributed with this package\n"
+            "2. You MUST compile from source for this specific CUDA version\n"
+            "3. The installation will NOT work until you compile or choose a CUDA supported version\n\n"
+        )
+
+    troubleshooting = (
+        "This typically happens when:\n"
+        "1. bitsandbytes doesn't ship with a pre-compiled binary for your CUDA version\n"
+        "2. The library wasn't compiled properly during installation from source\n"
+        "3. Missing CUDA dependencies\n\n"
+    )
+
+    note = (
+        "To make bitsandbytes work, the compiled library version MUST exactly match the linked CUDA version.\n"
+        "If your CUDA version doesn't have a pre-compiled binary, you MUST compile from source.\n\n"
+    )
+
+    cuda_info = (
+        f"Detected PyTorch CUDA version: {user_cuda_version}\n"
+        f"Available pre-compiled bitsandbytes binaries for these CUDA versions: {version_list}\n"
+        f"{override_info}\n\n"
+    )
+
+    compile_instructions = (
+        (
+            "You have three options:\n"
+            "1. COMPILE FROM SOURCE (required if no binary exists):\n"
+            "   https://huggingface.co/docs/bitsandbytes/main/en/installation#cuda-compile\n"
+            "2. Use BNB_CUDA_VERSION to specify a DIFFERENT CUDA version from the detected one\n"
+            "3. Check LD_LIBRARY_PATH contains the correct CUDA libraries\n\n"
+        )
+        if include_override_notes
+        else ""
+    )
+
+    diagnostics = (
+        (
+            "🔍 Run this command for detailed diagnostics:\n"
+            "python -m bitsandbytes\n\n"
+            "If you've tried everything and still have issues:\n"
+            "1. Include ALL version info (operating system, bitsandbytes, pytorch, cuda, python)\n"
+            "2. Describe what you've tried in detail\n"
+            "3. Open an issue with this information:\n"
+            "   https://github.com/bitsandbytes-foundation/bitsandbytes/issues\n\n"
+        )
+        if include_diagnostics
+        else ""
+    )
+
+    return (
+        f"{version_alert}{base_msg}{troubleshooting}{cuda_info}"
+        f"{note}{compile_instructions}"
+        f"{original_error}\n{diagnostics}"
+    )
+
+
 class MockBNBNativeLibrary(BNBNativeLibrary):
     """
     Mock BNBNativeLibrary that raises an error when trying to use native library
@@ -89,43 +175,27 @@ def __init__(self, error_msg: str):
 
     def __getattr__(self, name):
         available_versions = get_available_cuda_binaries()
-        version_list = ", ".join(available_versions) if available_versions else "none"
-
-        user_ver = "Not detected"
-        if self.user_cuda_version:
-            user_ver = f"{self.user_cuda_version[0]}.{self.user_cuda_version[1]}"
+        override_value = os.environ.get("BNB_CUDA_VERSION")
+        override_info = f"\nCUDA override: BNB_CUDA_VERSION={override_value}" if override_value else ""
 
-        override_value = os.environ.get("BNB_CUDA_VERSION", None)
-        override_info = (
-            f"\nCUDA version overridden with BNB_CUDA_VERSION={override_value} environment variable"
+        formatted_version = (
+            parse_cuda_version(override_value)
             if override_value
-            else ""
+            else f"{self.user_cuda_version[0]}.{self.user_cuda_version[1]}"
         )
-
-        note = "To make bitsandbytes work, the compiled version of the library must match the corresponding linked CUDA version. If you are using a CUDA version that doesn't come with a pre-compiled binary, the only solution is to compile the library from source."
-
-        cuda_info = (
-            f"Detected PyTorch CUDA version: {user_ver}\n"
-            f"Available pre-compiled bitsandbytes binaries for CUDA versions: {version_list}"
-            + override_info
-            + "\n\n"
-            + note
-            + "\n\n"
-        )
-
-        base_msg = "Attempted to use bitsandbytes native library functionality but it's not available.\n\n"
-        original_error = f"Original error: {self.error_msg}\n\n" if self.error_msg else ""
-        troubleshooting = (
-            "This typically happens when:\n"
-            "1. BNB doesn't ship with a pre-compiled binary for your CUDA version\n"
-            "2. The library wasn't compiled properly during installation from source\n"
-            "3. Missing CUDA dependencies\n\n"
+        required_version = formatted_version
+        version_missing = required_version not in available_versions
+
+        msg = _format_cuda_error_message(
+            available_versions=available_versions,
+            user_cuda_version=f"{self.user_cuda_version[0]}.{self.user_cuda_version[1]}",
+            override_info=override_info,
+            original_error=f"Original error: {self.error_msg}\n" if self.error_msg else "",
+            include_diagnostics=True,
+            required_version=formatted_version,
+            version_missing=version_missing,
        )
-        err_msg = (
-            base_msg + troubleshooting + cuda_info + original_error + ("Run 'python -m bitsandbytes' for diagnostics.")
-        )
-
-        raise RuntimeError(err_msg)
+        raise RuntimeError(msg)
 
     def __getitem__(self, name):
         return self.__getattr__(name)
@@ -142,7 +212,24 @@ def get_native_library() -> BNBNativeLibrary:
         if cuda_binary_path.exists():
             binary_path = cuda_binary_path
         else:
-            logger.warning("Could not find the bitsandbytes CUDA binary at %r", cuda_binary_path)
+            available_versions = get_available_cuda_binaries()
+            env_version = os.environ.get("BNB_CUDA_VERSION")
+            override_info = "\nCUDA override active" if env_version else ""
+
+            formatted_version = parse_cuda_version(env_version) if env_version else cuda_specs.cuda_version_string
+            required_version = formatted_version
+            version_missing = required_version not in available_versions
+
+            msg = _format_cuda_error_message(
+                available_versions=available_versions,
+                user_cuda_version=cuda_specs.cuda_version_string,
+                override_info=override_info,
+                include_override_notes=True,
+                required_version=formatted_version,
+                version_missing=version_missing,
+            )
+            logger.warning(msg)
 
     logger.debug(f"Loading bitsandbytes native library from: {binary_path}")
     dll = ct.cdll.LoadLibrary(str(binary_path))
@@ -165,7 +252,7 @@ def get_native_library() -> BNBNativeLibrary:
     diagnostic_help = ""
     if torch.cuda.is_available():
         diagnostic_help = (
-            "CUDA Setup failed despite CUDA being available. "
+            "CUDA Setup failed despite CUDA being available.\n\n"
             "Please run the following command to get more information:\n\n"
             "python -m bitsandbytes\n\n"
             "Inspect the output of the command and see if you can locate CUDA libraries. "
From 9274fb18a961cf32af2f2492946a71345c54819b Mon Sep 17 00:00:00 2001
From: Titus von Koeller <9048635+Titus-von-Koeller@users.noreply.github.com>
Date: Wed, 30 Apr 2025 12:09:44 +0000
Subject: [PATCH 04/12] cleanup existing code

---
 bitsandbytes/cextension.py | 103 ++++++++++---------------------------
 1 file changed, 28 insertions(+), 75 deletions(-)

diff --git a/bitsandbytes/cextension.py b/bitsandbytes/cextension.py
index 0b68c581d..5935282c3 100644
--- a/bitsandbytes/cextension.py
+++ b/bitsandbytes/cextension.py
@@ -62,7 +62,7 @@ def __init__(self, lib: ct.CDLL):
         lib.cget_managed_ptr.restype = ct.c_void_p
 
 
-def get_available_cuda_binaries() -> list[str]:
+def get_available_cuda_binary_versions() -> list[str]:
     """Get formatted CUDA versions from existing library files using cuda_specs logic"""
     lib_pattern = f"libbitsandbytes_cuda*{DYNAMIC_LIBRARY_SUFFIX}"
     versions = []
@@ -86,26 +86,23 @@ def parse_cuda_version(version_str: str) -> str:
 def _format_cuda_error_message(
     available_versions: list[str],
     user_cuda_version: str,
-    override_info: str,
     original_error: str = "",
-    include_diagnostics: bool = False,
-    include_override_notes: bool = False,
-    required_version: Optional[str] = None,
-    version_missing: bool = False,
+    requested_version: Optional[str] = None,
 ) -> str:
-    version_list = ", ".join(available_versions) if available_versions else "none"
     base_msg = "Attempted to use bitsandbytes native library functionality but it's not available.\n\n"
 
-    # Explicit version availability check
     version_alert = ""
-    if version_missing and required_version:
-        version_list_str = "\n- " + "\n- ".join(available_versions) if available_versions else "NONE"
+    if requested_version not in available_versions:
+        version_list_str = "\n - " + "\n - ".join(available_versions) if available_versions else "NONE"
         version_alert = (
             f"🚨 CUDA VERSION MISMATCH 🚨\n"
-            f"Requested CUDA version: {required_version}\n"
+            f"Requested CUDA version: {requested_version}\n"
+            f"Detected PyTorch CUDA version: {user_cuda_version}\n"
             f"Available pre-compiled versions: {version_list_str}\n\n"
             "This means:\n"
             "1. The version you're trying to use is NOT distributed with this package\n"
+            if available_versions
+            else "1. You're not using the package but checked-out the source code\n"
             "2. You MUST compile from source for this specific CUDA version\n"
             "3. The installation will NOT work until you compile or choose a CUDA supported version\n\n"
         )
@@ -122,43 +119,25 @@ def _format_cuda_error_message(
     troubleshooting = (
         "This typically happens when:\n"
         "1. bitsandbytes doesn't ship with a pre-compiled binary for your CUDA version\n"
         "2. The library wasn't compiled properly during installation from source\n"
         "3. Missing CUDA dependencies\n\n"
     )
 
     note = (
         "To make bitsandbytes work, the compiled library version MUST exactly match the linked CUDA version.\n"
         "If your CUDA version doesn't have a pre-compiled binary, you MUST compile from source.\n\n"
     )
 
-    cuda_info = (
-        f"Detected PyTorch CUDA version: {user_cuda_version}\n"
-        f"Available pre-compiled bitsandbytes binaries for these CUDA versions: {version_list}\n"
-        f"{override_info}\n\n"
-    )
-
     compile_instructions = (
-        (
-            "You have three options:\n"
-            "1. COMPILE FROM SOURCE (required if no binary exists):\n"
-            "   https://huggingface.co/docs/bitsandbytes/main/en/installation#cuda-compile\n"
-            "2. Use BNB_CUDA_VERSION to specify a DIFFERENT CUDA version from the detected one\n"
-            "3. Check LD_LIBRARY_PATH contains the correct CUDA libraries\n\n"
-        )
-        if include_override_notes
-        else ""
+        "You have three options:\n"
+        "1. COMPILE FROM SOURCE (required if no binary exists):\n"
+        "   https://huggingface.co/docs/bitsandbytes/main/en/installation#cuda-compile\n"
+        "2. Use BNB_CUDA_VERSION to specify a DIFFERENT CUDA version from the detected one, which is installed on your machine and matching an available pre-compiled version listed above\n"
+        "3. Check LD_LIBRARY_PATH contains the correct CUDA libraries\n\n"
     )
 
     diagnostics = (
-        (
-            "🔍 Run this command for detailed diagnostics:\n"
-            "python -m bitsandbytes\n\n"
-            "If you've tried everything and still have issues:\n"
-            "1. Include ALL version info (operating system, bitsandbytes, pytorch, cuda, python)\n"
-            "2. Describe what you've tried in detail\n"
-            "3. Open an issue with this information:\n"
-            "   https://github.com/bitsandbytes-foundation/bitsandbytes/issues\n\n"
-        )
-        if include_diagnostics
-        else ""
+        "🔍 Run this command for detailed diagnostics:\n"
+        "python -m bitsandbytes\n\n"
+        "If you've tried everything and still have issues:\n"
+        "1. Include ALL version info (operating system, bitsandbytes, pytorch, cuda, python)\n"
+        "2. Describe what you've tried in detail\n"
+        "3. Open an issue with this information:\n"
+        "   https://github.com/bitsandbytes-foundation/bitsandbytes/issues\n\n"
     )
 
-    return (
-        f"{version_alert}{base_msg}{troubleshooting}{cuda_info}"
-        f"{note}{compile_instructions}"
-        f"{original_error}\n{diagnostics}"
-    )
+    return f"{version_alert}{base_msg}{troubleshooting}{note}{compile_instructions}{original_error}\n{diagnostics}"
 
 
 class MockBNBNativeLibrary(BNBNativeLibrary):
@@ -174,26 +153,20 @@ def __init__(self, error_msg: str):
         self.user_cuda_version = get_cuda_version_tuple()
 
     def __getattr__(self, name):
-        available_versions = get_available_cuda_binaries()
+        available_versions = get_available_cuda_binary_versions()
         override_value = os.environ.get("BNB_CUDA_VERSION")
-        override_info = f"\nCUDA override: BNB_CUDA_VERSION={override_value}" if override_value else ""
 
-        formatted_version = (
+        requested_version = (
             parse_cuda_version(override_value)
             if override_value
             else f"{self.user_cuda_version[0]}.{self.user_cuda_version[1]}"
         )
-        required_version = formatted_version
-        version_missing = required_version not in available_versions
 
         msg = _format_cuda_error_message(
             available_versions=available_versions,
             user_cuda_version=f"{self.user_cuda_version[0]}.{self.user_cuda_version[1]}",
-            override_info=override_info,
             original_error=f"Original error: {self.error_msg}\n" if self.error_msg else "",
-            include_diagnostics=True,
-            required_version=formatted_version,
-            version_missing=version_missing,
+            requested_version=requested_version,
         )
 
         raise RuntimeError(msg)
@@ -208,21 +181,15 @@ def get_native_library() -> BNBNativeLibrary:
         if cuda_binary_path.exists():
             binary_path = cuda_binary_path
         else:
-            available_versions = get_available_cuda_binaries()
+            available_versions = get_available_cuda_binary_versions()
             env_version = os.environ.get("BNB_CUDA_VERSION")
-            override_info = "\nCUDA override active" if env_version else ""
 
-            formatted_version = parse_cuda_version(env_version) if env_version else cuda_specs.cuda_version_string
-            required_version = formatted_version
-            version_missing = required_version not in available_versions
+            requested_version = parse_cuda_version(env_version) if env_version else cuda_specs.cuda_version_string
 
             msg = _format_cuda_error_message(
                 available_versions=available_versions,
                 user_cuda_version=cuda_specs.cuda_version_string,
-                override_info=override_info,
-                include_override_notes=True,
-                required_version=formatted_version,
-                version_missing=version_missing,
+                requested_version=requested_version,
             )
             logger.warning(msg)
 
@@ -249,19 +216,5 @@ def get_native_library() -> BNBNativeLibrary:
     error_msg = f"Could not load bitsandbytes native library: {e}"
     logger.error(error_msg, exc_info=False)
 
-    diagnostic_help = ""
-    if torch.cuda.is_available():
-        diagnostic_help = (
-            "CUDA Setup failed despite CUDA being available.\n\n"
-            "Please run the following command to get more information:\n\n"
-            "python -m bitsandbytes\n\n"
-            "Inspect the output of the command and see if you can locate CUDA libraries. "
-            "You might need to add them to your LD_LIBRARY_PATH. "
-            "If you suspect a bug, please take the information from the command and open an issue at:\n\n"
-            "https://github.com/bitsandbytes-foundation/bitsandbytes/issues\n\n"
-            "If you are using a custom CUDA version, you might need to set the BNB_CUDA_VERSION "
-            "environment variable to the correct version."
-        )
-
     # create a mock with error messaging as fallback
-    lib = MockBNBNativeLibrary(diagnostic_help)
+    lib = MockBNBNativeLibrary(error_msg)
From 43ee60f9e7b7c13eb533a0bcdc5e1cc52c367d15 Mon Sep 17 00:00:00 2001
From: Titus von Koeller <9048635+Titus-von-Koeller@users.noreply.github.com>
Date: Wed, 30 Apr 2025 16:24:34 +0000
Subject: [PATCH 05/12] further cleanup

---
 bitsandbytes/cextension.py | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/bitsandbytes/cextension.py b/bitsandbytes/cextension.py
index 5935282c3..9fc56cca6 100644
--- a/bitsandbytes/cextension.py
+++ b/bitsandbytes/cextension.py
@@ -89,12 +89,16 @@ def _format_cuda_error_message(
     original_error: str = "",
     requested_version: Optional[str] = None,
 ) -> str:
-    base_msg = "Attempted to use bitsandbytes native library functionality but it's not available.\n\n"
+    analysis = ""
+    no_cpu_lib_found = "libbitsandbytes_cpu.so: cannot open" in original_error
+    no_cuda_lib_found = requested_version not in available_versions
+
+    if no_cpu_lib_found:
+        analysis = "🚨 Needed to load CPU-only bitsandbytes library, but it's not available 🚨\n\n"
 
-    version_alert = ""
-    if requested_version not in available_versions:
+    elif no_cuda_lib_found:
         version_list_str = "\n - " + "\n - ".join(available_versions) if available_versions else "NONE"
-        version_alert = (
+        analysis = (
             f"🚨 CUDA VERSION MISMATCH 🚨\n"
             f"Requested CUDA version: {requested_version}\n"
             f"Detected PyTorch CUDA version: {user_cuda_version}\n"
             f"Available pre-compiled versions: {version_list_str}\n\n"
             "This means:\n"
             "1. The version you're trying to use is NOT distributed with this package\n"
             if available_versions
             else "1. You're not using the package but checked-out the source code\n"
             "2. You MUST compile from source for this specific CUDA version\n"
             "3. The installation will NOT work until you compile or choose a CUDA supported version\n\n"
         )
 
+    base_msg = "Attempted to use bitsandbytes native library functionality but it's not available.\n\n"
+
     troubleshooting = (
-        "This typically happens when:\n"
-        "1. bitsandbytes doesn't ship with a pre-compiled binary for your CUDA version\n"
+        "This typically happens when:\n1. bitsandbytes doesn't ship with a pre-compiled binary for your CUDA version\n"
+        if no_cuda_lib_found
+        else "1. You checked the code out from source and your torch installation doesn't detect CUDA on your machine\n"
         "2. The library wasn't compiled properly during installation from source\n"
         "3. Missing CUDA dependencies\n\n"
+        if no_cuda_lib_found
+        else ""
     )
 
     note = (
         "To make bitsandbytes work, the compiled library version MUST exactly match the linked CUDA version.\n"
         "If your CUDA version doesn't have a pre-compiled binary, you MUST compile from source.\n\n"
     )
 
@@ -137,7 +146,7 @@ def _format_cuda_error_message(
         "   https://github.com/bitsandbytes-foundation/bitsandbytes/issues\n\n"
     )
 
-    return f"{version_alert}{base_msg}{troubleshooting}{note}{compile_instructions}{original_error}\n{diagnostics}"
+    return f"{analysis}{base_msg}{troubleshooting}{note}{compile_instructions}{original_error}\n{diagnostics}"
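
From patch 05 on, the formatter branches on how loading failed, using plain substring and membership checks. A self-contained illustration of the two flags (the error string is just an example of the loader-message shape being matched):

    original_error = "libbitsandbytes_cpu.so: cannot open shared object file"
    available_versions = ["11.8", "12.4"]
    requested_version = "12.1"

    no_cpu_lib_found = "libbitsandbytes_cpu.so: cannot open" in original_error
    no_cuda_lib_found = requested_version not in available_versions
    print(no_cpu_lib_found, no_cuda_lib_found)  # True True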
From bc2c2a879728d88bda95971654738528e1cf7b74 Mon Sep 17 00:00:00 2001
From: Titus von Koeller <9048635+Titus-von-Koeller@users.noreply.github.com>
Date: Mon, 5 May 2025 14:01:08 +0000
Subject: [PATCH 06/12] validated case1 missing dep + case2 custom cuda, yet missing lib

---
 bitsandbytes/cextension.py | 212 ++++++++++++++++++++++++++++---------
 1 file changed, 162 insertions(+), 50 deletions(-)

diff --git a/bitsandbytes/cextension.py b/bitsandbytes/cextension.py
index 9fc56cca6..ef072a0e5 100644
--- a/bitsandbytes/cextension.py
+++ b/bitsandbytes/cextension.py
@@ -83,7 +83,7 @@ def parse_cuda_version(version_str: str) -> str:
     return version_str  # fallback as safety net
 
 
-def _format_cuda_error_message(
+def _format_lib_error_message(
     available_versions: list[str],
     user_cuda_version: str,
     original_error: str = "",
@@ -94,7 +94,7 @@ def _format_lib_error_message(
     no_cuda_lib_found = requested_version not in available_versions
 
     if no_cpu_lib_found:
-        analysis = "🚨 Needed to load CPU-only bitsandbytes library, but it's not available 🚨\n\n"
+        analysis = "🚨 Failed to load CPU-only bitsandbytes library 🚨\n\n"
 
     elif no_cuda_lib_found:
         version_list_str = "\n - " + "\n - ".join(available_versions) if available_versions else "NONE"
         analysis = (
             f"🚨 CUDA VERSION MISMATCH 🚨\n"
             f"Requested CUDA version: {requested_version}\n"
             f"Detected PyTorch CUDA version: {user_cuda_version}\n"
             f"Available pre-compiled versions: {version_list_str}\n\n"
             "This means:\n"
-            "1. The version you're trying to use is NOT distributed with this package\n"
+            "The version you're trying to use is NOT distributed with this package\n\n"
             if available_versions
             else "1. You're not using the package but checked-out the source code\n"
             "2. You MUST compile from source for this specific CUDA version\n"
-            "3. The installation will NOT work until you compile or choose a CUDA supported version\n\n"
+            "3. The installation will NOT work until you compile or choose a CUDA supported version via `export BNB_CUDA_VERSION=`\n\n"
         )
 
     base_msg = "Attempted to use bitsandbytes native library functionality but it's not available.\n\n"
 
     troubleshooting = (
-        "This typically happens when:\n1. bitsandbytes doesn't ship with a pre-compiled binary for your CUDA version\n"
-        if no_cuda_lib_found
-        else "1. You checked the code out from source and your torch installation doesn't detect CUDA on your machine\n"
-        "2. The library wasn't compiled properly during installation from source\n"
-        "3. Missing CUDA dependencies\n\n"
+        (
+            "This typically happens when:\n"
+            "1. bitsandbytes doesn't ship with a pre-compiled binary for your CUDA version\n"
+            "2. The library wasn't compiled properly during installation from source\n"
+            "3. Missing CUDA dependencies\n\n"
+        )
         if no_cuda_lib_found
-        else ""
+        else "This typically happens when you checked the code out from source and your torch installation doesn't detect CUDA on your machine.\n\n"
     )
 
     note = (
-        "To make bitsandbytes work, the compiled library version MUST exactly match the linked CUDA version.\n"
-        "If your CUDA version doesn't have a pre-compiled binary, you MUST compile from source.\n\n"
+        (
+            "To make bitsandbytes work, the compiled library version MUST exactly match the linked CUDA version.\n"
+            "If your CUDA version doesn't have a pre-compiled binary, you MUST compile from source.\n\n"
+        )
+        if no_cuda_lib_found
+        else ""
     )
 
     compile_instructions = (
-        "You have three options:\n"
-        "1. COMPILE FROM SOURCE (required if no binary exists):\n"
-        "   https://huggingface.co/docs/bitsandbytes/main/en/installation#cuda-compile\n"
-        "2. Use BNB_CUDA_VERSION to specify a DIFFERENT CUDA version from the detected one, which is installed on your machine and matching an available pre-compiled version listed above\n"
-        "3. Check LD_LIBRARY_PATH contains the correct CUDA libraries\n\n"
+        (
+            "You have three options:\n"
+            "1. COMPILE FROM SOURCE (required if no binary exists):\n"
+            "   https://huggingface.co/docs/bitsandbytes/main/en/installation#cuda-compile\n"
+            "2. Use BNB_CUDA_VERSION to specify a DIFFERENT CUDA version from the detected one, which is installed on your machine and matching an available pre-compiled version listed above\n"
+            "3. Check LD_LIBRARY_PATH contains the correct CUDA libraries\n\n"
+        )
+        if no_cuda_lib_found
+        else "COMPILE FROM SOURCE for CPU-only:\n    `cmake -DCOMPUTE_BACKEND=cpu -S . && make`\n\n"
     )
 
     diagnostics = (
         "🔍 Run this command for detailed diagnostics:\n"
         "python -m bitsandbytes\n\n"
         "If you've tried everything and still have issues:\n"
         "1. Include ALL version info (operating system, bitsandbytes, pytorch, cuda, python)\n"
         "2. Describe what you've tried in detail\n"
         "3. Open an issue with this information:\n"
         "   https://github.com/bitsandbytes-foundation/bitsandbytes/issues\n\n"
     )
 
     return f"{analysis}{base_msg}{troubleshooting}{note}{compile_instructions}{original_error}\n{diagnostics}"
 
 
-class MockBNBNativeLibrary(BNBNativeLibrary):
+class ErrorHandlerMockBNBNativeLibrary(BNBNativeLibrary):
     """
     Mock BNBNativeLibrary that raises an error when trying to use native library
     functionality without successfully loading the library.
@@ -160,24 +169,133 @@ class ErrorHandlerMockBNBNativeLibrary(BNBNativeLibrary):
     def __init__(self, error_msg: str):
         self.error_msg = error_msg
         self.user_cuda_version = get_cuda_version_tuple()
+        self.available_versions = get_available_cuda_binary_versions()
+        self.override_value = os.environ.get("BNB_CUDA_VERSION")
+        self.requested_version = (
+            parse_cuda_version(self.override_value)
+            if self.override_value
+            else f"{self.user_cuda_version[0]}.{self.user_cuda_version[1]}"
+            if self.user_cuda_version
+            else "unknown"
+        )
 
-    def __getattr__(self, name):
-        available_versions = get_available_cuda_binary_versions()
-        override_value = os.environ.get("BNB_CUDA_VERSION")
+        # Pre-generate the error message based on error type
+        if "cannot open shared object file" in error_msg:
+            self.formatted_error = self._format_dependency_error()
+        else:  # lib loading errors
+            self.formatted_error = self._format_lib_error_message(
+                available_versions=self.available_versions,
+                user_cuda_version=f"{self.user_cuda_version[0]}.{self.user_cuda_version[1]}"
+                if self.user_cuda_version
+                else "unknown",
+                original_error=f"Original error: {self.error_msg}\n" if self.error_msg else "",
+                requested_version=self.requested_version,
+            )
 
-        requested_version = (
-            parse_cuda_version(override_value)
-            if override_value
-            else f"{self.user_cuda_version[0]}.{self.user_cuda_version[1]}"
-        )
+    def _format_lib_error_message(
+        self,
+        available_versions: list[str],
+        user_cuda_version: str,
+        original_error: str = "",
+        requested_version: Optional[str] = None,
+    ) -> str:
+        """Format detailed error message for library loading failures"""
+        analysis = ""
+        no_cpu_lib_found = "libbitsandbytes_cpu.so: cannot open" in original_error
+        no_cuda_lib_found = "CUDA binary not found" in original_error
+
+        if no_cpu_lib_found:
+            analysis = "\n🚨 Failed to load CPU-only bitsandbytes library 🚨\n\n"
+
+        elif no_cuda_lib_found:
+            version_list_str = "\n - " + "\n - ".join(available_versions) if available_versions else "NONE"
+            analysis = (
+                f"\n🚨 CUDA VERSION MISMATCH 🚨\n"
+                f"Requested CUDA version: {requested_version}\n"
+                f"Detected PyTorch CUDA version: {user_cuda_version}\n"
+                f"Available pre-compiled versions: {version_list_str}\n\n"
+                "This means:\n"
+                "The version you're trying to use is NOT distributed with this package\n\n"
+                if available_versions
+                else "1. You're not using the package but checked-out the source code\n"
+                "2. You MUST compile from source for this specific CUDA version\n"
+                "3. The installation will NOT work until you compile or choose a CUDA supported version via export BNB_CUDA_VERSION=\n\n"
+            )
+
+        base_msg = "Attempted to use bitsandbytes native library functionality but it's not available.\n\n"
+
+        troubleshooting = (
+            (
+                "This typically happens when:\n"
+                "1. bitsandbytes doesn't ship with a pre-compiled binary for your CUDA version\n"
+                "2. The library wasn't compiled properly during installation from source\n\n"
+            )
+            if no_cuda_lib_found
+            else "This typically happens when you checked the code out from source and your torch installation doesn't detect CUDA on your machine.\n\n"
         )
 
-        msg = _format_cuda_error_message(
-            available_versions=available_versions,
-            user_cuda_version=f"{self.user_cuda_version[0]}.{self.user_cuda_version[1]}",
-            original_error=f"Original error: {self.error_msg}\n" if self.error_msg else "",
-            requested_version=requested_version,
+        note = (
+            (
+                "To make bitsandbytes work, the compiled library version MUST exactly match the linked CUDA version.\n"
+                "If your CUDA version doesn't have a pre-compiled binary, you MUST compile from source.\n\n"
+            )
+            if no_cuda_lib_found
+            else ""
         )
-        raise RuntimeError(msg)
+
+        compile_instructions = (
+            (
+                "You have two options:\n"
+                "1. COMPILE FROM SOURCE (required if no binary exists):\n"
+                "   https://huggingface.co/docs/bitsandbytes/main/en/installation#cuda-compile\n"
+                "2. Use BNB_CUDA_VERSION to specify a DIFFERENT CUDA version from the detected one, which is installed on your machine and matching an available pre-compiled version listed above\n\n"
+            )
+            if no_cuda_lib_found
+            else "COMPILE FROM SOURCE for CPU-only:\n    `cmake -DCOMPUTE_BACKEND=cpu -S . && make`\n\n"
+        )
+
+        diagnostics = (
+            "🔍 Run this command for detailed diagnostics:\n"
+            "python -m bitsandbytes\n\n"
+            "If you've tried everything and still have issues:\n"
+            "1. Include ALL version info (operating system, bitsandbytes, pytorch, cuda, python)\n"
+            "2. Describe what you've tried in detail\n"
+            "3. Open an issue with this information:\n"
+            "   https://github.com/bitsandbytes-foundation/bitsandbytes/issues\n\n"
+        )
+
+        return f"{analysis}{base_msg}{troubleshooting}{note}{compile_instructions}{original_error}\n{diagnostics}"
+
+    def _format_dependency_error(self) -> str:
+        """Format error message for missing shared libraries"""
+        # Extract missing library name from error
+        error_parts = self.error_msg.split(":")
+        missing_lib = error_parts[0].strip() if len(error_parts) > 0 else "unknown library"
+        cuda_major_version = (
+            self.requested_version.split(".")[0] if "." in self.requested_version else self.requested_version
+        )
+
+        return (
+            f"\n🚨 CUDA SETUP ERROR: Missing dependency: {missing_lib} 🚨\n\n"
+            f"CUDA {cuda_major_version}.x runtime libraries were not found in the LD_LIBRARY_PATH.\n\n"
+            f"To fix this, make sure that:\n"
+            f"1. You have installed CUDA {cuda_major_version}.x toolkit on your system\n"
+            f"2. The CUDA runtime libraries are in your LD_LIBRARY_PATH\n\n"
+            f"You can add them with (and persist the change by adding the line to your .bashrc):\n"
+            f"    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/path/to/cuda-{cuda_major_version}.x/lib64\n\n"
+            f"Original error: {self.error_msg}\n\n"
+            f"🔍 Run this command for detailed diagnostics:\n"
+            f"python -m bitsandbytes\n\n"
+            f"If you've tried everything and still have issues:\n"
+            f"1. Include ALL version info (operating system, bitsandbytes, pytorch, cuda, python)\n"
+            f"2. Describe what you've tried in detail\n"
+            f"3. Open an issue with this information:\n"
+            f"   https://github.com/bitsandbytes-foundation/bitsandbytes/issues\n\n"
+        )
+
+    def __getattr__(self, name):
+        """Raise error with detailed message when any attribute is accessed"""
+        raise RuntimeError(f"{self.formatted_error}Native code method attempted to access: lib.{name}()")
 
     def __getitem__(self, name):
         return self.__getattr__(name)
@@ -187,26 +305,20 @@ def get_native_library() -> BNBNativeLibrary:
     """
     Load CUDA library XOR CPU, as the latter contains a subset of symbols of the former.
     """
-    binary_path = PACKAGE_DIR / f"libbitsandbytes_cpu{DYNAMIC_LIBRARY_SUFFIX}"
     cuda_specs = get_cuda_specs()
+    binary_path = PACKAGE_DIR / f"libbitsandbytes_cpu{DYNAMIC_LIBRARY_SUFFIX}"
+
     if cuda_specs:
         cuda_binary_path = get_cuda_bnb_library_path(cuda_specs)
-        if cuda_binary_path.exists():
-            binary_path = cuda_binary_path
-        else:
-            available_versions = get_available_cuda_binary_versions()
-            env_version = os.environ.get("BNB_CUDA_VERSION")
-
-            requested_version = parse_cuda_version(env_version) if env_version else cuda_specs.cuda_version_string
-
-            msg = _format_cuda_error_message(
-                available_versions=available_versions,
-                user_cuda_version=cuda_specs.cuda_version_string,
-                requested_version=requested_version,
-            )
-            logger.warning(msg)
+
+        if not cuda_binary_path.exists():
+            raise RuntimeError(f"Configured CUDA binary not found at {cuda_binary_path}")
+
+        binary_path = cuda_binary_path
 
     logger.debug(f"Loading bitsandbytes native library from: {binary_path}")
+
+    # Try to load the library - any errors will propagate up
     dll = ct.cdll.LoadLibrary(str(binary_path))
 
@@ -216,18 +328,18 @@ def get_native_library() -> BNBNativeLibrary:
     if hasattr(dll, "get_context"):  # only a CUDA-built library exposes this
         return CudaBNBNativeLibrary(dll)
 
     logger.warning(
         "The installed version of bitsandbytes was compiled without GPU support. "
-        "8-bit optimizers and GPU quantization are unavailable.",
+        "8-bit optimizers and GPU quantization are unavailable."
     )
     return BNBNativeLibrary(dll)
 
 
 try:
     lib = get_native_library()
 except Exception as e:
-    error_msg = f"Could not load bitsandbytes native library: {e}"
-    logger.error(error_msg, exc_info=False)
+    error_msg = str(e)
+    logger.error(f"bitsandbytes library load error: {error_msg}\n", exc_info=True)
 
     # create a mock with error messaging as fallback
-    lib = MockBNBNativeLibrary(error_msg)
+    lib = ErrorHandlerMockBNBNativeLibrary(error_msg)
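
The _format_dependency_error() path added in patch 06 recovers the missing library's name from the OS loader message by taking the text before the first colon. Roughly, assuming the usual 'libfoo.so: cannot open ...' message shape (example message only):

    error_msg = "libcudart.so.12: cannot open shared object file: No such file or directory"
    missing_lib = error_msg.split(":")[0].strip()
    print(missing_lib)  # libcudart.so.12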
From 71a9ce418aecf9bbfd33c3338c2d8374db85a5e7 Mon Sep 17 00:00:00 2001
From: Titus von Koeller <9048635+Titus-von-Koeller@users.noreply.github.com>
Date: Mon, 5 May 2025 14:28:55 +0000
Subject: [PATCH 07/12] delete dead code

---
 bitsandbytes/cextension.py | 75 --------------------------------------
 1 file changed, 75 deletions(-)

diff --git a/bitsandbytes/cextension.py b/bitsandbytes/cextension.py
index ef072a0e5..8bc2f4ca6 100644
--- a/bitsandbytes/cextension.py
+++ b/bitsandbytes/cextension.py
@@ -83,81 +83,6 @@ def parse_cuda_version(version_str: str) -> str:
     return version_str  # fallback as safety net
 
 
-def _format_lib_error_message(
-    available_versions: list[str],
-    user_cuda_version: str,
-    original_error: str = "",
-    requested_version: Optional[str] = None,
-) -> str:
-    analysis = ""
-    no_cpu_lib_found = "libbitsandbytes_cpu.so: cannot open" in original_error
-    no_cuda_lib_found = requested_version not in available_versions
-
-    if no_cpu_lib_found:
-        analysis = "🚨 Failed to load CPU-only bitsandbytes library 🚨\n\n"
-
-    elif no_cuda_lib_found:
-        version_list_str = "\n - " + "\n - ".join(available_versions) if available_versions else "NONE"
-        analysis = (
-            f"🚨 CUDA VERSION MISMATCH 🚨\n"
-            f"Requested CUDA version: {requested_version}\n"
-            f"Detected PyTorch CUDA version: {user_cuda_version}\n"
-            f"Available pre-compiled versions: {version_list_str}\n\n"
-            "This means:\n"
-            "The version you're trying to use is NOT distributed with this package\n\n"
-            if available_versions
-            else "1. You're not using the package but checked-out the source code\n"
-            "2. You MUST compile from source for this specific CUDA version\n"
-            "3. The installation will NOT work until you compile or choose a CUDA supported version via `export BNB_CUDA_VERSION=`\n\n"
-        )
-
-    base_msg = "Attempted to use bitsandbytes native library functionality but it's not available.\n\n"
-
-    troubleshooting = (
-        (
-            "This typically happens when:\n"
-            "1. bitsandbytes doesn't ship with a pre-compiled binary for your CUDA version\n"
-            "2. The library wasn't compiled properly during installation from source\n"
-            "3. Missing CUDA dependencies\n\n"
-        )
-        if no_cuda_lib_found
-        else "This typically happens when you checked the code out from source and your torch installation doesn't detect CUDA on your machine.\n\n"
-    )
-
-    note = (
-        (
-            "To make bitsandbytes work, the compiled library version MUST exactly match the linked CUDA version.\n"
-            "If your CUDA version doesn't have a pre-compiled binary, you MUST compile from source.\n\n"
-        )
-        if no_cuda_lib_found
-        else ""
-    )
-
-    compile_instructions = (
-        (
-            "You have three options:\n"
-            "1. COMPILE FROM SOURCE (required if no binary exists):\n"
-            "   https://huggingface.co/docs/bitsandbytes/main/en/installation#cuda-compile\n"
-            "2. Use BNB_CUDA_VERSION to specify a DIFFERENT CUDA version from the detected one, which is installed on your machine and matching an available pre-compiled version listed above\n"
-            "3. Check LD_LIBRARY_PATH contains the correct CUDA libraries\n\n"
-        )
-        if no_cuda_lib_found
-        else "COMPILE FROM SOURCE for CPU-only:\n    `cmake -DCOMPUTE_BACKEND=cpu -S . && make`\n\n"
-    )
-
-    diagnostics = (
-        "🔍 Run this command for detailed diagnostics:\n"
-        "python -m bitsandbytes\n\n"
-        "If you've tried everything and still have issues:\n"
-        "1. Include ALL version info (operating system, bitsandbytes, pytorch, cuda, python)\n"
-        "2. Describe what you've tried in detail\n"
-        "3. Open an issue with this information:\n"
-        "   https://github.com/bitsandbytes-foundation/bitsandbytes/issues\n\n"
-    )
-
-    return f"{analysis}{base_msg}{troubleshooting}{note}{compile_instructions}{original_error}\n{diagnostics}"
-
-
 class ErrorHandlerMockBNBNativeLibrary(BNBNativeLibrary):
     """
     Mock BNBNativeLibrary that raises an error when trying to use native library
From 0acab1c6e4dda64f4f437278ae2d2dc7d6266aa3 Mon Sep 17 00:00:00 2001
From: Titus von Koeller <9048635+Titus-von-Koeller@users.noreply.github.com>
Date: Tue, 6 May 2025 14:39:26 +0000
Subject: [PATCH 08/12] case 3,4a/b: no lib but cuda validated

---
 bitsandbytes/cextension.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/bitsandbytes/cextension.py b/bitsandbytes/cextension.py
index 8bc2f4ca6..68757eb5c 100644
--- a/bitsandbytes/cextension.py
+++ b/bitsandbytes/cextension.py
@@ -135,16 +135,18 @@ def _format_lib_error_message(
         elif no_cuda_lib_found:
             version_list_str = "\n - " + "\n - ".join(available_versions) if available_versions else "NONE"
             analysis = (
-                f"\n🚨 CUDA VERSION MISMATCH 🚨\n"
-                f"Requested CUDA version: {requested_version}\n"
-                f"Detected PyTorch CUDA version: {user_cuda_version}\n"
-                f"Available pre-compiled versions: {version_list_str}\n\n"
-                "This means:\n"
-                "The version you're trying to use is NOT distributed with this package\n\n"
+                (
+                    f"\n🚨 CUDA VERSION MISMATCH 🚨\n"
+                    f"Requested CUDA version: {requested_version}\n"
+                    f"Detected PyTorch CUDA version: {user_cuda_version}\n"
+                    f"Available pre-compiled versions: {version_list_str}\n\n"
+                    "This means:\n"
+                    "The version you're trying to use is NOT distributed with this package\n\n"
+                )
                 if available_versions
-                else "1. You're not using the package but checked-out the source code\n"
-                "2. You MUST compile from source for this specific CUDA version\n"
-                "3. The installation will NOT work until you compile or choose a CUDA supported version via export BNB_CUDA_VERSION=\n\n"
+                else "\n🚨 Forgot to compile the bitsandbytes library? 🚨\n"
+                "1. You're not using the package but checked-out the source code\n"
+                "2. You MUST compile from source\n\n"
             )

From d4df4b79ac2187dc9897c3d5409344be9e8e9cde Mon Sep 17 00:00:00 2001
From: Titus von Koeller <9048635+Titus-von-Koeller@users.noreply.github.com>
Date: Tue, 6 May 2025 16:18:02 +0000
Subject: [PATCH 09/12] don't error when not calling + doc string

---
 bitsandbytes/cextension.py | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/bitsandbytes/cextension.py b/bitsandbytes/cextension.py
index 68757eb5c..140847103 100644
--- a/bitsandbytes/cextension.py
+++ b/bitsandbytes/cextension.py
@@ -45,11 +45,19 @@ class BNBNativeLibrary:
     def __init__(self, lib: ct.CDLL):
         self._lib = lib
 
-    def __getattr__(self, item):
-        return getattr(self._lib, item)
+    def __getattr__(self, name):
+        def throw_on_call(*args, **kwargs):
+            if hasattr(self._lib, name):
+                return getattr(self._lib, name)(*args, **kwargs)
+            raise RuntimeError(
+                f"Method '{name}' not available in CPU-only version of bitsandbytes.\n"
+                "Reinstall with GPU support or use CUDA-enabled hardware."
+            )
+
+        return throw_on_call
 
     def __getitem__(self, item):
-        return getattr(self._lib, item)
+        return self.__getattr__(item)
 
 
 class CudaBNBNativeLibrary(BNBNativeLibrary):
@@ -221,8 +229,12 @@ def _format_dependency_error(self) -> str:
         )
 
     def __getattr__(self, name):
-        """Raise error with detailed message when any attribute is accessed"""
-        raise RuntimeError(f"{self.formatted_error}Native code method attempted to access: lib.{name}()")
+        """Return a dummy function that throws when called, rather than on attribute access"""
+
+        def throw_on_call(*args, **kwargs):
+            raise RuntimeError(f"{self.formatted_error}Native code method attempted to call: lib.{name}()")
+
+        return throw_on_call
 
     def __getitem__(self, name):
         return self.__getattr__(name)

From fe6cd17ef4bb97fc365e69b1a04ddb3897b99c0b Mon Sep 17 00:00:00 2001
From: Titus von Koeller <9048635+Titus-von-Koeller@users.noreply.github.com>
Date: Tue, 6 May 2025 16:18:21 +0000
Subject: [PATCH 10/12] update doc-string

---
 bitsandbytes/cextension.py | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/bitsandbytes/cextension.py b/bitsandbytes/cextension.py
index 140847103..3fb8db26f 100644
--- a/bitsandbytes/cextension.py
+++ b/bitsandbytes/cextension.py
@@ -93,10 +93,23 @@ def parse_cuda_version(version_str: str) -> str:
 
 class ErrorHandlerMockBNBNativeLibrary(BNBNativeLibrary):
     """
-    Mock BNBNativeLibrary that raises an error when trying to use native library
-    functionality without successfully loading the library.
-    Any method or attribute access will raise a RuntimeError with a message that
-    points to the original error and provides troubleshooting steps.
+    Mock library handler that defers errors until native methods are called.
+
+    This class serves as a fallback when the native bitsandbytes library fails to load.
+    It captures the original error and generates detailed troubleshooting guidance.
+
+    Key behaviors:
+    - Allows attribute access and method assignment without immediate errors
+    - Throws a RuntimeError with diagnostic information only when a native method is called, as otherwise it would error out on import, breaking backward compatibility
+    - Handles both missing CUDA dependencies and version mismatch scenarios
+
+    Error scenarios covered:
+    1. Missing shared library dependencies (e.g., libcudart.so not in LD_LIBRARY_PATH or through PyTorch CUDA installation)
+    2. CUDA version mismatch between PyTorch and available pre-compiled binaries
+    3. Completely missing pre-compiled binaries when CUDA is detected
+    4. Custom BNB_CUDA_VERSION override but mismatch
+    5. CPU-only installation attempts when GPU functionality is requested
+
     """
 
     def __init__(self, error_msg: str):
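
Patches 09 and 10 shift the failure point from attribute access to call time, so module import and table construction keep working. A self-contained sketch of that call-time proxy behavior (hypothetical names, not the library's API):

    class CallTimeErrorProxy:
        def __init__(self, reason: str):
            self._reason = reason

        def __getattr__(self, name):
            def throw_on_call(*args, **kwargs):
                raise RuntimeError(f"{self._reason}: attempted to call lib.{name}()")

            return throw_on_call

    lib = CallTimeErrorProxy("bitsandbytes native library failed to load")
    fn = lib.cadam32bit_grad_fp32  # attribute access and assignment are safe
    # fn()  # only an actual call raises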
From f1d9d80a0bca4f193e131fe14997d9fe99a02070 Mon Sep 17 00:00:00 2001
From: Titus von Koeller <9048635+Titus-von-Koeller@users.noreply.github.com>
Date: Tue, 6 May 2025 16:20:33 +0000
Subject: [PATCH 11/12] lib methods can now safely be assigned, no more cryptic errors on missing lib

---
 bitsandbytes/functional.py | 191 ++++++++++++++++++-------------------
 1 file changed, 95 insertions(+), 96 deletions(-)

diff --git a/bitsandbytes/functional.py b/bitsandbytes/functional.py
index c9341230f..91351386e 100644
--- a/bitsandbytes/functional.py
+++ b/bitsandbytes/functional.py
@@ -19,102 +19,101 @@
 name2qmap = {}
 
-if lib and lib.compiled_with_cuda:
-    """C FUNCTIONS FOR OPTIMIZERS"""
-    str2optimizer32bit = {
-        "adam": (
-            lib.cadam32bit_grad_fp32,
-            lib.cadam32bit_grad_fp16,
-            lib.cadam32bit_grad_bf16,
-        ),
-        "momentum": (
-            lib.cmomentum32bit_grad_32,
-            lib.cmomentum32bit_grad_16,
-        ),
-        "rmsprop": (
-            lib.crmsprop32bit_grad_32,
-            lib.crmsprop32bit_grad_16,
-        ),
-        "lion": (
-            lib.clion32bit_grad_fp32,
-            lib.clion32bit_grad_fp16,
-            lib.clion32bit_grad_bf16,
-        ),
-        "adagrad": (
-            lib.cadagrad32bit_grad_32,
-            lib.cadagrad32bit_grad_16,
-        ),
-        "lamb": (
-            lib.cadam32bit_grad_fp32,
-            lib.cadam32bit_grad_fp16,
-            lib.cadam32bit_grad_bf16,
-        ),
-        "ademamix": (
-            lib.cademamix32bit_grad_fp32,
-            lib.cademamix32bit_grad_fp16,
-            lib.cademamix32bit_grad_bf16,
-        ),
-    }
-
-    str2optimizer8bit = {
-        "adam": (
-            lib.cadam_static_8bit_grad_32,
-            lib.cadam_static_8bit_grad_16,
-        ),
-        "momentum": (
-            lib.cmomentum_static_8bit_grad_32,
-            lib.cmomentum_static_8bit_grad_16,
-        ),
-        "rmsprop": (
-            lib.crmsprop_static_8bit_grad_32,
-            lib.crmsprop_static_8bit_grad_16,
-        ),
-        "lion": (
-            lib.clion_static_8bit_grad_32,
-            lib.clion_static_8bit_grad_16,
-        ),
-        "lamb": (
-            lib.cadam_static_8bit_grad_32,
-            lib.cadam_static_8bit_grad_16,
-        ),
-        "lars": (
-            lib.cmomentum_static_8bit_grad_32,
-            lib.cmomentum_static_8bit_grad_16,
-        ),
-    }
-
-    str2optimizer8bit_blockwise = {
-        "adam": (
-            lib.cadam_8bit_blockwise_grad_fp32,
-            lib.cadam_8bit_blockwise_grad_fp16,
-            lib.cadam_8bit_blockwise_grad_bf16,
-        ),
-        "momentum": (
-            lib.cmomentum_8bit_blockwise_grad_fp32,
-            lib.cmomentum_8bit_blockwise_grad_fp16,
-            lib.cmomentum_8bit_blockwise_grad_bf16,
-        ),
-        "rmsprop": (
-            lib.crmsprop_8bit_blockwise_grad_fp32,
-            lib.crmsprop_8bit_blockwise_grad_fp16,
-            lib.crmsprop_8bit_blockwise_grad_bf16,
-        ),
-        "lion": (
-            lib.clion_8bit_blockwise_grad_fp32,
-            lib.clion_8bit_blockwise_grad_fp16,
-            lib.clion_8bit_blockwise_grad_bf16,
-        ),
-        "adagrad": (
-            lib.cadagrad_8bit_blockwise_grad_fp32,
-            lib.cadagrad_8bit_blockwise_grad_fp16,
-            lib.cadagrad_8bit_blockwise_grad_bf16,
-        ),
-        "ademamix": (
-            lib.cademamix_8bit_blockwise_grad_fp32,
-            lib.cademamix_8bit_blockwise_grad_fp16,
-            lib.cademamix_8bit_blockwise_grad_bf16,
-        ),
-    }
+"""C FUNCTIONS FOR OPTIMIZERS"""
+str2optimizer32bit = {
+    "adam": (
+        lib.cadam32bit_grad_fp32,
+        lib.cadam32bit_grad_fp16,
+        lib.cadam32bit_grad_bf16,
+    ),
+    "momentum": (
+        lib.cmomentum32bit_grad_32,
+        lib.cmomentum32bit_grad_16,
+    ),
+    "rmsprop": (
+        lib.crmsprop32bit_grad_32,
+        lib.crmsprop32bit_grad_16,
+    ),
+    "lion": (
+        lib.clion32bit_grad_fp32,
+        lib.clion32bit_grad_fp16,
+        lib.clion32bit_grad_bf16,
+    ),
+    "adagrad": (
+        lib.cadagrad32bit_grad_32,
+        lib.cadagrad32bit_grad_16,
+    ),
+    "lamb": (
+        lib.cadam32bit_grad_fp32,
+        lib.cadam32bit_grad_fp16,
+        lib.cadam32bit_grad_bf16,
+    ),
+    "ademamix": (
+        lib.cademamix32bit_grad_fp32,
+        lib.cademamix32bit_grad_fp16,
+        lib.cademamix32bit_grad_bf16,
+    ),
+}
+
+str2optimizer8bit = {
+    "adam": (
+        lib.cadam_static_8bit_grad_32,
+        lib.cadam_static_8bit_grad_16,
+    ),
+    "momentum": (
+        lib.cmomentum_static_8bit_grad_32,
+        lib.cmomentum_static_8bit_grad_16,
+    ),
+    "rmsprop": (
+        lib.crmsprop_static_8bit_grad_32,
+        lib.crmsprop_static_8bit_grad_16,
+    ),
+    "lion": (
+        lib.clion_static_8bit_grad_32,
+        lib.clion_static_8bit_grad_16,
+    ),
+    "lamb": (
+        lib.cadam_static_8bit_grad_32,
+        lib.cadam_static_8bit_grad_16,
+    ),
+    "lars": (
+        lib.cmomentum_static_8bit_grad_32,
+        lib.cmomentum_static_8bit_grad_16,
+    ),
+}
+
+str2optimizer8bit_blockwise = {
+    "adam": (
+        lib.cadam_8bit_blockwise_grad_fp32,
+        lib.cadam_8bit_blockwise_grad_fp16,
+        lib.cadam_8bit_blockwise_grad_bf16,
+    ),
+    "momentum": (
+        lib.cmomentum_8bit_blockwise_grad_fp32,
+        lib.cmomentum_8bit_blockwise_grad_fp16,
+        lib.cmomentum_8bit_blockwise_grad_bf16,
+    ),
+    "rmsprop": (
+        lib.crmsprop_8bit_blockwise_grad_fp32,
+        lib.crmsprop_8bit_blockwise_grad_fp16,
+        lib.crmsprop_8bit_blockwise_grad_bf16,
+    ),
+    "lion": (
+        lib.clion_8bit_blockwise_grad_fp32,
+        lib.clion_8bit_blockwise_grad_fp16,
+        lib.clion_8bit_blockwise_grad_bf16,
+    ),
+    "adagrad": (
+        lib.cadagrad_8bit_blockwise_grad_fp32,
+        lib.cadagrad_8bit_blockwise_grad_fp16,
+        lib.cadagrad_8bit_blockwise_grad_bf16,
+    ),
+    "ademamix": (
+        lib.cademamix_8bit_blockwise_grad_fp32,
+        lib.cademamix_8bit_blockwise_grad_fp16,
+        lib.cademamix_8bit_blockwise_grad_bf16,
+    ),
+}
 
 
 class GlobalPageManager:
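
Because every `lib.<symbol>` lookup now returns a callable — either the real C function or a deferred-error stub — functional.py can build its optimizer dispatch tables unconditionally at import time, and the old `if lib and lib.compiled_with_cuda:` guard becomes unnecessary. A tiny demonstration under that assumption (the stub class is illustrative):

    class _Stub:
        def __getattr__(self, name):
            def throw_on_call(*args, **kwargs):
                raise RuntimeError(f"lib.{name} unavailable")

            return throw_on_call

    lib = _Stub()
    # Import-time table construction succeeds; an error occurs only if an entry is called.
    str2optimizer32bit = {"adam": (lib.cadam32bit_grad_fp32,)}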
- """, - ) - from bitsandbytes.optim import Adam p = torch.nn.Parameter(torch.rand(10, 10).cuda()) @@ -67,12 +52,15 @@ def main(): print("SUCCESS!") print("Installation was successful!") return - except ImportError: - print( - f"WARNING: {__package__} is currently running as CPU-only!\n" - "Therefore, 8-bit optimizers and GPU quantization are unavailable.\n\n" - f"If you think that this is so erroneously,\nplease report an issue!", - ) + except RuntimeError as e: + if "not available in CPU-only" in str(e): + print( + f"WARNING: {__package__} is currently running as CPU-only!\n" + "Therefore, 8-bit optimizers and GPU quantization are unavailable.\n\n" + f"If you think that this is so erroneously,\nplease report an issue!", + ) + else: + raise e except Exception: traceback.print_exc() print_dedented(