diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 20caca291..0f446f776 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -111,7 +111,7 @@ jobs:
       matrix:
         os: [ubuntu-latest, macos-latest, windows-latest]
         # The specific Python version is irrelevant in this context as we are only packaging non-C extension
-        # code. This ensures compatibility across Python versions, including Python 3.8, as compatibility is
+        # code. This ensures compatibility across Python versions, including Python 3.9, as compatibility is
         # dictated by the packaged code itself, not the Python version used for packaging.
         python-version: ["3.10"]
         arch: [x86_64, aarch64]
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 8ac37502e..70eca2f9f 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.6.9
+    rev: v0.11.2
     hooks:
       - id: ruff
         args:
diff --git a/benchmarking/int8/int8_benchmark.py b/benchmarking/int8/int8_benchmark.py
index b91e5f76f..311d49985 100644
--- a/benchmarking/int8/int8_benchmark.py
+++ b/benchmarking/int8/int8_benchmark.py
@@ -65,4 +65,4 @@
 print("=" * 40)
 print(f"Example:\n{tokenizer.decode(generated_ids[0])}")
 print("=" * 40)
-print(f"Speed: {num/(time.time() - time_1)}token/s")
+print(f"Speed: {num / (time.time() - time_1)}token/s")
diff --git a/benchmarking/matmul_benchmark.py b/benchmarking/matmul_benchmark.py
index 89b3dfb8a..6812a4fdd 100644
--- a/benchmarking/matmul_benchmark.py
+++ b/benchmarking/matmul_benchmark.py
@@ -66,7 +66,7 @@ def test_bench_matmul(batch, seq, model, hidden):
         torch.matmul(A, B.t())
     torch.cuda.synchronize()
     print(
-        f"pytorch fp16: [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time()-t0:.4f}s",
+        f"pytorch fp16: [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time() - t0:.4f}s",
     )

     # torch.cuda.synchronize()
@@ -88,14 +88,16 @@ def test_bench_matmul(batch, seq, model, hidden):
     for i in range(iters):
         bnb.matmul_4bit(A, B_nf4.t(), quant_state=state_nf4)
     torch.cuda.synchronize()
-    print(f"bnb nf4: [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time()-t0:.4f}s")
+    print(f"bnb nf4: [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time() - t0:.4f}s")

     torch.cuda.synchronize()
     t0 = time.time()
     for i in range(iters):
         bnb.matmul_4bit(A, B_nf4_c.t(), quant_state=state_nf4_c)
     torch.cuda.synchronize()
-    print(f"bnb nf4+DQ: [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time()-t0:.4f}s")
+    print(
+        f"bnb nf4+DQ: [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time() - t0:.4f}s"
+    )

     torch.cuda.synchronize()
     t0 = time.time()
@@ -103,7 +105,7 @@ def test_bench_matmul(batch, seq, model, hidden):
         bnb.matmul(A, B)
     torch.cuda.synchronize()
     print(
-        f"B -> CB (each iteration): [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time()-t0:.4f}s"
+        f"B -> CB (each iteration): [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time() - t0:.4f}s"
     )

     torch.cuda.synchronize()
@@ -112,7 +114,7 @@ def test_bench_matmul(batch, seq, model, hidden):
         bnb.matmul(A, B, threshold=6.0)
     torch.cuda.synchronize()
     print(
-        f"B -> CB + threshold: [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time()-t0:.4f}s"
+        f"B -> CB + threshold: [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time() - t0:.4f}s"
     )

     CA, SCA, _ = F.int8_vectorwise_quant(A, threshold=0.0)
@@ -124,7 +126,7 @@ def test_bench_matmul(batch, seq, model, hidden):
         out32 = F.int8_linear_matmul(CA, CB)
     torch.cuda.synchronize()
     print(
-        f"no overhead int8 [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time()-t0:.4f}s"
+        f"no overhead int8 [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time() - t0:.4f}s"
     )

     # C32A, SA = F.transform(CA, "col32")
@@ -183,7 +185,7 @@ def test_bench_matmul(batch, seq, model, hidden):
         linear8bit(A)
     torch.cuda.synchronize()
     print(
-        f"bnb linear8bitlt (eval): [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time()-t0:.4f}s"
+        f"bnb linear8bitlt (eval): [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time() - t0:.4f}s"
     )

     linearMixedBit(A)
@@ -193,7 +195,7 @@ def test_bench_matmul(batch, seq, model, hidden):
         linearMixedBit(A)
     torch.cuda.synchronize()
     print(
-        f"bnb linear8bitlt with threshold (eval): [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time()-t0:.4f}s"
+        f"bnb linear8bitlt with threshold (eval): [{batch},{seq},{model}], [{model},{hidden}]->[{batch},{seq},{hidden}]: {time.time() - t0:.4f}s"
     )

     # linear8bit_train(A)
diff --git a/bitsandbytes/_ops.py b/bitsandbytes/_ops.py
index 0ee703219..2a12e40a1 100644
--- a/bitsandbytes/_ops.py
+++ b/bitsandbytes/_ops.py
@@ -1,5 +1,6 @@
+from collections.abc import Sequence
 from math import prod
-from typing import Optional, Sequence, Tuple
+from typing import Optional

 import torch

@@ -131,7 +132,7 @@ def _(
 def _(
     A: torch.Tensor,
     threshold=0.0,
-) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, Optional[torch.Tensor]]:
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, Optional[torch.Tensor]]:
     out_row = torch.empty_like(A, dtype=torch.int8)
     out_col = torch.empty_like(A, dtype=torch.int8)
     row_stats = torch.empty(prod(A.shape[:-1]), device=A.device, dtype=torch.float32)
@@ -191,7 +192,7 @@
 @register_fake("bitsandbytes::quantize_4bit")
 def _(
     A: torch.Tensor, blocksize: int, quant_type: str, quant_storage: torch.dtype
-) -> Tuple[torch.Tensor, torch.Tensor]:
+) -> tuple[torch.Tensor, torch.Tensor]:
     torch._check_is_size(blocksize)

     n = A.numel()
@@ -235,7 +236,7 @@


 @register_fake("bitsandbytes::quantize_blockwise")
-def _(A: torch.Tensor, code: torch.Tensor, blocksize: int) -> Tuple[torch.Tensor, torch.Tensor]:
+def _(A: torch.Tensor, code: torch.Tensor, blocksize: int) -> tuple[torch.Tensor, torch.Tensor]:
     torch._check_is_size(blocksize)
     n = A.numel()
     blocks = -(n // -blocksize)
diff --git a/bitsandbytes/autograd/_functions.py b/bitsandbytes/autograd/_functions.py
index 9f14db754..0f97cdd08 100644
--- a/bitsandbytes/autograd/_functions.py
+++ b/bitsandbytes/autograd/_functions.py
@@ -1,6 +1,6 @@
 from dataclasses import dataclass
 from math import prod
-from typing import Callable, Optional, Tuple
+from typing import Callable, Optional
 import warnings
 from warnings import warn

@@ -55,7 +55,7 @@ def get_current_outlier_idx(self):
 )
 def get_inverse_transform_indices(
     transform_tile: Callable[[torch.Tensor], torch.Tensor],
-    tile_size: Tuple[int, int],
+    tile_size: tuple[int, int],
 ):
     """
     Compute a permutation of indices that invert the specified (tiled) matrix transformation
diff --git a/bitsandbytes/backends/cpu/ops.py b/bitsandbytes/backends/cpu/ops.py
index b286b710b..ac906b7ec 100644
--- a/bitsandbytes/backends/cpu/ops.py
+++ b/bitsandbytes/backends/cpu/ops.py
@@ -1,5 +1,5 @@
 import ctypes as ct
-from typing import Optional, Tuple
+from typing import Optional

 import torch

@@ -47,7 +47,7 @@


 @register_kernel("bitsandbytes::quantize_blockwise", "cpu")
-def _(A: torch.Tensor, code: torch.Tensor, blocksize: int) -> Tuple[torch.Tensor, torch.Tensor]:
+def _(A: torch.Tensor, code: torch.Tensor, blocksize: int) -> tuple[torch.Tensor, torch.Tensor]:
     torch._check_is_size(blocksize)
     torch._check(A.dtype == torch.float32, lambda: f"A must be float32 on cpu, got {A.dtype}")

@@ -116,7 +116,7 @@ def _(A: torch.Tensor, absmax: torch.Tensor, code: torch.Tensor, blocksize: int,
 @register_kernel("bitsandbytes::quantize_4bit", "cpu")
 def _(
     A: torch.Tensor, blocksize: int, quant_type: str, quant_storage: torch.dtype
-) -> Tuple[torch.Tensor, torch.Tensor]:
+) -> tuple[torch.Tensor, torch.Tensor]:
     torch._check_is_size(blocksize)
     torch._check(quant_type == "nf4", lambda: f"quant_type must be nf4 on CPU, got {quant_type}")

diff --git a/bitsandbytes/backends/cuda/ops.py b/bitsandbytes/backends/cuda/ops.py
index 88bd872be..c921af53a 100644
--- a/bitsandbytes/backends/cuda/ops.py
+++ b/bitsandbytes/backends/cuda/ops.py
@@ -1,6 +1,7 @@
+from collections.abc import Sequence
 import ctypes as ct
 from math import prod
-from typing import Optional, Sequence, Tuple
+from typing import Optional

 import torch

@@ -78,10 +79,7 @@ def _int8_linear_matmul_impl(A: torch.Tensor, B: torch.Tensor, out: torch.Tensor
             raise NotImplementedError("int8_linear_matmul not implemented!")
         else:
             raise RuntimeError(
-                f"cublasLt ran into an error!\n"
-                f"\t{shapeA=}, {shapeB=}, {shapeC=}\n"
-                f"\t{(lda, ldb, ldc)=}\n"
-                f"\t{(m, n, k)=}"
+                f"cublasLt ran into an error!\n\t{shapeA=}, {shapeB=}, {shapeC=}\n\t{(lda, ldb, ldc)=}\n\t{(m, n, k)=}"
             )

     return out
@@ -169,7 +167,7 @@ def _(A: torch.Tensor, threshold=0.0):
 def _(
     A: torch.Tensor,
     threshold=0.0,
-) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, Optional[torch.Tensor]]:
+) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, Optional[torch.Tensor]]:
     # Use CUDA kernel for rowwise and COO tensor
     quant_row, row_stats, outlier_cols = torch.ops.bitsandbytes.int8_vectorwise_quant.default(
         A,
@@ -188,7 +186,7 @@ def _(
 def _get_col_absmax(
     A: torch.Tensor,
     threshold=0.0,
-) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
+) -> tuple[torch.Tensor, Optional[torch.Tensor]]:
     torch._check(A.is_floating_point())

     outlier_mask = None
@@ -207,7 +205,7 @@


 @register_kernel("bitsandbytes::quantize_blockwise", "cuda")
-def _(A: torch.Tensor, code: torch.Tensor, blocksize: int) -> Tuple[torch.Tensor, torch.Tensor]:
+def _(A: torch.Tensor, code: torch.Tensor, blocksize: int) -> tuple[torch.Tensor, torch.Tensor]:
     torch._check_is_size(blocksize)
     torch._check(blocksize in [4096, 2048, 1024, 512, 256, 128, 64])
     torch._check(code.dtype == torch.float32, lambda: f"code must be float32, got {code.dtype}")
@@ -292,7 +290,7 @@ def _dequantize_blockwise_impl(
 @register_kernel("bitsandbytes::quantize_4bit", "cuda")
 def _(
     A: torch.Tensor, blocksize: int, quant_type: str, quant_storage: torch.dtype
-) -> Tuple[torch.Tensor, torch.Tensor]:
+) -> tuple[torch.Tensor, torch.Tensor]:
     torch._check(blocksize in [4096, 2048, 1024, 512, 256, 128, 64])
     torch._check(quant_type in ["fp4", "nf4"])
     torch._check(
diff --git a/bitsandbytes/cuda_specs.py b/bitsandbytes/cuda_specs.py
index 235296082..ca5463bc4 100644
--- a/bitsandbytes/cuda_specs.py
+++ b/bitsandbytes/cuda_specs.py
@@ -1,27 +1,27 @@
 import dataclasses
 from functools import lru_cache
-from typing import List, Optional, Tuple
+from typing import Optional

 import torch


 @dataclasses.dataclass(frozen=True)
 class CUDASpecs:
-    highest_compute_capability: Tuple[int, int]
+    highest_compute_capability: tuple[int, int]
     cuda_version_string: str
-    cuda_version_tuple: Tuple[int, int]
+    cuda_version_tuple: tuple[int, int]

     @property
     def has_imma(self) -> bool:
         return torch.version.hip or self.highest_compute_capability >= (7, 5)


-def get_compute_capabilities() -> List[Tuple[int, int]]:
+def get_compute_capabilities() -> list[tuple[int, int]]:
     return sorted(torch.cuda.get_device_capability(torch.cuda.device(i)) for i in range(torch.cuda.device_count()))


 @lru_cache(None)
-def get_cuda_version_tuple() -> Tuple[int, int]:
+def get_cuda_version_tuple() -> tuple[int, int]:
     if torch.version.cuda:
         return map(int, torch.version.cuda.split(".")[0:2])
     elif torch.version.hip:
diff --git a/bitsandbytes/diagnostics/cuda.py b/bitsandbytes/diagnostics/cuda.py
index 45dc98dea..affcb0ae6 100644
--- a/bitsandbytes/diagnostics/cuda.py
+++ b/bitsandbytes/diagnostics/cuda.py
@@ -1,7 +1,7 @@
+from collections.abc import Iterable, Iterator
 import logging
 import os
 from pathlib import Path
-from typing import Dict, Iterable, Iterator

 import torch

@@ -76,7 +76,7 @@ def is_relevant_candidate_env_var(env_var: str, value: str) -> bool:
 )


-def get_potentially_lib_path_containing_env_vars() -> Dict[str, str]:
+def get_potentially_lib_path_containing_env_vars() -> dict[str, str]:
     return {env_var: value for env_var, value in os.environ.items() if is_relevant_candidate_env_var(env_var, value)}


diff --git a/bitsandbytes/functional.py b/bitsandbytes/functional.py
index c0e139e03..bc0ade929 100644
--- a/bitsandbytes/functional.py
+++ b/bitsandbytes/functional.py
@@ -2,10 +2,11 @@
 #
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
+from collections.abc import Iterable
 import ctypes as ct
 import itertools
 from math import prod
-from typing import Any, Dict, Iterable, Optional, Tuple, Union
+from typing import Any, Optional, Union

 import numpy as np
 import torch
@@ -619,7 +620,7 @@ def __get_item__(self, idx):
         return list_repr[idx]

     @classmethod
-    def from_dict(cls, qs_dict: Dict[str, Any], device: torch.device) -> "QuantState":
+    def from_dict(cls, qs_dict: dict[str, Any], device: torch.device) -> "QuantState":
         """
         unpacks components of state_dict into QuantState
         where necessary, convert into strings, torch.dtype, ints, etc.
@@ -741,7 +742,7 @@ def quantize_blockwise(
     out: Optional[torch.Tensor] = None,
     blocksize=4096,
     nested=False,
-) -> Tuple[torch.Tensor, QuantState]:
+) -> tuple[torch.Tensor, QuantState]:
     """Quantize a tensor in blocks of values.

     The input tensor is quantized by dividing it into blocks of `blocksize` values.
@@ -994,7 +995,7 @@ def quantize_4bit(
     compress_statistics=False,
     quant_type="fp4",
     quant_storage=torch.uint8,
-) -> Tuple[torch.Tensor, QuantState]:
+) -> tuple[torch.Tensor, QuantState]:
     """Quantize tensor A in blocks of 4-bit values.

     Quantizes tensor A by dividing it into blocks which are independently quantized.
@@ -1161,7 +1162,7 @@ def quantize(
     A: Tensor,
     code: Optional[torch.Tensor] = None,
     out: Optional[torch.Tensor] = None,
-) -> Tuple[Tensor, Tuple[Tensor, Tensor]]:
+) -> tuple[Tensor, tuple[Tensor, Tensor]]:
     if code is None:
         if "dynamic" not in name2qmap:
             name2qmap["dynamic"] = create_dynamic_map().to(A.device)
@@ -1179,7 +1180,7 @@ def quantize(
 @deprecated("This function is deprecated and will be removed in a future release.", category=FutureWarning)
 def dequantize(
     A: Tensor,
-    state: Optional[Tuple[Tensor, Tensor]] = None,
+    state: Optional[tuple[Tensor, Tensor]] = None,
     absmax: Optional[torch.Tensor] = None,
     code: Optional[torch.Tensor] = None,
     out: Optional[torch.Tensor] = None,
@@ -2006,7 +2007,7 @@ def get_colrow_absmax(
     col_stats: Optional[torch.Tensor] = None,
     nnz_block_ptr: Optional[torch.Tensor] = None,
     threshold=0.0,
-) -> Tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]:
+) -> tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]:
     """ "Determine the quantization statistics for input matrix `A` in accordance to the `LLM.int8()` algorithm.

     The row-wise and column-wise absmax values are determined.
@@ -2268,9 +2269,9 @@ def spmm_coo(
     out: Optional[torch.Tensor] = None,
 ):
     if not isinstance(cooA, COOSparseTensor):
-        assert (
-            cooA.is_sparse and cooA.layout == torch.sparse_coo
-        ), "Tensor must be `COOSparseTensor or a PyTorch COO tensor."
+        assert cooA.is_sparse and cooA.layout == torch.sparse_coo, (
+            "Tensor must be `COOSparseTensor or a PyTorch COO tensor."
+        )

         # Convert to custom COOSparseTensor
         cooA = COOSparseTensor(
diff --git a/bitsandbytes/nn/modules.py b/bitsandbytes/nn/modules.py
index f4d838d48..dfa688abb 100644
--- a/bitsandbytes/nn/modules.py
+++ b/bitsandbytes/nn/modules.py
@@ -3,7 +3,7 @@
 # This source code is licensed under the MIT license found in the
 # LICENSE file in the root directory of this source tree.
 import copy
-from typing import Any, Dict, Optional, TypeVar, Union, overload
+from typing import Any, Optional, TypeVar, Union, overload
 import warnings

 import torch
@@ -268,7 +268,7 @@ def __copy__(self):
     def from_prequantized(
         cls,
         data: torch.Tensor,
-        quantized_stats: Dict[str, Any],
+        quantized_stats: dict[str, Any],
         requires_grad: bool = False,
         device="cuda",
         module: Optional["Linear4bit"] = None,
diff --git a/bitsandbytes/optim/ademamix.py b/bitsandbytes/optim/ademamix.py
index 56d273103..e52d96589 100644
--- a/bitsandbytes/optim/ademamix.py
+++ b/bitsandbytes/optim/ademamix.py
@@ -1,5 +1,6 @@
+from collections.abc import Iterable
 import math
-from typing import Iterable, Literal, Optional, Tuple
+from typing import Literal, Optional

 import torch

@@ -16,7 +17,7 @@ def __init__(
         self,
         params: Iterable[torch.nn.Parameter],
         lr: float = 1e-3,
-        betas: Tuple[float, float, float] = (0.9, 0.999, 0.9999),
+        betas: tuple[float, float, float] = (0.9, 0.999, 0.9999),
         alpha: float = 5.0,
         eps: float = 1e-8,
         weight_decay: float = 1e-2,  # default 0.0 or 1e-2?
@@ -108,7 +109,7 @@ def __init__(
         self,
         params: Iterable[torch.nn.Parameter],
         lr: float = 1e-3,
-        betas: Tuple[float, float, float] = (0.9, 0.999, 0.9999),
+        betas: tuple[float, float, float] = (0.9, 0.999, 0.9999),
         alpha: float = 5.0,
         t_alpha: Optional[int] = None,
         t_beta3: Optional[int] = None,
@@ -151,7 +152,7 @@ def init_state(self, group, p, gindex, pindex):
         elif config["optim_bits"] == 8:
             dtype = torch.uint8
         else:
-            raise NotImplementedError(f'Amount of optimizer bits not supported: {config["optim_bits"]}')
+            raise NotImplementedError(f"Amount of optimizer bits not supported: {config['optim_bits']}")

         if p.numel() < config["min_8bit_size"]:
             dtype = torch.float32
@@ -274,7 +275,7 @@ def __init__(
         self,
         params: Iterable[torch.nn.Parameter],
         lr: float = 1e-3,
-        betas: Tuple[float, float, float] = (0.9, 0.999, 0.9999),
+        betas: tuple[float, float, float] = (0.9, 0.999, 0.9999),
         alpha: float = 5.0,
         t_alpha: Optional[int] = None,
         t_beta3: Optional[int] = None,
@@ -303,7 +304,7 @@ def __init__(
         self,
         params: Iterable[torch.nn.Parameter],
         lr: float = 1e-3,
-        betas: Tuple[float, float, float] = (0.9, 0.999, 0.9999),
+        betas: tuple[float, float, float] = (0.9, 0.999, 0.9999),
         alpha: float = 5.0,
         t_alpha: Optional[int] = None,
         t_beta3: Optional[int] = None,
@@ -330,7 +331,7 @@ def __init__(
         self,
         params: Iterable[torch.nn.Parameter],
         lr: float = 1e-3,
-        betas: Tuple[float, float, float] = (0.9, 0.999, 0.9999),
+        betas: tuple[float, float, float] = (0.9, 0.999, 0.9999),
         alpha: float = 5.0,
         t_alpha: Optional[int] = None,
         t_beta3: Optional[int] = None,
@@ -359,7 +360,7 @@ def __init__(
         self,
         params: Iterable[torch.nn.Parameter],
         lr: float = 1e-3,
-        betas: Tuple[float, float, float] = (0.9, 0.999, 0.9999),
+        betas: tuple[float, float, float] = (0.9, 0.999, 0.9999),
         alpha: float = 5.0,
         t_alpha: Optional[int] = None,
         t_beta3: Optional[int] = None,
@@ -392,7 +393,7 @@ def __init__(
         self,
         params: Iterable[torch.nn.Parameter],
         lr: float = 1e-3,
-        betas: Tuple[float, float, float] = (0.9, 0.999, 0.9999),
+        betas: tuple[float, float, float] = (0.9, 0.999, 0.9999),
         alpha: float = 5.0,
         t_alpha: Optional[int] = None,
         t_beta3: Optional[int] = None,
diff --git a/bitsandbytes/optim/optimizer.py b/bitsandbytes/optim/optimizer.py
index 03e0e01d7..d4656efc4 100644
--- a/bitsandbytes/optim/optimizer.py
+++ b/bitsandbytes/optim/optimizer.py
@@ -450,7 +450,7 @@ def init_state(self, group, p, gindex, pindex):
         elif config["optim_bits"] == 8:
             dtype = torch.uint8
         else:
-            raise NotImplementedError(f'Amount of optimizer bits not supported: {config["optim_bits"]}')
+            raise NotImplementedError(f"Amount of optimizer bits not supported: {config['optim_bits']}")

         if p.numel() < config["min_8bit_size"]:
             dtype = torch.float32
@@ -677,7 +677,7 @@ def init_state(self, group, p, gindex, pindex):
         elif config["optim_bits"] == 8:
             dtype = torch.uint8
         else:
-            raise NotImplementedError(f'Amount of optimizer bits not supported: {config["optim_bits"]}')
+            raise NotImplementedError(f"Amount of optimizer bits not supported: {config['optim_bits']}")

         if p.numel() < config["min_8bit_size"]:
             dtype = torch.float32
diff --git a/bitsandbytes/triton/matmul_perf_model.py b/bitsandbytes/triton/matmul_perf_model.py
index 199ceb1a3..e843a3a39 100644
--- a/bitsandbytes/triton/matmul_perf_model.py
+++ b/bitsandbytes/triton/matmul_perf_model.py
@@ -128,7 +128,7 @@ def estimate_matmul_time(
     print(
         f"Total time: {total_time_ms}ms, compute time: {compute_ms}ms, "
         f"loading time: {load_ms}ms, store time: {store_ms}ms, "
-        f"Activate CTAs: {active_cta_ratio*100}%"
+        f"Activate CTAs: {active_cta_ratio * 100}%"
     )
     return total_time_ms

diff --git a/bitsandbytes/utils.py b/bitsandbytes/utils.py
index a88ddf5f9..0828dd295 100644
--- a/bitsandbytes/utils.py
+++ b/bitsandbytes/utils.py
@@ -1,7 +1,6 @@
 import json
 import shlex
 import subprocess
-from typing import Tuple

 import torch

@@ -104,7 +103,7 @@ def find_outlier_dims(weight, reduction_dim=0, zscore=4.0, topk=None, rdm=False)
     return idx


-def execute_and_return(command_string: str) -> Tuple[str, str]:
+def execute_and_return(command_string: str) -> tuple[str, str]:
     def _decode(subprocess_err_out_tuple):
         return tuple(to_decode.decode("UTF-8").strip() for to_decode in subprocess_err_out_tuple)

diff --git a/examples/int8_inference_huggingface.py b/examples/int8_inference_huggingface.py
index 2d4c77952..0112727da 100644
--- a/examples/int8_inference_huggingface.py
+++ b/examples/int8_inference_huggingface.py
@@ -8,7 +8,7 @@
 tokenizer = LlamaTokenizer.from_pretrained(model_name)
 input_ids = tokenizer(text, return_tensors="pt").input_ids

-max_memory = f"{int(torch.cuda.mem_get_info()[0]/1024**3)-2}GB"
+max_memory = f"{int(torch.cuda.mem_get_info()[0] / 1024**3) - 2}GB"

 n_gpus = torch.cuda.device_count()
 max_memory = {i: max_memory for i in range(n_gpus)}
diff --git a/pyproject.toml b/pyproject.toml
index f4ae66a8e..b2ffb64b2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -11,7 +11,7 @@ maintainers = [
     {name="Titus von Köller", email="titus@huggingface.co"},
     {name="Matthew Douglas", email="matthew.douglas@huggingface.co"}
 ]
-requires-python = ">=3.8"
+requires-python = ">=3.9"
 readme = "README.md"
 license = {file="LICENSE"}
 keywords = [
@@ -34,11 +34,11 @@ classifiers = [
     "Operating System :: Microsoft :: Windows",
     "Programming Language :: C++",
     "Programming Language :: Python :: Implementation :: CPython",
-    "Programming Language :: Python :: 3.8",
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
     "Topic :: Scientific/Engineering :: Artificial Intelligence"
 ]
 dependencies = [
@@ -58,7 +58,7 @@ docs = ["hf-doc-builder==0.5.0"]
 dev = [
     "bitsandbytes[test]",
     "build>=1.0.0,<2",
-    "ruff==0.9.6",
+    "ruff==0.11.2",
     "pre-commit>=3.5.0,<4",
     "wheel>=0.42,<1"
 ]
@@ -66,7 +66,6 @@ test = [
     "einops~=0.8.0",
     "lion-pytorch==0.2.3",
     "pytest~=8.3",
-    "scipy>=1.10.1,<2; python_version < '3.9'",
     "scipy>=1.11.4,<2; python_version >= '3.9'",
     "transformers>=4.30.1,<5"
 ]
@@ -101,7 +100,7 @@ src = [
     "tests",
     "benchmarking"
 ]
-target-version = "py38"
+target-version = "py39"
 line-length = 119

 [tool.ruff.lint]
@@ -124,6 +123,7 @@ ignore = [
     "E731",  # Do not use lambda
     "F841",  # Local assigned but not used (TODO: enable, these are likely bugs)
     "RUF012",  # Mutable class attribute annotations
+    "RUF034",  # Useless if-else (TODO: enable)
     "ISC001",  # single-line-implicit-string-concatenation incompatible with formatter
 ]

diff --git a/tests/helpers.py b/tests/helpers.py
index 84e2391f4..9e85eba93 100644
--- a/tests/helpers.py
+++ b/tests/helpers.py
@@ -1,7 +1,7 @@
 from io import BytesIO
 from itertools import product
 import random
-from typing import Any, List
+from typing import Any

 import torch

@@ -27,7 +27,7 @@ def torch_load_from_buffer(buffer):
     return obj


-def get_test_dims(min: int, max: int, *, n: int) -> List[int]:
+def get_test_dims(min: int, max: int, *, n: int) -> list[int]:
     return [test_dims_rng.randint(min, max) for _ in range(n)]


diff --git a/tests/test_functional.py b/tests/test_functional.py
index b4172dd35..37f26d727 100644
--- a/tests/test_functional.py
+++ b/tests/test_functional.py
@@ -674,12 +674,12 @@ def test_int8_double_quant(self, dim1, dim2):
         min_error = 1 / 500
         if num_not_close_cols > (min_error * n):
             print(
-                f"Min error exceeded {num_not_close_cols} elements are different. Error: {num_not_close_cols/n:.4f}"
+                f"Min error exceeded {num_not_close_cols} elements are different. Error: {num_not_close_cols / n:.4f}"
             )
             assert False
         if num_not_close_rows > (min_error * n):
             print(
-                f"Min error exceeded {num_not_close_rows} elements are different. Error: {num_not_close_rows/n:.4f}"
+                f"Min error exceeded {num_not_close_rows} elements are different. Error: {num_not_close_rows / n:.4f}"
            )
             assert False
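
Note on the typing changes above: raising requires-python to ">=3.9" is what allows every typing.Tuple/List/Dict annotation in the patch to be rewritten with the PEP 585 builtin generics (tuple[...], list[...], dict[...]) and the ABCs (Sequence, Iterable, Iterator) to be imported from collections.abc. A minimal before/after sketch of the pattern, using a hypothetical helper that is not part of this patch:

    # Before (Python 3.8 compatible)
    from typing import Dict, List, Tuple

    def summarize(scores: Dict[str, float]) -> Tuple[List[str], float]:
        # Return the sorted keys and the mean score.
        return sorted(scores), sum(scores.values()) / len(scores)

    # After (Python 3.9+, the style this patch adopts)
    def summarize(scores: dict[str, float]) -> tuple[list[str], float]:
        # Same behavior; only the annotations change.
        return sorted(scores), sum(scores.values()) / len(scores)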