diff --git a/src/diffusers/quantizers/gguf/utils.py b/src/diffusers/quantizers/gguf/utils.py index adb429688723..e0ad0e1cce42 100644 --- a/src/diffusers/quantizers/gguf/utils.py +++ b/src/diffusers/quantizers/gguf/utils.py @@ -516,6 +516,9 @@ def dequantize_gguf_tensor(tensor): block_size, type_size = GGML_QUANT_SIZES[quant_type] + # Convert to plain tensor to avoid unnecessary __torch_function__ overhead. + tensor = tensor.as_tensor() + tensor = tensor.view(torch.uint8) shape = _quant_shape_from_byte_shape(tensor.shape, type_size, block_size) @@ -525,7 +528,7 @@ def dequantize_gguf_tensor(tensor): dequant = dequant_fn(blocks, block_size, type_size) dequant = dequant.reshape(shape) - return dequant.as_tensor() + return dequant class GGUFParameter(torch.nn.Parameter):