diff --git a/src/zarr/_compat.py b/src/zarr/_compat.py
index 87427b486e..ae973d6292 100644
--- a/src/zarr/_compat.py
+++ b/src/zarr/_compat.py
@@ -2,10 +2,16 @@
 from collections.abc import Callable
 from functools import wraps
 from inspect import Parameter, signature
-from typing import Any, TypeVar
+from typing import TYPE_CHECKING, Any, TypeVar
+
+import numpy as np
+from packaging.version import Version
 
 from zarr.errors import ZarrFutureWarning
 
+if TYPE_CHECKING:
+    from numpy.typing import NDArray
+
 T = TypeVar("T")
 
 # Based off https://github.com/scikit-learn/scikit-learn/blob/e87b32a81c70abed8f2e97483758eb64df8255e9/sklearn/utils/validation.py#L63
@@ -68,3 +74,42 @@ def inner_f(*args: Any, **kwargs: Any) -> T:
         return _inner_deprecate_positional_args(func)
 
     return _inner_deprecate_positional_args  # type: ignore[return-value]
+
+
+def _reshape_view(arr: "NDArray[Any]", shape: tuple[int, ...]) -> "NDArray[Any]":
+    """Reshape an array without copying data.
+
+    This function provides compatibility across NumPy versions for reshaping arrays
+    as views. On NumPy >= 2.1, it uses ``reshape(copy=False)`` which explicitly
+    fails if a view cannot be created. On older versions, it assigns the new shape
+    to a fresh view of the array, which fails in the same situations (direct shape
+    assignment is deprecated in 2.5+). In both cases the input array itself is
+    left unmodified.
+
+    Parameters
+    ----------
+    arr : NDArray
+        The array to reshape. Its own shape is never changed.
+    shape : tuple of int
+        The new shape.
+
+    Returns
+    -------
+    NDArray
+        A reshaped view of the array.
+
+    Raises
+    ------
+    AttributeError
+        If a view cannot be created (the array is not contiguous) on NumPy < 2.1.
+    ValueError
+        If a view cannot be created (the array is not contiguous) on NumPy >= 2.1.
+    """
+    if Version(np.__version__) >= Version("2.1"):
+        return arr.reshape(shape, copy=False)  # type: ignore[call-overload, no-any-return]
+    else:
+        # Set the shape on a new view rather than on ``arr`` so the caller's array
+        # is not mutated, matching the ``reshape(copy=False)`` branch.
+        view = arr.view()
+        view.shape = shape
+        return view
diff --git a/src/zarr/codecs/vlen_utf8.py b/src/zarr/codecs/vlen_utf8.py
index d8e6072333..fb1fb76126 100644
--- a/src/zarr/codecs/vlen_utf8.py
+++ b/src/zarr/codecs/vlen_utf8.py
@@ -6,6 +6,7 @@
 import numpy as np
 from numcodecs.vlen import VLenBytes, VLenUTF8
 
+from zarr._compat import _reshape_view
 from zarr.abc.codec import ArrayBytesCodec
 from zarr.core.buffer import Buffer, NDBuffer
 from zarr.core.common import JSON, parse_named_configuration
@@ -50,7 +51,7 @@ async def _decode_single(
         raw_bytes = chunk_bytes.as_array_like()
         decoded = _vlen_utf8_codec.decode(raw_bytes)
         assert decoded.dtype == np.object_
-        decoded = decoded.reshape(chunk_spec.shape)
+        decoded = _reshape_view(decoded, chunk_spec.shape)
         as_string_dtype = decoded.astype(chunk_spec.dtype.to_native_dtype(), copy=False)
         return chunk_spec.prototype.nd_buffer.from_numpy_array(as_string_dtype)
 
@@ -95,7 +96,7 @@ async def _decode_single(
         raw_bytes = chunk_bytes.as_array_like()
         decoded = _vlen_bytes_codec.decode(raw_bytes)
         assert decoded.dtype == np.object_
-        decoded = decoded.reshape(chunk_spec.shape)
+        decoded = _reshape_view(decoded, chunk_spec.shape)
         return chunk_spec.prototype.nd_buffer.from_numpy_array(decoded)
 
     async def _encode_single(