From 8fb3274d2a3ba829f68595efbe2eb4c60482498f Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 25 Jan 2026 21:02:06 +0100 Subject: [PATCH 1/3] - Add .config property to Array - Make .config attribute of AsyncArray public - Add .with_config method to Array and AsyncArray - Use .config attribute in docs --- docs/user-guide/performance.md | 9 +++- src/zarr/core/array.py | 78 +++++++++++++++++++++++++++++++--- src/zarr/core/array_spec.py | 6 +++ tests/test_api.py | 2 +- tests/test_array.py | 42 +++++++++++++++++- 5 files changed, 126 insertions(+), 11 deletions(-) diff --git a/docs/user-guide/performance.md b/docs/user-guide/performance.md index a2e986a1b8..0e0fa3cd55 100644 --- a/docs/user-guide/performance.md +++ b/docs/user-guide/performance.md @@ -125,7 +125,14 @@ This optimization prevents storing redundant objects and can speed up reads, but added computation during array writes, since the contents of each chunk must be compared to the fill value, and these advantages are contingent on the content of the array. If you know that your data will form chunks that are almost always non-empty, then there is no advantage to the optimization described above. -In this case, creating an array with `write_empty_chunks=True` (the default) will instruct Zarr to write every chunk without checking for emptiness. +In this case, creating an array with `write_empty_chunks=True` will instruct Zarr to write every chunk without checking for emptiness. 
+ +The default value of `write_empty_chunks` is `False`: + +```python exec="true" session="performance" source="above" result="ansi" +arr = zarr.create_array(store={}, shape=(1,), dtype='uint8') +assert arr.config.write_empty_chunks == False +``` The following example illustrates the effect of the `write_empty_chunks` flag on the time required to write an array with different values.: diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 00536a1ec0..59d2c4c6fd 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -144,7 +144,7 @@ from zarr.codecs.sharding import ShardingCodecIndexLocation from zarr.core.dtype.wrapper import TBaseDType, TBaseScalar from zarr.storage import StoreLike - from zarr.types import AnyArray, AnyAsyncArray, AsyncArrayV2, AsyncArrayV3 + from zarr.types import AnyArray, AnyAsyncArray, ArrayV2, ArrayV3, AsyncArrayV2, AsyncArrayV3 # Array and AsyncArray are defined in the base ``zarr`` namespace @@ -300,14 +300,14 @@ class AsyncArray(Generic[T_ArrayMetadata]): The path to the Zarr store. codec_pipeline : CodecPipeline The codec pipeline used for encoding and decoding chunks. - _config : ArrayConfig + config : ArrayConfig The runtime configuration of the array. 
""" metadata: T_ArrayMetadata store_path: StorePath codec_pipeline: CodecPipeline = field(init=False) - _config: ArrayConfig + config: ArrayConfig @overload def __init__( @@ -336,7 +336,7 @@ def __init__( object.__setattr__(self, "metadata", metadata_parsed) object.__setattr__(self, "store_path", store_path) - object.__setattr__(self, "_config", config_parsed) + object.__setattr__(self, "config", config_parsed) object.__setattr__( self, "codec_pipeline", @@ -1165,7 +1165,7 @@ def order(self) -> MemoryOrder: if self.metadata.zarr_format == 2: return self.metadata.order else: - return self._config.order + return self.config.order @property def attrs(self) -> dict[str, JSON]: @@ -1298,6 +1298,35 @@ def _nshards(self) -> int: """ return product(self._shard_grid_shape) + @overload + def with_config(self: AsyncArrayV2, config: ArrayConfigLike) -> AsyncArrayV2: ... + + @overload + def with_config(self: AsyncArrayV3, config: ArrayConfigLike) -> AsyncArrayV3: ... + + def with_config(self, config: ArrayConfigLike) -> Self: + """ + Return a copy of this Array with a new runtime configuration. + + Parameters + ---------- + + config : ArrayConfigLike + The runtime config for the new Array. Any keys not specified will be inherited + from the current array's config. + + Returns + ------- + A new Array + """ + if isinstance(config, ArrayConfig): + new_config = config + else: + # Merge new config with existing config, so missing keys are inherited + # from the current array rather than from global defaults + new_config = ArrayConfig(**{**self.config.to_dict(), **config}) # type: ignore[arg-type] + return type(self)(metadata=self.metadata, store_path=self.store_path, config=new_config) + async def nchunks_initialized(self) -> int: """ Calculate the number of chunks that have been initialized in storage. 
@@ -1570,7 +1599,7 @@ async def _get_selection( ) if product(indexer.shape) > 0: # need to use the order from the metadata for v2 - _config = self._config + _config = self.config if self.metadata.zarr_format == 2: _config = replace(_config, order=self.order) @@ -1741,7 +1770,7 @@ async def _set_selection( value_buffer = prototype.nd_buffer.from_ndarray_like(value) # need to use the order from the metadata for v2 - _config = self._config + _config = self.config if self.metadata.zarr_format == 2: _config = replace(_config, order=self.metadata.order) @@ -2063,6 +2092,19 @@ def async_array(self) -> AsyncArray[T_ArrayMetadata]: """ return self._async_array + @property + def config(self) -> ArrayConfig: + """ + The runtime configuration for this array. This is a read-only property. To modify the + runtime configuration, use `Array.with_config` to create a new `Array` with the modified + configuration. + + Returns + ------- + An `ArrayConfig` object that defines the runtime configuration for the array. + """ + return self.async_array.config + @classmethod @deprecated("Use zarr.create_array instead.", category=ZarrDeprecationWarning) def create( @@ -2524,6 +2566,28 @@ def _nshards(self) -> int: """ return self.async_array._nshards + @overload + def with_config(self: ArrayV2, config: ArrayConfigLike) -> ArrayV2: ... + + @overload + def with_config(self: ArrayV3, config: ArrayConfigLike) -> ArrayV3: ... + + def with_config(self, config: ArrayConfigLike) -> Self: + """ + Return a copy of this Array with a new runtime configuration. + + Parameters + ---------- + + config : ArrayConfigLike + The runtime config for the new Array. 
+ + Returns + ------- + A new Array + """ + return type(self)(self._async_array.with_config(config)) + @property def nbytes(self) -> int: """ diff --git a/src/zarr/core/array_spec.py b/src/zarr/core/array_spec.py index c4dedaefea..421dfbf145 100644 --- a/src/zarr/core/array_spec.py +++ b/src/zarr/core/array_spec.py @@ -69,6 +69,12 @@ def from_dict(cls, data: ArrayConfigParams) -> Self: kwargs_out[field_name] = data[field_name] return cls(**kwargs_out) + def to_dict(self) -> ArrayConfigParams: + """ + Serialize an instance of this class to a dict. + """ + return {"order": self.order, "write_empty_chunks": self.write_empty_chunks} + ArrayConfigLike = ArrayConfig | ArrayConfigParams diff --git a/tests/test_api.py b/tests/test_api.py index adea150ae1..82c75807df 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -232,7 +232,7 @@ def test_open_array_respects_write_empty_chunks_config(zarr_format: ZarrFormat) arr2 = zarr.open(store=store, path="test_array", config={"write_empty_chunks": True}) assert isinstance(arr2, zarr.Array) - assert arr2.async_array._config.write_empty_chunks is True + assert arr2.async_array.config.write_empty_chunks is True arr2[0:5] = np.zeros(5) assert arr2.nchunks_initialized == 1 diff --git a/tests/test_array.py b/tests/test_array.py index 67be294827..b7d7bc723d 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -44,6 +44,7 @@ default_filters_v2, default_serializer_v3, ) +from zarr.core.array_spec import ArrayConfig, ArrayConfigParams from zarr.core.buffer import NDArrayLike, NDArrayLikeOrScalar, default_buffer_prototype from zarr.core.chunk_grids import _auto_partition from zarr.core.chunk_key_encodings import ChunkKeyEncodingParams @@ -889,7 +890,7 @@ def test_write_empty_chunks_behavior( config={"write_empty_chunks": write_empty_chunks}, ) - assert arr.async_array._config.write_empty_chunks == write_empty_chunks + assert arr.async_array.config.write_empty_chunks == write_empty_chunks # initialize the store with some 
non-fill value chunks arr[:] = fill_value + 1 @@ -1562,7 +1563,7 @@ async def test_write_empty_chunks_config(write_empty_chunks: bool, store: Store) """ with zarr.config.set({"array.write_empty_chunks": write_empty_chunks}): arr = await create_array(store, shape=(2, 2), dtype="i4") - assert arr._config.write_empty_chunks == write_empty_chunks + assert arr.config.write_empty_chunks == write_empty_chunks @staticmethod @pytest.mark.parametrize("path", [None, "", "/", "/foo", "foo", "foo/bar"]) @@ -2194,3 +2195,40 @@ def test_create_array_with_data_num_gets( # one get for the metadata and one per shard. # Note: we don't actually need one get per shard, but this is the current behavior assert store.counter["get"] == 1 + num_shards + + +@pytest.mark.parametrize("config", [{}, {"write_empty_chunks": True}, {"order": "C"}]) +def test_with_config(config: ArrayConfigParams) -> None: + """ + Test that `AsyncArray.with_config` and `Array.with_config` create a copy of the source + array with a new runtime configuration. + """ + # the config we start with + source_config: ArrayConfigParams = {"write_empty_chunks": False, "order": "F"} + source_array = zarr.create_array({}, shape=(1,), dtype="uint8", config=source_config) + + new_async_array_config_dict = source_array._async_array.with_config(config).config.to_dict() + new_array_config_dict = source_array.with_config(config).config.to_dict() + + for key in source_config: + if key in config: + assert new_async_array_config_dict[key] == config[key] # type: ignore[literal-required] + assert new_array_config_dict[key] == config[key] # type: ignore[literal-required] + else: + assert new_async_array_config_dict[key] == source_config[key] # type: ignore[literal-required] + assert new_array_config_dict[key] == source_config[key] # type: ignore[literal-required] + + +def test_with_config_polymorphism() -> None: + """ + Test that `AsyncArray.with_config` and `Array.with_config` accept dicts and full array config + objects. 
+ """ + source_config: ArrayConfig = ArrayConfig.from_dict({"write_empty_chunks": False, "order": "F"}) + source_config_dict = source_config.to_dict() + + arr = zarr.create_array({}, shape=(1,), dtype="uint8") + arr_source_config = arr.with_config(source_config) + arr_source_config_dict = arr.with_config(source_config_dict) + + assert arr_source_config.config == arr_source_config_dict.config From 3f8f1f67c532112fb1dae247645b440c33e96e92 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 25 Jan 2026 21:10:53 +0100 Subject: [PATCH 2/3] bring back _config as an alias for config; update docstring --- src/zarr/core/array.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 59d2c4c6fd..a3a2aff250 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -1012,6 +1012,11 @@ async def example(): def store(self) -> Store: return self.store_path.store + @property + @deprecated("Use AsyncArray.config instead.", category=ZarrDeprecationWarning) + def _config(self) -> ArrayConfig: + return self.config + @property def ndim(self) -> int: """Returns the number of dimensions in the Array. @@ -2580,7 +2585,8 @@ def with_config(self, config: ArrayConfigLike) -> Self: ---------- config : ArrayConfigLike - The runtime config for the new Array. + The runtime config for the new Array. Any keys not specified will be inherited + from the current array's config. 
Returns ------- From 4a425e85c969fb52feaca674d59d141448455ff3 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 26 Jan 2026 13:48:03 +0100 Subject: [PATCH 3/3] release note --- changes/3668.feature.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 changes/3668.feature.md diff --git a/changes/3668.feature.md b/changes/3668.feature.md new file mode 100644 index 0000000000..def196ec8a --- /dev/null +++ b/changes/3668.feature.md @@ -0,0 +1,4 @@ +Exposes the array runtime configuration as an attribute called `config` on the `Array` and +`AsyncArray` classes. The previous `AsyncArray._config` attribute is now a deprecated alias for `AsyncArray.config`. + +Adds a `with_config` method for creating a new `Array` / `AsyncArray` instance with a new runtime configuration (keys not specified are inherited from the source array's config), and fixes inaccurate documentation about the default value of the `write_empty_chunks` configuration parameter. \ No newline at end of file