From ec500839349c1adbf17167ccda737c8e4a1115e6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 26 Jan 2026 12:12:13 +0000 Subject: [PATCH 1/6] Initial plan From f379ea170e0a7c1c466a0e735f3150bb5d549c3a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 26 Jan 2026 12:17:50 +0000 Subject: [PATCH 2/6] Implement default chunk shape and dimension separator auto-detection Co-authored-by: brokkoli71 <44113112+brokkoli71@users.noreply.github.com> --- src/main/java/dev/zarr/zarrjava/v2/Array.java | 88 ++++++++++++++ .../zarrjava/v2/ArrayMetadataBuilder.java | 33 +++++- .../java/dev/zarr/zarrjava/ZarrV2Test.java | 112 +++++++++++++++++- 3 files changed, 229 insertions(+), 4 deletions(-) diff --git a/src/main/java/dev/zarr/zarrjava/v2/Array.java b/src/main/java/dev/zarr/zarrjava/v2/Array.java index 87d767a..dca0549 100644 --- a/src/main/java/dev/zarr/zarrjava/v2/Array.java +++ b/src/main/java/dev/zarr/zarrjava/v2/Array.java @@ -4,15 +4,18 @@ import com.fasterxml.jackson.databind.ObjectWriter; import dev.zarr.zarrjava.ZarrException; import dev.zarr.zarrjava.core.Attributes; +import dev.zarr.zarrjava.core.chunkkeyencoding.Separator; import dev.zarr.zarrjava.core.codec.CodecPipeline; import dev.zarr.zarrjava.store.FilesystemStore; import dev.zarr.zarrjava.store.MemoryStore; +import dev.zarr.zarrjava.store.Store; import dev.zarr.zarrjava.store.StoreHandle; import dev.zarr.zarrjava.utils.Utils; import dev.zarr.zarrjava.v2.codec.Codec; import dev.zarr.zarrjava.v2.codec.core.BytesCodec; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.file.Path; @@ -57,6 +60,27 @@ public static Array open(StoreHandle storeHandle) throws IOException, ZarrExcept Utils.toArray(storeHandle.resolve(ZATTRS).readNonNull()), Attributes.class ); + + // Auto-detect dimension separator if not specified and array has multiple dimensions + if (metadata.dimensionSeparator == null && metadata.shape.length > 1) { + Separator detectedSeparator = detectDimensionSeparator(storeHandle, metadata.chunks); + if (detectedSeparator != null) { + // Create a new metadata instance with the detected separator + metadata = new ArrayMetadata( + metadata.zarrFormat, + metadata.shape, + metadata.chunks, + metadata.dataType, + metadata.parsedFillValue, + metadata.order, + metadata.filters, + metadata.compressor, + detectedSeparator, + metadata.attributes + ); + } + } + return new Array( storeHandle, metadata @@ -248,6 +272,70 @@ public Array updateAttributes(Function attributeMapper) return setAttributes(attributeMapper.apply(metadata.attributes)); } + /** + * Detects dimension separator from existing chunk files in the store. + * Similar to JZarr's ZarrArray::findNestedChunks method. + * + * @param storeHandle the store containing the array + * @param chunks the chunk shape + * @return the detected separator (SLASH or DOT), or null if detection fails + */ + @Nullable + private static Separator detectDimensionSeparator(StoreHandle storeHandle, int[] chunks) { + // Only attempt detection if store supports listing + if (!(storeHandle.store instanceof Store.ListableStore)) { + return null; + } + + try { + // List all files in the array directory + String[] chunkFiles = storeHandle.list() + .filter(key -> !key.startsWith(".z")) // Exclude metadata files + .toArray(String[]::new); + + if (chunkFiles.length == 0) { + // No chunks exist yet, cannot detect + return null; + } + + // Build regex pattern to match nested chunks (SLASH separator) + // Pattern: \d+/\d+/... for multi-dimensional arrays + StringBuilder nestedPattern = new StringBuilder("\\d+"); + for (int i = 1; i < chunks.length; i++) { + nestedPattern.append("/\\d+"); + } + String nestedRegex = nestedPattern.toString(); + + // Check if any chunk file matches the nested pattern + for (String chunkFile : chunkFiles) { + if (chunkFile.matches(nestedRegex)) { + return Separator.SLASH; + } + } + + // Build regex pattern to match flat chunks (DOT separator) + // Pattern: \d+.\d+.... for multi-dimensional arrays + StringBuilder flatPattern = new StringBuilder("\\d+"); + for (int i = 1; i < chunks.length; i++) { + flatPattern.append("\\.\\d+"); + } + String flatRegex = flatPattern.toString(); + + // Check if any chunk file matches the flat pattern + for (String chunkFile : chunkFiles) { + if (chunkFile.matches(flatRegex)) { + return Separator.DOT; + } + } + + // Could not detect separator from existing chunks + return null; + } catch (Exception e) { + // If listing fails, return null + return null; + } + } + @Override public String toString() { return String.format("", storeHandle, diff --git a/src/main/java/dev/zarr/zarrjava/v2/ArrayMetadataBuilder.java b/src/main/java/dev/zarr/zarrjava/v2/ArrayMetadataBuilder.java index 8ec6707..ab0e350 100644 --- a/src/main/java/dev/zarr/zarrjava/v2/ArrayMetadataBuilder.java +++ b/src/main/java/dev/zarr/zarrjava/v2/ArrayMetadataBuilder.java @@ -146,12 +146,15 @@ public ArrayMetadata build() throws ZarrException { if (shape == null) { throw new IllegalStateException("Please call `withShape` first."); } - if (chunks == null) { - throw new IllegalStateException("Please call `withChunks` first."); - } if (dataType == null) { throw new IllegalStateException("Please call `withDataType` first."); } + + // If chunks are not specified, calculate default chunks + if (chunks == null) { + chunks = calculateDefaultChunks(shape); + } + return new ArrayMetadata( 2, shape, @@ -165,4 +168,28 @@ public ArrayMetadata build() throws ZarrException { attributes ); } + + /** + * Calculate default chunk shape when not specified. + * Similar to JZarr's ArrayParams.build() logic, targeting chunks of about size 512. + */ + private int[] calculateDefaultChunks(long[] shape) { + int[] chunks = new int[shape.length]; + for (int i = 0; i < shape.length; i++) { + long shapeDim = shape[i]; + int numChunks = (int) (shapeDim / 512); + if (numChunks > 0) { + int chunkDim = (int) (shapeDim / (numChunks + 1)); + if (shapeDim % chunkDim == 0) { + chunks[i] = chunkDim; + } else { + chunks[i] = chunkDim + 1; + } + } else { + // If dimension is smaller than 512, use the full dimension + chunks[i] = (int) shapeDim; + } + } + return chunks; + } } \ No newline at end of file diff --git a/src/test/java/dev/zarr/zarrjava/ZarrV2Test.java b/src/test/java/dev/zarr/zarrjava/ZarrV2Test.java index 084d831..ea25630 100644 --- a/src/test/java/dev/zarr/zarrjava/ZarrV2Test.java +++ b/src/test/java/dev/zarr/zarrjava/ZarrV2Test.java @@ -407,4 +407,114 @@ public void testMemoryStore() throws ZarrException, IOException { System.out.println(s); } -} \ No newline at end of file + @Test + public void testDefaultChunkShape() throws IOException, ZarrException { + // Test with a small array (< 512 elements per dimension) + Array smallArray = Array.create( + new FilesystemStore(TESTOUTPUT).resolve("v2_default_chunks_small"), + Array.metadataBuilder() + .withShape(100, 50) + .withDataType(DataType.UINT8) + .build() + ); + Assertions.assertEquals(2, smallArray.metadata().chunks.length); + // Both dimensions < 512, so chunks should equal shape + Assertions.assertEquals(100, smallArray.metadata().chunks[0]); + Assertions.assertEquals(50, smallArray.metadata().chunks[1]); + + // Test with a larger array (> 512 elements per dimension) + Array largeArray = Array.create( + new FilesystemStore(TESTOUTPUT).resolve("v2_default_chunks_large"), + Array.metadataBuilder() + .withShape(2000, 1500) + .withDataType(DataType.UINT8) + .build() + ); + Assertions.assertEquals(2, largeArray.metadata().chunks.length); + // Chunks should be calculated based on division by 512 + Assertions.assertTrue(largeArray.metadata().chunks[0] > 0); + Assertions.assertTrue(largeArray.metadata().chunks[0] < 2000); + Assertions.assertTrue(largeArray.metadata().chunks[1] > 0); + Assertions.assertTrue(largeArray.metadata().chunks[1] < 1500); + + // Test with mixed dimensions + Array mixedArray = Array.create( + new FilesystemStore(TESTOUTPUT).resolve("v2_default_chunks_mixed"), + Array.metadataBuilder() + .withShape(1024, 100, 2048) + .withDataType(DataType.UINT8) + .build() + ); + Assertions.assertEquals(3, mixedArray.metadata().chunks.length); + // Verify chunks are reasonable + Assertions.assertTrue(mixedArray.metadata().chunks[0] > 0); + Assertions.assertTrue(mixedArray.metadata().chunks[0] <= 1024); + Assertions.assertEquals(100, mixedArray.metadata().chunks[1]); // < 512, should equal shape + Assertions.assertTrue(mixedArray.metadata().chunks[2] > 0); + Assertions.assertTrue(mixedArray.metadata().chunks[2] <= 2048); + } + + @Test + public void testDimensionSeparatorAutoDetection() throws IOException, ZarrException { + // Test with SLASH separator + StoreHandle slashStoreHandle = new FilesystemStore(TESTOUTPUT).resolve("v2_separator_detection_slash"); + Array slashArray = Array.create( + slashStoreHandle, + Array.metadataBuilder() + .withShape(10, 10) + .withDataType(DataType.UINT8) + .withChunks(5, 5) + .withDimensionSeparator(dev.zarr.zarrjava.core.chunkkeyencoding.Separator.SLASH) + .build() + ); + + // Write some data to create chunk files + slashArray.write(new long[]{0, 0}, ucar.ma2.Array.factory(ucar.ma2.DataType.UBYTE, new int[]{5, 5})); + + // Now open without specifying separator - it should auto-detect SLASH + Array reopenedSlashArray = Array.open(slashStoreHandle); + Assertions.assertEquals(dev.zarr.zarrjava.core.chunkkeyencoding.Separator.SLASH, + reopenedSlashArray.metadata().dimensionSeparator); + + // Test with DOT separator + StoreHandle dotStoreHandle = new FilesystemStore(TESTOUTPUT).resolve("v2_separator_detection_dot"); + Array dotArray = Array.create( + dotStoreHandle, + Array.metadataBuilder() + .withShape(10, 10) + .withDataType(DataType.UINT8) + .withChunks(5, 5) + .withDimensionSeparator(dev.zarr.zarrjava.core.chunkkeyencoding.Separator.DOT) + .build() + ); + + // Write some data to create chunk files + dotArray.write(new long[]{0, 0}, ucar.ma2.Array.factory(ucar.ma2.DataType.UBYTE, new int[]{5, 5})); + + // Now open without specifying separator - it should auto-detect DOT + Array reopenedDotArray = Array.open(dotStoreHandle); + Assertions.assertEquals(dev.zarr.zarrjava.core.chunkkeyencoding.Separator.DOT, + reopenedDotArray.metadata().dimensionSeparator); + } + + @Test + public void testDimensionSeparatorDefaultFallback() throws IOException, ZarrException { + // Test that when no chunks exist, the separator defaults to DOT (as per ArrayMetadata.chunkKeyEncoding()) + StoreHandle emptyStoreHandle = new FilesystemStore(TESTOUTPUT).resolve("v2_separator_detection_empty"); + Array emptyArray = Array.create( + emptyStoreHandle, + Array.metadataBuilder() + .withShape(10, 10) + .withDataType(DataType.UINT8) + .withChunks(5, 5) + .build() + ); + + // Open without writing any chunks + Array reopenedEmptyArray = Array.open(emptyStoreHandle); + // Should use default DOT separator from ArrayMetadata.chunkKeyEncoding() + // When dimensionSeparator is null, chunkKeyEncoding() defaults to DOT + Assertions.assertEquals(dev.zarr.zarrjava.core.chunkkeyencoding.Separator.DOT, + reopenedEmptyArray.metadata().chunkKeyEncoding().separator); + } +} From d584375ea580eed33ddc8360912741751bce495d Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 26 Jan 2026 12:21:56 +0000 Subject: [PATCH 3/6] Address code review feedback and add detailed documentation Co-authored-by: brokkoli71 <44113112+brokkoli71@users.noreply.github.com> --- src/main/java/dev/zarr/zarrjava/v2/Array.java | 4 ++-- .../java/dev/zarr/zarrjava/v2/ArrayMetadataBuilder.java | 9 ++++++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/main/java/dev/zarr/zarrjava/v2/Array.java b/src/main/java/dev/zarr/zarrjava/v2/Array.java index dca0549..9233cd8 100644 --- a/src/main/java/dev/zarr/zarrjava/v2/Array.java +++ b/src/main/java/dev/zarr/zarrjava/v2/Array.java @@ -288,9 +288,9 @@ private static Separator detectDimensionSeparator(StoreHandle storeHandle, int[] } try { - // List all files in the array directory + // List all files in the array directory, excluding metadata files (.zarray, .zattrs, .zgroup) String[] chunkFiles = storeHandle.list() - .filter(key -> !key.startsWith(".z")) // Exclude metadata files + .filter(key -> !key.startsWith(".z")) .toArray(String[]::new); if (chunkFiles.length == 0) { diff --git a/src/main/java/dev/zarr/zarrjava/v2/ArrayMetadataBuilder.java b/src/main/java/dev/zarr/zarrjava/v2/ArrayMetadataBuilder.java index ab0e350..c2c805c 100644 --- a/src/main/java/dev/zarr/zarrjava/v2/ArrayMetadataBuilder.java +++ b/src/main/java/dev/zarr/zarrjava/v2/ArrayMetadataBuilder.java @@ -171,7 +171,14 @@ public ArrayMetadata build() throws ZarrException { /** * Calculate default chunk shape when not specified. - * Similar to JZarr's ArrayParams.build() logic, targeting chunks of about size 512. + * This implements JZarr's ArrayParams.build() logic, targeting chunks of approximately 512 elements. + * + * The algorithm divides each dimension by 512 to determine the number of ~512-sized chunks, + * then calculates chunk sizes that will cover the dimension. Note that the total coverage + * may slightly exceed the dimension size (e.g., for shape=1024, chunks=342 results in + * 3 chunks covering 1026 elements). This is intentional and matches JZarr behavior - + * Zarr handles out-of-bounds gracefully, and the goal is approximate chunk sizes rather + * than perfect tiling. */ private int[] calculateDefaultChunks(long[] shape) { int[] chunks = new int[shape.length]; From 61659ff1cf53044ed381d0bc90ef6945bc45c4ae Mon Sep 17 00:00:00 2001 From: brokkoli71 Date: Mon, 26 Jan 2026 14:53:03 +0100 Subject: [PATCH 4/6] remove detectDimensionSeparator --- src/main/java/dev/zarr/zarrjava/v2/Array.java | 85 ------------------- .../java/dev/zarr/zarrjava/ZarrV2Test.java | 64 -------------- 2 files changed, 149 deletions(-) diff --git a/src/main/java/dev/zarr/zarrjava/v2/Array.java b/src/main/java/dev/zarr/zarrjava/v2/Array.java index 9233cd8..a34c85a 100644 --- a/src/main/java/dev/zarr/zarrjava/v2/Array.java +++ b/src/main/java/dev/zarr/zarrjava/v2/Array.java @@ -60,27 +60,6 @@ public static Array open(StoreHandle storeHandle) throws IOException, ZarrExcept Utils.toArray(storeHandle.resolve(ZATTRS).readNonNull()), Attributes.class ); - - // Auto-detect dimension separator if not specified and array has multiple dimensions - if (metadata.dimensionSeparator == null && metadata.shape.length > 1) { - Separator detectedSeparator = detectDimensionSeparator(storeHandle, metadata.chunks); - if (detectedSeparator != null) { - // Create a new metadata instance with the detected separator - metadata = new ArrayMetadata( - metadata.zarrFormat, - metadata.shape, - metadata.chunks, - metadata.dataType, - metadata.parsedFillValue, - metadata.order, - metadata.filters, - metadata.compressor, - detectedSeparator, - metadata.attributes - ); - } - } - return new Array( storeHandle, metadata @@ -272,70 +251,6 @@ public Array updateAttributes(Function attributeMapper) return setAttributes(attributeMapper.apply(metadata.attributes)); } - /** - * Detects dimension separator from existing chunk files in the store. - * Similar to JZarr's ZarrArray::findNestedChunks method. - * - * @param storeHandle the store containing the array - * @param chunks the chunk shape - * @return the detected separator (SLASH or DOT), or null if detection fails - */ - @Nullable - private static Separator detectDimensionSeparator(StoreHandle storeHandle, int[] chunks) { - // Only attempt detection if store supports listing - if (!(storeHandle.store instanceof Store.ListableStore)) { - return null; - } - - try { - // List all files in the array directory, excluding metadata files (.zarray, .zattrs, .zgroup) - String[] chunkFiles = storeHandle.list() - .filter(key -> !key.startsWith(".z")) - .toArray(String[]::new); - - if (chunkFiles.length == 0) { - // No chunks exist yet, cannot detect - return null; - } - - // Build regex pattern to match nested chunks (SLASH separator) - // Pattern: \d+/\d+/... for multi-dimensional arrays - StringBuilder nestedPattern = new StringBuilder("\\d+"); - for (int i = 1; i < chunks.length; i++) { - nestedPattern.append("/\\d+"); - } - String nestedRegex = nestedPattern.toString(); - - // Check if any chunk file matches the nested pattern - for (String chunkFile : chunkFiles) { - if (chunkFile.matches(nestedRegex)) { - return Separator.SLASH; - } - } - - // Build regex pattern to match flat chunks (DOT separator) - // Pattern: \d+.\d+.... for multi-dimensional arrays - StringBuilder flatPattern = new StringBuilder("\\d+"); - for (int i = 1; i < chunks.length; i++) { - flatPattern.append("\\.\\d+"); - } - String flatRegex = flatPattern.toString(); - - // Check if any chunk file matches the flat pattern - for (String chunkFile : chunkFiles) { - if (chunkFile.matches(flatRegex)) { - return Separator.DOT; - } - } - - // Could not detect separator from existing chunks - return null; - } catch (Exception e) { - // If listing fails, return null - return null; - } - } - @Override public String toString() { return String.format("", storeHandle, diff --git a/src/test/java/dev/zarr/zarrjava/ZarrV2Test.java b/src/test/java/dev/zarr/zarrjava/ZarrV2Test.java index ea25630..6bbb27e 100644 --- a/src/test/java/dev/zarr/zarrjava/ZarrV2Test.java +++ b/src/test/java/dev/zarr/zarrjava/ZarrV2Test.java @@ -453,68 +453,4 @@ public void testDefaultChunkShape() throws IOException, ZarrException { Assertions.assertTrue(mixedArray.metadata().chunks[2] > 0); Assertions.assertTrue(mixedArray.metadata().chunks[2] <= 2048); } - - @Test - public void testDimensionSeparatorAutoDetection() throws IOException, ZarrException { - // Test with SLASH separator - StoreHandle slashStoreHandle = new FilesystemStore(TESTOUTPUT).resolve("v2_separator_detection_slash"); - Array slashArray = Array.create( - slashStoreHandle, - Array.metadataBuilder() - .withShape(10, 10) - .withDataType(DataType.UINT8) - .withChunks(5, 5) - .withDimensionSeparator(dev.zarr.zarrjava.core.chunkkeyencoding.Separator.SLASH) - .build() - ); - - // Write some data to create chunk files - slashArray.write(new long[]{0, 0}, ucar.ma2.Array.factory(ucar.ma2.DataType.UBYTE, new int[]{5, 5})); - - // Now open without specifying separator - it should auto-detect SLASH - Array reopenedSlashArray = Array.open(slashStoreHandle); - Assertions.assertEquals(dev.zarr.zarrjava.core.chunkkeyencoding.Separator.SLASH, - reopenedSlashArray.metadata().dimensionSeparator); - - // Test with DOT separator - StoreHandle dotStoreHandle = new FilesystemStore(TESTOUTPUT).resolve("v2_separator_detection_dot"); - Array dotArray = Array.create( - dotStoreHandle, - Array.metadataBuilder() - .withShape(10, 10) - .withDataType(DataType.UINT8) - .withChunks(5, 5) - .withDimensionSeparator(dev.zarr.zarrjava.core.chunkkeyencoding.Separator.DOT) - .build() - ); - - // Write some data to create chunk files - dotArray.write(new long[]{0, 0}, ucar.ma2.Array.factory(ucar.ma2.DataType.UBYTE, new int[]{5, 5})); - - // Now open without specifying separator - it should auto-detect DOT - Array reopenedDotArray = Array.open(dotStoreHandle); - Assertions.assertEquals(dev.zarr.zarrjava.core.chunkkeyencoding.Separator.DOT, - reopenedDotArray.metadata().dimensionSeparator); - } - - @Test - public void testDimensionSeparatorDefaultFallback() throws IOException, ZarrException { - // Test that when no chunks exist, the separator defaults to DOT (as per ArrayMetadata.chunkKeyEncoding()) - StoreHandle emptyStoreHandle = new FilesystemStore(TESTOUTPUT).resolve("v2_separator_detection_empty"); - Array emptyArray = Array.create( - emptyStoreHandle, - Array.metadataBuilder() - .withShape(10, 10) - .withDataType(DataType.UINT8) - .withChunks(5, 5) - .build() - ); - - // Open without writing any chunks - Array reopenedEmptyArray = Array.open(emptyStoreHandle); - // Should use default DOT separator from ArrayMetadata.chunkKeyEncoding() - // When dimensionSeparator is null, chunkKeyEncoding() defaults to DOT - Assertions.assertEquals(dev.zarr.zarrjava.core.chunkkeyencoding.Separator.DOT, - reopenedEmptyArray.metadata().chunkKeyEncoding().separator); - } } From a4351171e393c4028f7a1e65fdd67d776c3d9f84 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 26 Jan 2026 14:26:01 +0000 Subject: [PATCH 5/6] Add default chunk shape calculation for v3 arrays Co-authored-by: brokkoli71 <44113112+brokkoli71@users.noreply.github.com> --- .../zarrjava/v3/ArrayMetadataBuilder.java | 37 +++++++++++++- .../java/dev/zarr/zarrjava/ZarrV3Test.java | 49 ++++++++++++++++++- 2 files changed, 84 insertions(+), 2 deletions(-) diff --git a/src/main/java/dev/zarr/zarrjava/v3/ArrayMetadataBuilder.java b/src/main/java/dev/zarr/zarrjava/v3/ArrayMetadataBuilder.java index 83a6e1a..ee976b8 100644 --- a/src/main/java/dev/zarr/zarrjava/v3/ArrayMetadataBuilder.java +++ b/src/main/java/dev/zarr/zarrjava/v3/ArrayMetadataBuilder.java @@ -161,13 +161,48 @@ public ArrayMetadata build() throws ZarrException { if (dataType == null) { throw new ZarrException("Data type needs to be provided. Please call `.withDataType`."); } + + // If chunk grid is not specified, calculate default chunks if (chunkGrid == null) { - throw new ZarrException("Chunk grid needs to be provided. Please call `.withChunkShape`."); + int[] defaultChunks = calculateDefaultChunks(shape); + chunkGrid = new RegularChunkGrid(new RegularChunkGrid.Configuration(defaultChunks)); } + return new ArrayMetadata(shape, dataType, chunkGrid, chunkKeyEncoding, fillValue, codecs, dimensionNames, attributes, storageTransformers ); } + + /** + * Calculate default chunk shape when not specified. + * This implements JZarr's ArrayParams.build() logic, targeting chunks of approximately 512 elements. + * + * The algorithm divides each dimension by 512 to determine the number of ~512-sized chunks, + * then calculates chunk sizes that will cover the dimension. Note that the total coverage + * may slightly exceed the dimension size (e.g., for shape=1024, chunks=342 results in + * 3 chunks covering 1026 elements). This is intentional and matches JZarr behavior - + * Zarr handles out-of-bounds gracefully, and the goal is approximate chunk sizes rather + * than perfect tiling. + */ + private int[] calculateDefaultChunks(long[] shape) { + int[] chunks = new int[shape.length]; + for (int i = 0; i < shape.length; i++) { + long shapeDim = shape[i]; + int numChunks = (int) (shapeDim / 512); + if (numChunks > 0) { + int chunkDim = (int) (shapeDim / (numChunks + 1)); + if (shapeDim % chunkDim == 0) { + chunks[i] = chunkDim; + } else { + chunks[i] = chunkDim + 1; + } + } else { + // If dimension is smaller than 512, use the full dimension + chunks[i] = (int) shapeDim; + } + } + return chunks; + } } diff --git a/src/test/java/dev/zarr/zarrjava/ZarrV3Test.java b/src/test/java/dev/zarr/zarrjava/ZarrV3Test.java index 8390c3c..56295d3 100644 --- a/src/test/java/dev/zarr/zarrjava/ZarrV3Test.java +++ b/src/test/java/dev/zarr/zarrjava/ZarrV3Test.java @@ -773,4 +773,51 @@ public void testUnalignedArrayAccess(int arrayShape, int chunkShape, int accessS Assertions.assertArrayEquals(expectedData, (int[]) result.get1DJavaArray(ucar.ma2.DataType.UINT)); } } -} \ No newline at end of file + + @Test + public void testDefaultChunkShape() throws IOException, ZarrException { + // Test with a small array (< 512 elements per dimension) + Array smallArray = Array.create( + new FilesystemStore(TESTOUTPUT).resolve("v3_default_chunks_small"), + Array.metadataBuilder() + .withShape(100, 50) + .withDataType(DataType.UINT8) + .build() + ); + Assertions.assertEquals(2, smallArray.metadata().chunkShape().length); + // Both dimensions < 512, so chunks should equal shape + Assertions.assertEquals(100, smallArray.metadata().chunkShape()[0]); + Assertions.assertEquals(50, smallArray.metadata().chunkShape()[1]); + + // Test with a larger array (> 512 elements per dimension) + Array largeArray = Array.create( + new FilesystemStore(TESTOUTPUT).resolve("v3_default_chunks_large"), + Array.metadataBuilder() + .withShape(2000, 1500) + .withDataType(DataType.UINT8) + .build() + ); + Assertions.assertEquals(2, largeArray.metadata().chunkShape().length); + // Chunks should be calculated based on division by 512 + Assertions.assertTrue(largeArray.metadata().chunkShape()[0] > 0); + Assertions.assertTrue(largeArray.metadata().chunkShape()[0] < 2000); + Assertions.assertTrue(largeArray.metadata().chunkShape()[1] > 0); + Assertions.assertTrue(largeArray.metadata().chunkShape()[1] < 1500); + + // Test with mixed dimensions + Array mixedArray = Array.create( + new FilesystemStore(TESTOUTPUT).resolve("v3_default_chunks_mixed"), + Array.metadataBuilder() + .withShape(1024, 100, 2048) + .withDataType(DataType.UINT8) + .build() + ); + Assertions.assertEquals(3, mixedArray.metadata().chunkShape().length); + // Verify chunks are reasonable + Assertions.assertTrue(mixedArray.metadata().chunkShape()[0] > 0); + Assertions.assertTrue(mixedArray.metadata().chunkShape()[0] <= 1024); + Assertions.assertEquals(100, mixedArray.metadata().chunkShape()[1]); // < 512, should equal shape + Assertions.assertTrue(mixedArray.metadata().chunkShape()[2] > 0); + Assertions.assertTrue(mixedArray.metadata().chunkShape()[2] <= 2048); + } +} From f76f7d600381d4411fe2ea0d5324e5cbaffc67b6 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 26 Jan 2026 14:28:25 +0000 Subject: [PATCH 6/6] Refactor: Extract calculateDefaultChunks to shared Utils class Co-authored-by: brokkoli71 <44113112+brokkoli71@users.noreply.github.com> --- .../java/dev/zarr/zarrjava/utils/Utils.java | 34 +++++++++++++++++++ .../zarrjava/v2/ArrayMetadataBuilder.java | 34 ++----------------- .../zarrjava/v3/ArrayMetadataBuilder.java | 34 ++----------------- 3 files changed, 38 insertions(+), 64 deletions(-) diff --git a/src/main/java/dev/zarr/zarrjava/utils/Utils.java b/src/main/java/dev/zarr/zarrjava/utils/Utils.java index 4a95d1e..d7c54df 100644 --- a/src/main/java/dev/zarr/zarrjava/utils/Utils.java +++ b/src/main/java/dev/zarr/zarrjava/utils/Utils.java @@ -107,4 +107,38 @@ public static int[] inversePermutation(int[] origin) { } return inverse; } + + /** + * Calculate default chunk shape when not specified. + * This implements JZarr's ArrayParams.build() logic, targeting chunks of approximately 512 elements. + * + * The algorithm divides each dimension by 512 to determine the number of ~512-sized chunks, + * then calculates chunk sizes that will cover the dimension. Note that the total coverage + * may slightly exceed the dimension size (e.g., for shape=1024, chunks=342 results in + * 3 chunks covering 1026 elements). This is intentional and matches JZarr behavior - + * Zarr handles out-of-bounds gracefully, and the goal is approximate chunk sizes rather + * than perfect tiling. + * + * @param shape the shape of the array + * @return the calculated default chunk shape + */ + public static int[] calculateDefaultChunks(long[] shape) { + int[] chunks = new int[shape.length]; + for (int i = 0; i < shape.length; i++) { + long shapeDim = shape[i]; + int numChunks = (int) (shapeDim / 512); + if (numChunks > 0) { + int chunkDim = (int) (shapeDim / (numChunks + 1)); + if (shapeDim % chunkDim == 0) { + chunks[i] = chunkDim; + } else { + chunks[i] = chunkDim + 1; + } + } else { + // If dimension is smaller than 512, use the full dimension + chunks[i] = (int) shapeDim; + } + } + return chunks; + } } diff --git a/src/main/java/dev/zarr/zarrjava/v2/ArrayMetadataBuilder.java b/src/main/java/dev/zarr/zarrjava/v2/ArrayMetadataBuilder.java index c2c805c..091762c 100644 --- a/src/main/java/dev/zarr/zarrjava/v2/ArrayMetadataBuilder.java +++ b/src/main/java/dev/zarr/zarrjava/v2/ArrayMetadataBuilder.java @@ -4,6 +4,7 @@ import dev.zarr.zarrjava.ZarrException; import dev.zarr.zarrjava.core.Attributes; import dev.zarr.zarrjava.core.chunkkeyencoding.Separator; +import dev.zarr.zarrjava.utils.Utils; import dev.zarr.zarrjava.v2.codec.Codec; import dev.zarr.zarrjava.v2.codec.core.BloscCodec; import dev.zarr.zarrjava.v2.codec.core.ZlibCodec; @@ -152,7 +153,7 @@ public ArrayMetadata build() throws ZarrException { // If chunks are not specified, calculate default chunks if (chunks == null) { - chunks = calculateDefaultChunks(shape); + chunks = Utils.calculateDefaultChunks(shape); } return new ArrayMetadata( @@ -168,35 +169,4 @@ public ArrayMetadata build() throws ZarrException { attributes ); } - - /** - * Calculate default chunk shape when not specified. - * This implements JZarr's ArrayParams.build() logic, targeting chunks of approximately 512 elements. - * - * The algorithm divides each dimension by 512 to determine the number of ~512-sized chunks, - * then calculates chunk sizes that will cover the dimension. Note that the total coverage - * may slightly exceed the dimension size (e.g., for shape=1024, chunks=342 results in - * 3 chunks covering 1026 elements). This is intentional and matches JZarr behavior - - * Zarr handles out-of-bounds gracefully, and the goal is approximate chunk sizes rather - * than perfect tiling. - */ - private int[] calculateDefaultChunks(long[] shape) { - int[] chunks = new int[shape.length]; - for (int i = 0; i < shape.length; i++) { - long shapeDim = shape[i]; - int numChunks = (int) (shapeDim / 512); - if (numChunks > 0) { - int chunkDim = (int) (shapeDim / (numChunks + 1)); - if (shapeDim % chunkDim == 0) { - chunks[i] = chunkDim; - } else { - chunks[i] = chunkDim + 1; - } - } else { - // If dimension is smaller than 512, use the full dimension - chunks[i] = (int) shapeDim; - } - } - return chunks; - } } \ No newline at end of file diff --git a/src/main/java/dev/zarr/zarrjava/v3/ArrayMetadataBuilder.java b/src/main/java/dev/zarr/zarrjava/v3/ArrayMetadataBuilder.java index ee976b8..5d5f494 100644 --- a/src/main/java/dev/zarr/zarrjava/v3/ArrayMetadataBuilder.java +++ b/src/main/java/dev/zarr/zarrjava/v3/ArrayMetadataBuilder.java @@ -4,6 +4,7 @@ import dev.zarr.zarrjava.core.Attributes; import dev.zarr.zarrjava.core.chunkkeyencoding.Separator; import dev.zarr.zarrjava.core.codec.core.BytesCodec.Endian; +import dev.zarr.zarrjava.utils.Utils; import dev.zarr.zarrjava.v3.chunkgrid.ChunkGrid; import dev.zarr.zarrjava.v3.chunkgrid.RegularChunkGrid; import dev.zarr.zarrjava.v3.chunkkeyencoding.ChunkKeyEncoding; @@ -164,7 +165,7 @@ public ArrayMetadata build() throws ZarrException { // If chunk grid is not specified, calculate default chunks if (chunkGrid == null) { - int[] defaultChunks = calculateDefaultChunks(shape); + int[] defaultChunks = Utils.calculateDefaultChunks(shape); chunkGrid = new RegularChunkGrid(new RegularChunkGrid.Configuration(defaultChunks)); } @@ -174,35 +175,4 @@ public ArrayMetadata build() throws ZarrException { storageTransformers ); } - - /** - * Calculate default chunk shape when not specified. - * This implements JZarr's ArrayParams.build() logic, targeting chunks of approximately 512 elements. - * - * The algorithm divides each dimension by 512 to determine the number of ~512-sized chunks, - * then calculates chunk sizes that will cover the dimension. Note that the total coverage - * may slightly exceed the dimension size (e.g., for shape=1024, chunks=342 results in - * 3 chunks covering 1026 elements). This is intentional and matches JZarr behavior - - * Zarr handles out-of-bounds gracefully, and the goal is approximate chunk sizes rather - * than perfect tiling. - */ - private int[] calculateDefaultChunks(long[] shape) { - int[] chunks = new int[shape.length]; - for (int i = 0; i < shape.length; i++) { - long shapeDim = shape[i]; - int numChunks = (int) (shapeDim / 512); - if (numChunks > 0) { - int chunkDim = (int) (shapeDim / (numChunks + 1)); - if (shapeDim % chunkDim == 0) { - chunks[i] = chunkDim; - } else { - chunks[i] = chunkDim + 1; - } - } else { - // If dimension is smaller than 512, use the full dimension - chunks[i] = (int) shapeDim; - } - } - return chunks; - } }