diff --git a/src/main/java/dev/zarr/zarrjava/utils/Utils.java b/src/main/java/dev/zarr/zarrjava/utils/Utils.java index 4a95d1e..d7c54df 100644 --- a/src/main/java/dev/zarr/zarrjava/utils/Utils.java +++ b/src/main/java/dev/zarr/zarrjava/utils/Utils.java @@ -107,4 +107,38 @@ public static int[] inversePermutation(int[] origin) { } return inverse; } + + /** + * Calculate default chunk shape when not specified. + * This implements JZarr's ArrayParams.build() logic, targeting chunks of approximately 512 elements. + * + * The algorithm divides each dimension by 512 to determine the number of ~512-sized chunks, + * then calculates chunk sizes that will cover the dimension. Note that the total coverage + * may slightly exceed the dimension size (e.g., for shape=1024, chunks=342 results in + * 3 chunks covering 1026 elements). This is intentional and matches JZarr behavior - + * Zarr handles out-of-bounds gracefully, and the goal is approximate chunk sizes rather + * than perfect tiling. + * + * @param shape the shape of the array + * @return the calculated default chunk shape + */ + public static int[] calculateDefaultChunks(long[] shape) { + int[] chunks = new int[shape.length]; + for (int i = 0; i < shape.length; i++) { + long shapeDim = shape[i]; + int numChunks = (int) (shapeDim / 512); + if (numChunks > 0) { + int chunkDim = (int) (shapeDim / (numChunks + 1)); + if (shapeDim % chunkDim == 0) { + chunks[i] = chunkDim; + } else { + chunks[i] = chunkDim + 1; + } + } else { + // If dimension is smaller than 512, use the full dimension + chunks[i] = (int) shapeDim; + } + } + return chunks; + } } diff --git a/src/main/java/dev/zarr/zarrjava/v2/Array.java b/src/main/java/dev/zarr/zarrjava/v2/Array.java index 87d767a..a34c85a 100644 --- a/src/main/java/dev/zarr/zarrjava/v2/Array.java +++ b/src/main/java/dev/zarr/zarrjava/v2/Array.java @@ -4,15 +4,18 @@ import com.fasterxml.jackson.databind.ObjectWriter; import dev.zarr.zarrjava.ZarrException; import dev.zarr.zarrjava.core.Attributes; +import dev.zarr.zarrjava.core.chunkkeyencoding.Separator; import dev.zarr.zarrjava.core.codec.CodecPipeline; import dev.zarr.zarrjava.store.FilesystemStore; import dev.zarr.zarrjava.store.MemoryStore; +import dev.zarr.zarrjava.store.Store; import dev.zarr.zarrjava.store.StoreHandle; import dev.zarr.zarrjava.utils.Utils; import dev.zarr.zarrjava.v2.codec.Codec; import dev.zarr.zarrjava.v2.codec.core.BytesCodec; import javax.annotation.Nonnull; +import javax.annotation.Nullable; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.file.Path; diff --git a/src/main/java/dev/zarr/zarrjava/v2/ArrayMetadataBuilder.java b/src/main/java/dev/zarr/zarrjava/v2/ArrayMetadataBuilder.java index 8ec6707..091762c 100644 --- a/src/main/java/dev/zarr/zarrjava/v2/ArrayMetadataBuilder.java +++ b/src/main/java/dev/zarr/zarrjava/v2/ArrayMetadataBuilder.java @@ -4,6 +4,7 @@ import dev.zarr.zarrjava.ZarrException; import dev.zarr.zarrjava.core.Attributes; import dev.zarr.zarrjava.core.chunkkeyencoding.Separator; +import dev.zarr.zarrjava.utils.Utils; import dev.zarr.zarrjava.v2.codec.Codec; import dev.zarr.zarrjava.v2.codec.core.BloscCodec; import dev.zarr.zarrjava.v2.codec.core.ZlibCodec; @@ -146,12 +147,15 @@ public ArrayMetadata build() throws ZarrException { if (shape == null) { throw new IllegalStateException("Please call `withShape` first."); } - if (chunks == null) { - throw new IllegalStateException("Please call `withChunks` first."); - } if (dataType == null) { throw new IllegalStateException("Please call `withDataType` first."); } + + // If chunks are not specified, calculate default chunks + if (chunks == null) { + chunks = Utils.calculateDefaultChunks(shape); + } + return new ArrayMetadata( 2, shape, diff --git a/src/main/java/dev/zarr/zarrjava/v3/ArrayMetadataBuilder.java b/src/main/java/dev/zarr/zarrjava/v3/ArrayMetadataBuilder.java index 83a6e1a..5d5f494 100644 --- a/src/main/java/dev/zarr/zarrjava/v3/ArrayMetadataBuilder.java +++ b/src/main/java/dev/zarr/zarrjava/v3/ArrayMetadataBuilder.java @@ -4,6 +4,7 @@ import dev.zarr.zarrjava.core.Attributes; import dev.zarr.zarrjava.core.chunkkeyencoding.Separator; import dev.zarr.zarrjava.core.codec.core.BytesCodec.Endian; +import dev.zarr.zarrjava.utils.Utils; import dev.zarr.zarrjava.v3.chunkgrid.ChunkGrid; import dev.zarr.zarrjava.v3.chunkgrid.RegularChunkGrid; import dev.zarr.zarrjava.v3.chunkkeyencoding.ChunkKeyEncoding; @@ -161,9 +162,13 @@ public ArrayMetadata build() throws ZarrException { if (dataType == null) { throw new ZarrException("Data type needs to be provided. Please call `.withDataType`."); } + + // If chunk grid is not specified, calculate default chunks if (chunkGrid == null) { - throw new ZarrException("Chunk grid needs to be provided. Please call `.withChunkShape`."); + int[] defaultChunks = Utils.calculateDefaultChunks(shape); + chunkGrid = new RegularChunkGrid(new RegularChunkGrid.Configuration(defaultChunks)); } + return new ArrayMetadata(shape, dataType, chunkGrid, chunkKeyEncoding, fillValue, codecs, dimensionNames, attributes, diff --git a/src/test/java/dev/zarr/zarrjava/ZarrV2Test.java b/src/test/java/dev/zarr/zarrjava/ZarrV2Test.java index 534a351..3bd3a88 100644 --- a/src/test/java/dev/zarr/zarrjava/ZarrV2Test.java +++ b/src/test/java/dev/zarr/zarrjava/ZarrV2Test.java @@ -406,4 +406,50 @@ public void testMemoryStore() throws ZarrException, IOException { Assertions.assertEquals(2, group.list().count()); } -} \ No newline at end of file + @Test + public void testDefaultChunkShape() throws IOException, ZarrException { + // Test with a small array (< 512 elements per dimension) + Array smallArray = Array.create( + new FilesystemStore(TESTOUTPUT).resolve("v2_default_chunks_small"), + Array.metadataBuilder() + .withShape(100, 50) + .withDataType(DataType.UINT8) + .build() + ); + Assertions.assertEquals(2, smallArray.metadata().chunks.length); + // Both dimensions < 512, so chunks should equal shape + Assertions.assertEquals(100, smallArray.metadata().chunks[0]); + Assertions.assertEquals(50, smallArray.metadata().chunks[1]); + + // Test with a larger array (> 512 elements per dimension) + Array largeArray = Array.create( + new FilesystemStore(TESTOUTPUT).resolve("v2_default_chunks_large"), + Array.metadataBuilder() + .withShape(2000, 1500) + .withDataType(DataType.UINT8) + .build() + ); + Assertions.assertEquals(2, largeArray.metadata().chunks.length); + // Chunks should be calculated based on division by 512 + Assertions.assertTrue(largeArray.metadata().chunks[0] > 0); + Assertions.assertTrue(largeArray.metadata().chunks[0] < 2000); + Assertions.assertTrue(largeArray.metadata().chunks[1] > 0); + Assertions.assertTrue(largeArray.metadata().chunks[1] < 1500); + + // Test with mixed dimensions + Array mixedArray = Array.create( + new FilesystemStore(TESTOUTPUT).resolve("v2_default_chunks_mixed"), + Array.metadataBuilder() + .withShape(1024, 100, 2048) + .withDataType(DataType.UINT8) + .build() + ); + Assertions.assertEquals(3, mixedArray.metadata().chunks.length); + // Verify chunks are reasonable + Assertions.assertTrue(mixedArray.metadata().chunks[0] > 0); + Assertions.assertTrue(mixedArray.metadata().chunks[0] <= 1024); + Assertions.assertEquals(100, mixedArray.metadata().chunks[1]); // < 512, should equal shape + Assertions.assertTrue(mixedArray.metadata().chunks[2] > 0); + Assertions.assertTrue(mixedArray.metadata().chunks[2] <= 2048); + } +} diff --git a/src/test/java/dev/zarr/zarrjava/ZarrV3Test.java b/src/test/java/dev/zarr/zarrjava/ZarrV3Test.java index e8f1e55..1fcb8dd 100644 --- a/src/test/java/dev/zarr/zarrjava/ZarrV3Test.java +++ b/src/test/java/dev/zarr/zarrjava/ZarrV3Test.java @@ -775,4 +775,51 @@ public void testUnalignedArrayAccess(int arrayShape, int chunkShape, int accessS Assertions.assertArrayEquals(expectedData, (int[]) result.get1DJavaArray(ucar.ma2.DataType.UINT)); } } -} \ No newline at end of file + + @Test + public void testDefaultChunkShape() throws IOException, ZarrException { + // Test with a small array (< 512 elements per dimension) + Array smallArray = Array.create( + new FilesystemStore(TESTOUTPUT).resolve("v3_default_chunks_small"), + Array.metadataBuilder() + .withShape(100, 50) + .withDataType(DataType.UINT8) + .build() + ); + Assertions.assertEquals(2, smallArray.metadata().chunkShape().length); + // Both dimensions < 512, so chunks should equal shape + Assertions.assertEquals(100, smallArray.metadata().chunkShape()[0]); + Assertions.assertEquals(50, smallArray.metadata().chunkShape()[1]); + + // Test with a larger array (> 512 elements per dimension) + Array largeArray = Array.create( + new FilesystemStore(TESTOUTPUT).resolve("v3_default_chunks_large"), + Array.metadataBuilder() + .withShape(2000, 1500) + .withDataType(DataType.UINT8) + .build() + ); + Assertions.assertEquals(2, largeArray.metadata().chunkShape().length); + // Chunks should be calculated based on division by 512 + Assertions.assertTrue(largeArray.metadata().chunkShape()[0] > 0); + Assertions.assertTrue(largeArray.metadata().chunkShape()[0] < 2000); + Assertions.assertTrue(largeArray.metadata().chunkShape()[1] > 0); + Assertions.assertTrue(largeArray.metadata().chunkShape()[1] < 1500); + + // Test with mixed dimensions + Array mixedArray = Array.create( + new FilesystemStore(TESTOUTPUT).resolve("v3_default_chunks_mixed"), + Array.metadataBuilder() + .withShape(1024, 100, 2048) + .withDataType(DataType.UINT8) + .build() + ); + Assertions.assertEquals(3, mixedArray.metadata().chunkShape().length); + // Verify chunks are reasonable + Assertions.assertTrue(mixedArray.metadata().chunkShape()[0] > 0); + Assertions.assertTrue(mixedArray.metadata().chunkShape()[0] <= 1024); + Assertions.assertEquals(100, mixedArray.metadata().chunkShape()[1]); // < 512, should equal shape + Assertions.assertTrue(mixedArray.metadata().chunkShape()[2] > 0); + Assertions.assertTrue(mixedArray.metadata().chunkShape()[2] <= 2048); + } +}