Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions src/main/java/dev/zarr/zarrjava/utils/Utils.java
Original file line number Diff line number Diff line change
Expand Up @@ -107,4 +107,38 @@ public static int[] inversePermutation(int[] origin) {
}
return inverse;
}

/**
 * Calculates a default chunk shape for an array whose chunks were not specified.
 * This implements JZarr's ArrayParams.build() logic, targeting chunks of roughly 512 elements
 * per dimension.
 *
 * <p>For each dimension of length {@code n}:
 * <ul>
 *   <li>if {@code n < 512}, the chunk spans the whole dimension (the value is copied as-is,
 *       no validation is performed);</li>
 *   <li>otherwise the dimension is split into {@code n / 512 + 1} pieces: the chunk size is
 *       {@code n / (n / 512 + 1)}, incremented by one when it does not divide {@code n}
 *       evenly. The result is always in the range 256..512.</li>
 * </ul>
 *
 * <p>The total coverage may slightly exceed the dimension size (e.g., for shape=1024,
 * chunks=342 results in 3 chunks covering 1026 elements). This is intentional and matches
 * JZarr behavior - the goal is approximate chunk sizes rather than perfect tiling.
 *
 * <p>All intermediate arithmetic is done in {@code long}, so very large dimensions
 * (2^40 elements and beyond) do not overflow into zero or negative chunk sizes.
 *
 * @param shape the shape of the array
 * @return the calculated default chunk shape, one entry per dimension of {@code shape}
 */
public static int[] calculateDefaultChunks(long[] shape) {
    final long targetChunkSize = 512;
    int[] chunks = new int[shape.length];
    for (int i = 0; i < shape.length; i++) {
        long shapeDim = shape[i];
        // Keep the chunk count as a long: narrowing it to int wraps for dimensions >= 2^40.
        long numChunks = shapeDim / targetChunkSize;
        if (numChunks > 0) {
            // shapeDim < 512 * (numChunks + 1), hence chunkDim < 512 and fits in an int.
            long chunkDim = shapeDim / (numChunks + 1);
            // Round up by one element when the chunk size does not divide the dimension.
            chunks[i] = (int) (shapeDim % chunkDim == 0 ? chunkDim : chunkDim + 1);
        } else {
            // If dimension is smaller than 512, use the full dimension
            chunks[i] = (int) shapeDim;
        }
    }
    return chunks;
}
}
3 changes: 3 additions & 0 deletions src/main/java/dev/zarr/zarrjava/v2/Array.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,18 @@
import com.fasterxml.jackson.databind.ObjectWriter;
import dev.zarr.zarrjava.ZarrException;
import dev.zarr.zarrjava.core.Attributes;
import dev.zarr.zarrjava.core.chunkkeyencoding.Separator;
import dev.zarr.zarrjava.core.codec.CodecPipeline;
import dev.zarr.zarrjava.store.FilesystemStore;
import dev.zarr.zarrjava.store.MemoryStore;
import dev.zarr.zarrjava.store.Store;
import dev.zarr.zarrjava.store.StoreHandle;
import dev.zarr.zarrjava.utils.Utils;
import dev.zarr.zarrjava.v2.codec.Codec;
import dev.zarr.zarrjava.v2.codec.core.BytesCodec;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.file.Path;
Expand Down
10 changes: 7 additions & 3 deletions src/main/java/dev/zarr/zarrjava/v2/ArrayMetadataBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import dev.zarr.zarrjava.ZarrException;
import dev.zarr.zarrjava.core.Attributes;
import dev.zarr.zarrjava.core.chunkkeyencoding.Separator;
import dev.zarr.zarrjava.utils.Utils;
import dev.zarr.zarrjava.v2.codec.Codec;
import dev.zarr.zarrjava.v2.codec.core.BloscCodec;
import dev.zarr.zarrjava.v2.codec.core.ZlibCodec;
Expand Down Expand Up @@ -146,12 +147,15 @@ public ArrayMetadata build() throws ZarrException {
if (shape == null) {
throw new IllegalStateException("Please call `withShape` first.");
}
if (chunks == null) {
throw new IllegalStateException("Please call `withChunks` first.");
}
if (dataType == null) {
throw new IllegalStateException("Please call `withDataType` first.");
}

// If chunks are not specified, calculate default chunks
if (chunks == null) {
chunks = Utils.calculateDefaultChunks(shape);
}

return new ArrayMetadata(
2,
shape,
Expand Down
7 changes: 6 additions & 1 deletion src/main/java/dev/zarr/zarrjava/v3/ArrayMetadataBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import dev.zarr.zarrjava.core.Attributes;
import dev.zarr.zarrjava.core.chunkkeyencoding.Separator;
import dev.zarr.zarrjava.core.codec.core.BytesCodec.Endian;
import dev.zarr.zarrjava.utils.Utils;
import dev.zarr.zarrjava.v3.chunkgrid.ChunkGrid;
import dev.zarr.zarrjava.v3.chunkgrid.RegularChunkGrid;
import dev.zarr.zarrjava.v3.chunkkeyencoding.ChunkKeyEncoding;
Expand Down Expand Up @@ -161,9 +162,13 @@ public ArrayMetadata build() throws ZarrException {
if (dataType == null) {
throw new ZarrException("Data type needs to be provided. Please call `.withDataType`.");
}

// If chunk grid is not specified, calculate default chunks
if (chunkGrid == null) {
throw new ZarrException("Chunk grid needs to be provided. Please call `.withChunkShape`.");
int[] defaultChunks = Utils.calculateDefaultChunks(shape);
chunkGrid = new RegularChunkGrid(new RegularChunkGrid.Configuration(defaultChunks));
}

return new ArrayMetadata(shape, dataType, chunkGrid, chunkKeyEncoding, fillValue, codecs,
dimensionNames,
attributes,
Expand Down
48 changes: 47 additions & 1 deletion src/test/java/dev/zarr/zarrjava/ZarrV2Test.java
Original file line number Diff line number Diff line change
Expand Up @@ -406,4 +406,50 @@ public void testMemoryStore() throws ZarrException, IOException {
Assertions.assertEquals(2, group.list().count());
}

}
@Test
public void testDefaultChunkShape() throws IOException, ZarrException {
    FilesystemStore store = new FilesystemStore(TESTOUTPUT);

    // Case 1: every dimension is below 512, so the default chunks must equal the shape.
    Array small = Array.create(
        store.resolve("v2_default_chunks_small"),
        Array.metadataBuilder().withShape(100, 50).withDataType(DataType.UINT8).build()
    );
    int[] smallChunks = small.metadata().chunks;
    Assertions.assertEquals(2, smallChunks.length);
    Assertions.assertEquals(100, smallChunks[0]);
    Assertions.assertEquals(50, smallChunks[1]);

    // Case 2: every dimension is above 512, so each one must be split into
    // positive chunks that are strictly smaller than the dimension itself.
    Array large = Array.create(
        store.resolve("v2_default_chunks_large"),
        Array.metadataBuilder().withShape(2000, 1500).withDataType(DataType.UINT8).build()
    );
    int[] largeChunks = large.metadata().chunks;
    Assertions.assertEquals(2, largeChunks.length);
    Assertions.assertTrue(largeChunks[0] > 0);
    Assertions.assertTrue(largeChunks[0] < 2000);
    Assertions.assertTrue(largeChunks[1] > 0);
    Assertions.assertTrue(largeChunks[1] < 1500);

    // Case 3: a mix of large and small dimensions.
    Array mixed = Array.create(
        store.resolve("v2_default_chunks_mixed"),
        Array.metadataBuilder().withShape(1024, 100, 2048).withDataType(DataType.UINT8).build()
    );
    int[] mixedChunks = mixed.metadata().chunks;
    Assertions.assertEquals(3, mixedChunks.length);
    // Large dimensions only need to yield a positive chunk no bigger than the dimension.
    Assertions.assertTrue(mixedChunks[0] > 0);
    Assertions.assertTrue(mixedChunks[0] <= 1024);
    // The middle dimension is below 512 and must be kept whole.
    Assertions.assertEquals(100, mixedChunks[1]);
    Assertions.assertTrue(mixedChunks[2] > 0);
    Assertions.assertTrue(mixedChunks[2] <= 2048);
}
}
49 changes: 48 additions & 1 deletion src/test/java/dev/zarr/zarrjava/ZarrV3Test.java
Original file line number Diff line number Diff line change
Expand Up @@ -775,4 +775,51 @@ public void testUnalignedArrayAccess(int arrayShape, int chunkShape, int accessS
Assertions.assertArrayEquals(expectedData, (int[]) result.get1DJavaArray(ucar.ma2.DataType.UINT));
}
}
}

@Test
public void testDefaultChunkShape() throws IOException, ZarrException {
    FilesystemStore store = new FilesystemStore(TESTOUTPUT);

    // Case 1: every dimension is below 512, so the default chunk shape must equal the shape.
    Array small = Array.create(
        store.resolve("v3_default_chunks_small"),
        Array.metadataBuilder().withShape(100, 50).withDataType(DataType.UINT8).build()
    );
    int[] smallChunks = small.metadata().chunkShape();
    Assertions.assertEquals(2, smallChunks.length);
    Assertions.assertEquals(100, smallChunks[0]);
    Assertions.assertEquals(50, smallChunks[1]);

    // Case 2: every dimension is above 512, so each one must be split into
    // positive chunks that are strictly smaller than the dimension itself.
    Array large = Array.create(
        store.resolve("v3_default_chunks_large"),
        Array.metadataBuilder().withShape(2000, 1500).withDataType(DataType.UINT8).build()
    );
    int[] largeChunks = large.metadata().chunkShape();
    Assertions.assertEquals(2, largeChunks.length);
    Assertions.assertTrue(largeChunks[0] > 0);
    Assertions.assertTrue(largeChunks[0] < 2000);
    Assertions.assertTrue(largeChunks[1] > 0);
    Assertions.assertTrue(largeChunks[1] < 1500);

    // Case 3: a mix of large and small dimensions.
    Array mixed = Array.create(
        store.resolve("v3_default_chunks_mixed"),
        Array.metadataBuilder().withShape(1024, 100, 2048).withDataType(DataType.UINT8).build()
    );
    int[] mixedChunks = mixed.metadata().chunkShape();
    Assertions.assertEquals(3, mixedChunks.length);
    // Large dimensions only need to yield a positive chunk no bigger than the dimension.
    Assertions.assertTrue(mixedChunks[0] > 0);
    Assertions.assertTrue(mixedChunks[0] <= 1024);
    // The middle dimension is below 512 and must be kept whole.
    Assertions.assertEquals(100, mixedChunks[1]);
    Assertions.assertTrue(mixedChunks[2] > 0);
    Assertions.assertTrue(mixedChunks[2] <= 2048);
}
}
Loading