From c96c56107edf7833f8dacf59fa0f8ab6d108c910 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ianar=C3=A9=20S=C3=A9vi?= Date: Tue, 16 Dec 2025 16:44:40 +0100 Subject: [PATCH] :sparkles: add data-schema replace to v2 --- .../java/com/mindee/InferenceParameters.java | 19 ++++++-- src/main/java/com/mindee/MindeeClientV2.java | 29 ++++++------ .../java/com/mindee/http/MindeeHttpApiV2.java | 13 +++-- .../java/com/mindee/MindeeClientV2IT.java | 47 ++++++++++++++++--- .../java/com/mindee/TestingUtilities.java | 7 +++ .../com/mindee/parsing/v2/InferenceTest.java | 16 ++----- src/test/resources | 2 +- 7 files changed, 89 insertions(+), 44 deletions(-) diff --git a/src/main/java/com/mindee/InferenceParameters.java b/src/main/java/com/mindee/InferenceParameters.java index 38c6b4502..81c043bdb 100644 --- a/src/main/java/com/mindee/InferenceParameters.java +++ b/src/main/java/com/mindee/InferenceParameters.java @@ -11,7 +11,7 @@ @Data public final class InferenceParameters { /** - * ID of the model (required). + * Model ID to use for the inference (required). */ private final String modelId; /** @@ -36,18 +36,21 @@ public final class InferenceParameters { */ private final String alias; /** - * IDs of webhooks to propagate the API response to (may be empty). + * Webhook IDs to call after all processing is finished. If empty, no webhooks will be used. */ private final String[] webhookIds; /** * Polling options. Set only if having timeout issues. */ private final AsyncPollingOptions pollingOptions; - /** * Additional text context used by the model during inference. Not recommended, for specific use only. */ private final String textContext; + /** + * Dynamic changes to the data schema of the model for this inference. + */ + private final String dataSchema; /** * Create a new builder. 
@@ -72,6 +75,7 @@ public static final class Builder { private String alias; private String[] webhookIds = new String[]{}; private String textContext; + private String dataSchema; private AsyncPollingOptions pollingOptions = AsyncPollingOptions.builder().build(); private Builder(String modelId) { @@ -123,6 +127,12 @@ public Builder textContext(String textContext) { return this; } + /** Provide dynamic changes to the data schema of the model for this inference. */ + public Builder dataSchema(String dataSchema) { + this.dataSchema = dataSchema; + return this; + } + /** Set polling options. */ public Builder pollingOptions(AsyncPollingOptions pollingOptions) { this.pollingOptions = pollingOptions; @@ -140,7 +150,8 @@ public InferenceParameters build() { alias, webhookIds, pollingOptions, - textContext + textContext, + dataSchema ); } } diff --git a/src/main/java/com/mindee/MindeeClientV2.java b/src/main/java/com/mindee/MindeeClientV2.java index abd32ba96..cf8a68942 100644 --- a/src/main/java/com/mindee/MindeeClientV2.java +++ b/src/main/java/com/mindee/MindeeClientV2.java @@ -1,11 +1,9 @@ package com.mindee; -import com.fasterxml.jackson.databind.ObjectMapper; import com.mindee.http.MindeeApiV2; import com.mindee.http.MindeeHttpApiV2; import com.mindee.http.MindeeHttpExceptionV2; import com.mindee.input.LocalInputSource; -import com.mindee.input.LocalResponse; import com.mindee.input.URLInputSource; import com.mindee.parsing.v2.ErrorResponse; import com.mindee.parsing.v2.InferenceResponse; @@ -18,18 +16,18 @@ public class MindeeClientV2 { private final MindeeApiV2 mindeeApi; - /** Uses an API-key read from the environment variables. */ + /** Uses an API key read from the environment variables. */ public MindeeClientV2() { this(createDefaultApiV2("")); } - /** Uses the supplied API-key. */ + /** Uses the supplied API key. */ public MindeeClientV2(String apiKey) { this(createDefaultApiV2(apiKey)); } - /** Inject both a PDF implementation and a HTTP implementation. 
*/ + /** Inject both a PDF implementation and an HTTP implementation. */ public MindeeClientV2(MindeeApiV2 mindeeApi) { this.mindeeApi = mindeeApi; } @@ -39,7 +37,8 @@ public MindeeClientV2(MindeeApiV2 mindeeApi) { */ public JobResponse enqueueInference( LocalInputSource inputSource, - InferenceParameters params) throws IOException { + InferenceParameters params + ) throws IOException { return mindeeApi.reqPostInferenceEnqueue(inputSource, params); } @@ -49,7 +48,8 @@ public JobResponse enqueueInference( */ public JobResponse enqueueInference( URLInputSource inputSource, - InferenceParameters params) throws IOException { + InferenceParameters params + ) throws IOException { return mindeeApi.reqPostInferenceEnqueue(inputSource, params); } @@ -72,7 +72,6 @@ public InferenceResponse getInference(String inferenceId) { if (inferenceId == null || inferenceId.trim().isEmpty()) { throw new IllegalArgumentException("inferenceId must not be null or blank."); } - return mindeeApi.reqGetInference(inferenceId); } @@ -86,8 +85,8 @@ public InferenceResponse getInference(String inferenceId) { */ public InferenceResponse enqueueAndGetInference( LocalInputSource inputSource, - InferenceParameters options) throws IOException, InterruptedException { - + InferenceParameters options + ) throws IOException, InterruptedException { validatePollingOptions(options.getPollingOptions()); JobResponse job = enqueueInference(inputSource, options); return pollAndFetch(job, options); @@ -105,8 +104,8 @@ public InferenceResponse enqueueAndGetInference( */ public InferenceResponse enqueueAndGetInference( URLInputSource inputSource, - InferenceParameters options) throws IOException, InterruptedException { - + InferenceParameters options + ) throws IOException, InterruptedException { validatePollingOptions(options.getPollingOptions()); JobResponse job = enqueueInference(inputSource, options); return pollAndFetch(job, options); @@ -119,8 +118,10 @@ public InferenceResponse enqueueAndGetInference( * 
@return an instance of {@link InferenceResponse}. * @throws InterruptedException Throws if interrupted. */ - private InferenceResponse pollAndFetch(JobResponse initialJob, - InferenceParameters options) throws InterruptedException { + private InferenceResponse pollAndFetch( + JobResponse initialJob, + InferenceParameters options + ) throws InterruptedException { Thread.sleep((long) (options.getPollingOptions().getInitialDelaySec() * 1000)); JobResponse resp = initialJob; diff --git a/src/main/java/com/mindee/http/MindeeHttpApiV2.java b/src/main/java/com/mindee/http/MindeeHttpApiV2.java index 5283b56b1..d2698db31 100644 --- a/src/main/java/com/mindee/http/MindeeHttpApiV2.java +++ b/src/main/java/com/mindee/http/MindeeHttpApiV2.java @@ -250,13 +250,6 @@ private HttpEntity buildHttpBody( MultipartEntityBuilder builder, InferenceParameters params ) { - if (params.getTextContext() != null) { - builder.addTextBody( - "text_context", - params.getTextContext().toLowerCase() - ); - } - builder.addTextBody("model_id", params.getModelId()); if (params.getRag() != null) { builder.addTextBody("rag", params.getRag().toString().toLowerCase()); @@ -276,6 +269,12 @@ private HttpEntity buildHttpBody( if (params.getWebhookIds().length > 0) { builder.addTextBody("webhook_ids", String.join(",", params.getWebhookIds())); } + if (params.getTextContext() != null) { + builder.addTextBody("text_context", params.getTextContext()); + } + if (params.getDataSchema() != null) { + builder.addTextBody("data_schema", params.getDataSchema()); + } return builder.build(); } diff --git a/src/test/java/com/mindee/MindeeClientV2IT.java b/src/test/java/com/mindee/MindeeClientV2IT.java index 833fb8b28..0f3e6af55 100644 --- a/src/test/java/com/mindee/MindeeClientV2IT.java +++ b/src/test/java/com/mindee/MindeeClientV2IT.java @@ -1,5 +1,10 @@ package com.mindee; +import static com.mindee.TestingUtilities.getResourcePath; +import static com.mindee.TestingUtilities.getV2ResourcePath; +import static 
com.mindee.TestingUtilities.readFileAsString; +import static org.junit.jupiter.api.Assertions.*; + import com.mindee.http.MindeeHttpExceptionV2; import com.mindee.input.LocalInputSource; import com.mindee.input.URLInputSource; @@ -14,10 +19,6 @@ import java.io.IOException; import org.junit.jupiter.api.*; -import static com.mindee.TestingUtilities.getResourcePath; -import static com.mindee.TestingUtilities.getV1ResourcePathString; -import static org.junit.jupiter.api.Assertions.*; - @TestInstance(TestInstance.Lifecycle.PER_CLASS) @Tag("integration") @DisplayName("MindeeV2 – Integration Tests") @@ -45,7 +46,7 @@ void parseFile_emptyMultiPage_mustSucceed() throws IOException, InterruptedExcep .rawText(true) .polygon(null) .confidence(null) - .alias("java-integration-test") + .alias("java-integration-test_multipage") .textContext(null) .pollingOptions( AsyncPollingOptions.builder() @@ -90,12 +91,12 @@ void parseFile_emptyMultiPage_mustSucceed() throws IOException, InterruptedExcep @DisplayName("Filled, single-page image – enqueue & parse must succeed") void parseFile_filledSinglePage_mustSucceed() throws IOException, InterruptedException { LocalInputSource source = new LocalInputSource( - getV1ResourcePathString("products/financial_document/default_sample.jpg")); + getV2ResourcePath("products/financial_document/default_sample.jpg")); InferenceParameters params = InferenceParameters .builder(modelId) .rag(false) - .alias("java-integration-test") + .alias("java-integration-test_single-page") .textContext("this is an invoice") .build(); @@ -133,6 +134,38 @@ void parseFile_filledSinglePage_mustSucceed() throws IOException, InterruptedExc assertEquals("John Smith", supplierName.getStringValue()); } + @Test + @DisplayName("Data Schema Replace – enqueue & parse must succeed") + void parseFile_dataSchemaReplace_mustSucceed() throws IOException, InterruptedException { + LocalInputSource source = new LocalInputSource( + 
getV2ResourcePath("products/financial_document/default_sample.jpg")); + + InferenceParameters params = InferenceParameters + .builder(modelId) + .rag(false) + .alias("java-integration-test_data-schema-replace") + .dataSchema(readFileAsString(getV2ResourcePath("inference/data_schema_replace_param.json"))) + .build(); + + InferenceResponse response = mindeeClient.enqueueAndGetInference(source, params); + assertNotNull(response); + Inference inference = response.getInference(); + assertNotNull(inference); + + InferenceResult result = inference.getResult(); + assertNotNull(result); + + RawText rawText = result.getRawText(); + assertNull(rawText); + + InferenceFields fields = result.getFields(); + assertNotNull(fields); + + SimpleField replacedField = fields.getSimpleField("test_replace"); + assertNotNull(replacedField); + assertEquals("a test value", replacedField.getStringValue()); + } + @Test @DisplayName("Invalid model ID – enqueue must raise 422") diff --git a/src/test/java/com/mindee/TestingUtilities.java b/src/test/java/com/mindee/TestingUtilities.java index 71872ca86..5433aa9d5 100644 --- a/src/test/java/com/mindee/TestingUtilities.java +++ b/src/test/java/com/mindee/TestingUtilities.java @@ -26,6 +26,13 @@ public static String getV1ResourcePathString(String filePath) { return getV1ResourcePath(filePath).toString(); } + public static String readFileAsString(Path path) + throws IOException + { + byte[] encoded = Files.readAllBytes(path); + return new String(encoded, java.nio.charset.StandardCharsets.UTF_8); /* explicit UTF-8: the platform default charset is not guaranteed to be UTF-8 before Java 18 */ + } + public static void assertStringEqualsFile(String expected, String filePath) throws IOException { String[] actualLines = expected.split(System.lineSeparator()); List expectedLines = Files.readAllLines(Paths.get(filePath)); diff --git a/src/test/java/com/mindee/parsing/v2/InferenceTest.java b/src/test/java/com/mindee/parsing/v2/InferenceTest.java index 6cbf4bb48..790d1db14 100644 --- a/src/test/java/com/mindee/parsing/v2/InferenceTest.java +++ b/src/test/java/com/mindee/parsing/v2/InferenceTest.java 
@@ -1,5 +1,9 @@ package com.mindee.parsing.v2; +import static com.mindee.TestingUtilities.getV2ResourcePath; +import static com.mindee.TestingUtilities.readFileAsString; +import static org.junit.jupiter.api.Assertions.*; + import com.mindee.geometry.Point; import com.mindee.geometry.Polygon; import com.mindee.input.LocalResponse; @@ -12,7 +16,6 @@ import com.mindee.parsing.v2.field.ObjectField; import com.mindee.parsing.v2.field.DynamicField.FieldType; import java.io.IOException; -import java.nio.file.Files; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; @@ -20,8 +23,6 @@ import org.junit.jupiter.api.DisplayName; import org.junit.jupiter.api.Nested; import org.junit.jupiter.api.Test; -import static com.mindee.TestingUtilities.getV2ResourcePath; -import static org.junit.jupiter.api.Assertions.*; @DisplayName("MindeeV2 - Inference Tests") class InferenceTest { @@ -31,13 +32,6 @@ private InferenceResponse loadInference(String filePath) throws IOException { return localResponse.deserializeResponse(InferenceResponse.class); } - private String readFileAsString(String path) - throws IOException - { - byte[] encoded = Files.readAllBytes(getV2ResourcePath(path)); - return new String(encoded); - } - @Nested @DisplayName("Inference on blank file") @@ -525,7 +519,7 @@ class RstDisplay { @DisplayName("rst display must be parsed and exposed") void rstDisplay_mustBeAccessible() throws IOException { InferenceResponse resp = loadInference("inference/standard_field_types.json"); - String rstRef = readFileAsString("inference/standard_field_types.rst"); + String rstRef = readFileAsString(getV2ResourcePath("inference/standard_field_types.rst")); Inference inference = resp.getInference(); assertNotNull(inference); assertEquals(rstRef, resp.getInference().toString()); diff --git a/src/test/resources b/src/test/resources index 48d058a4c..0c51e1d3e 160000 --- a/src/test/resources +++ b/src/test/resources @@ -1 +1 @@ -Subproject commit 
48d058a4c9c6fa8145b7c15b7732fa7180f609da +Subproject commit 0c51e1d3e2258404c44280f25f4951ba6fe27324