Skip to content

Commit 8f98ebc

Browse files
✨ add support for split utility (#380)
1 parent c749640 commit 8f98ebc

35 files changed

+529
-127
lines changed

.github/workflows/_test-code-samples.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ jobs:
4040
4141
- name: Tests code samples
4242
run: |
43-
./tests/test_code_samples.sh ${{ secrets.MINDEE_ACCOUNT_SE_TESTS }} ${{ secrets.MINDEE_ENDPOINT_SE_TESTS }} ${{ secrets.MINDEE_API_KEY_SE_TESTS }} ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }} ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }}
43+
./tests/test_code_samples.sh ${{ secrets.MINDEE_ACCOUNT_SE_TESTS }} ${{ secrets.MINDEE_ENDPOINT_SE_TESTS }} ${{ secrets.MINDEE_API_KEY_SE_TESTS }} ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }} ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }} ${{ secrets.MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID }}
4444
4545
- name: Notify Slack Action on Failure
4646
uses: ravsamhq/notify-slack-action@2.3.0

.github/workflows/_test-integrations.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ jobs:
4949
MINDEE_V2_API_KEY: ${{ secrets.MINDEE_V2_SE_TESTS_API_KEY }}
5050
MINDEE_V2_FINDOC_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_FINDOC_MODEL_ID }}
5151
MINDEE_V2_SE_TESTS_BLANK_PDF_URL: ${{ secrets.MINDEE_V2_SE_TESTS_BLANK_PDF_URL }}
52+
MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID: ${{ secrets.MINDEE_V2_SE_TESTS_SPLIT_MODEL_ID }}
5253
run: |
5354
pytest --cov mindee -m integration
5455
Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from mindee import ClientV2, InferenceParameters, PathInput
1+
from mindee import ClientV2, InferenceParameters, InferenceResponse, PathInput
22

33
input_path = "/path/to/the/file.ext"
44
api_key = "MY_API_KEY"
@@ -29,8 +29,10 @@ params = InferenceParameters(
2929
input_source = PathInput(input_path)
3030

3131
# Send for processing
32-
response = mindee_client.enqueue_and_get_inference(
33-
input_source, params
32+
response = mindee_client.enqueue_and_get_result(
33+
InferenceResponse,
34+
input_source,
35+
params,
3436
)
3537

3638
# Print a brief summary of the parsed data
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from mindee import ClientV2, PathInput, SplitParameters, SplitResponse

# Placeholder values: replace with your own file path, API key and model ID.
input_path = "/path/to/the/file.ext"
api_key = "MY_API_KEY"
model_id = "MY_SPLIT_MODEL_ID"

# Create the V2 client from the API key
mindee_client = ClientV2(api_key=api_key)

# Build the parameters sent along with the file
# (the model ID is required).
params = SplitParameters(model_id=model_id)

# Wrap the file on disk as an input source
input_source = PathInput(input_path)

# Enqueue the file and poll until the result is available;
# the first argument selects the response class to instantiate.
response = mindee_client.enqueue_and_get_result(
    response_type=SplitResponse,
    input_source=input_source,
    params=params,
)

# Print a brief summary of the parsed data
print(response.inference)

mindee/__init__.py

Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
from mindee import product
22
from mindee.client import Client
33
from mindee.client_v2 import ClientV2
4+
from mindee.input import LocalResponse, PageOptions, PollingOptions
45
from mindee.input.inference_parameters import (
5-
InferenceParameters,
6-
DataSchemaField,
76
DataSchema,
7+
DataSchemaField,
88
DataSchemaReplace,
9+
InferenceParameters,
910
)
10-
from mindee.input import LocalResponse, PageOptions, PollingOptions
1111
from mindee.input.sources import (
1212
Base64Input,
1313
BytesInput,
@@ -22,29 +22,33 @@
2222
from mindee.parsing.common.predict_response import PredictResponse
2323
from mindee.parsing.common.workflow_response import WorkflowResponse
2424
from mindee.parsing.v2 import InferenceResponse, JobResponse
25+
from mindee.v2.product.split.split_parameters import SplitParameters
26+
from mindee.v2.product.split.split_response import SplitResponse
2527

2628
__all__ = [
29+
"ApiResponse",
30+
"AsyncPredictResponse",
31+
"Base64Input",
32+
"BytesInput",
2733
"Client",
2834
"ClientV2",
2935
"DataSchema",
3036
"DataSchemaField",
3137
"DataSchemaReplace",
32-
"InferenceParameters",
38+
"FeedbackResponse",
3339
"FileInput",
34-
"PathInput",
35-
"BytesInput",
36-
"Base64Input",
37-
"UrlInputSource",
40+
"InferenceParameters",
41+
"InferenceResponse",
42+
"Job",
43+
"JobResponse",
3844
"LocalResponse",
3945
"PageOptions",
46+
"PathInput",
4047
"PollingOptions",
41-
"ApiResponse",
42-
"AsyncPredictResponse",
43-
"FeedbackResponse",
4448
"PredictResponse",
49+
"SplitParameters",
50+
"SplitResponse",
51+
"UrlInputSource",
4552
"WorkflowResponse",
46-
"JobResponse",
47-
"Job",
48-
"InferenceResponse",
4953
"product",
5054
]

mindee/client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,7 @@ def enqueue_and_parse( # pylint: disable=too-many-locals
353353
if poll_results.job.status == "failed":
354354
# The message must be an f-string, otherwise "{poll_results.job.id}" is
# emitted literally and the failing job id never reaches the caller.
raise MindeeError(f"Parsing failed for job {poll_results.job.id}")
355355
logger.debug(
356-
"Polling server for parsing result with job id: %s", queue_result.job.id
356+
"Polling server for product result with job id: %s", queue_result.job.id
357357
)
358358
retry_counter += 1
359359
sleep(delay_sec)

mindee/client_v2.py

Lines changed: 73 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
1+
import warnings
12
from time import sleep
2-
from typing import Optional, Union
3+
from typing import Optional, Union, Type, TypeVar
34

45
from mindee.client_mixin import ClientMixin
56
from mindee.error.mindee_error import MindeeError
67
from mindee.error.mindee_http_error_v2 import handle_error_v2
7-
from mindee.input import UrlInputSource
8+
from mindee.input import UrlInputSource, BaseParameters
89
from mindee.input.inference_parameters import InferenceParameters
910
from mindee.input.polling_options import PollingOptions
1011
from mindee.input.sources.local_input_source import LocalInputSource
@@ -15,9 +16,12 @@
1516
is_valid_post_response,
1617
)
1718
from mindee.parsing.v2.common_response import CommonStatus
19+
from mindee.v2.parsing.inference.base_response import BaseResponse
1820
from mindee.parsing.v2.inference_response import InferenceResponse
1921
from mindee.parsing.v2.job_response import JobResponse
2022

23+
TypeBaseResponse = TypeVar("TypeBaseResponse", bound=BaseResponse)
24+
2125

2226
class ClientV2(ClientMixin):
2327
"""
@@ -41,20 +45,34 @@ def __init__(self, api_key: Optional[str] = None) -> None:
4145
def enqueue_inference(
4246
self,
4347
input_source: Union[LocalInputSource, UrlInputSource],
44-
params: InferenceParameters,
48+
params: BaseParameters,
49+
disable_redundant_warnings: bool = False,
50+
) -> JobResponse:
51+
"""[Deprecated] Use `enqueue` instead."""
52+
if not disable_redundant_warnings:
53+
warnings.warn(
54+
"enqueue_inference is deprecated; use enqueue instead",
55+
DeprecationWarning,
56+
stacklevel=2,
57+
)
58+
return self.enqueue(input_source, params)
59+
60+
def enqueue(
61+
self,
62+
input_source: Union[LocalInputSource, UrlInputSource],
63+
params: BaseParameters,
4564
) -> JobResponse:
4665
"""
4766
Enqueues a document to a given model.
4867
4968
:param input_source: The document/source file to use. Can be local or remote.
50-
5169
:param params: Parameters to set when sending a file.
70+
5271
:return: A valid job response.
5372
"""
5473
logger.debug("Enqueuing inference using model: %s", params.model_id)
55-
5674
response = self.mindee_api.req_post_inference_enqueue(
57-
input_source=input_source, params=params
75+
input_source=input_source, params=params, slug=params.get_enqueue_slug()
5876
)
5977
dict_response = response.json()
6078

@@ -79,34 +97,49 @@ def get_job(self, job_id: str) -> JobResponse:
7997
dict_response = response.json()
8098
return JobResponse(dict_response)
8199

82-
def get_inference(self, inference_id: str) -> InferenceResponse:
100+
def get_inference(
    self,
    inference_id: str,
) -> InferenceResponse:
    """
    [Deprecated] Use `get_result` instead.

    Calls `get_result` with `InferenceResponse` as the response type.

    :param inference_id: UUID of the inference to retrieve.
    :return: An inference response.
    """
    # Emit the same kind of deprecation notice as the other deprecated
    # wrappers of this client; stacklevel=2 points at the caller's line.
    warnings.warn(
        "get_inference is deprecated; use get_result instead",
        DeprecationWarning,
        stacklevel=2,
    )
    return self.get_result(InferenceResponse, inference_id)
106+
107+
def get_result(
108+
self,
109+
response_type: Type[TypeBaseResponse],
110+
inference_id: str,
111+
) -> TypeBaseResponse:
83112
"""
84113
Get the result of an inference that was previously enqueued.
85114
86115
The inference will only be available after it has finished processing.
87116
88117
:param inference_id: UUID of the inference to retrieve.
118+
:param response_type: Class of the product to instantiate.
89119
:return: A response instance of the given ``response_type``.
90120
"""
91121
logger.debug("Fetching inference: %s", inference_id)
92122

93-
response = self.mindee_api.req_get_inference(inference_id)
123+
response = self.mindee_api.req_get_inference(
124+
inference_id, response_type.get_result_slug()
125+
)
94126
if not is_valid_get_response(response):
95127
handle_error_v2(response.json())
96128
dict_response = response.json()
97-
return InferenceResponse(dict_response)
129+
return response_type(dict_response)
98130

99-
def enqueue_and_get_inference(
131+
def enqueue_and_get_result(
100132
self,
133+
response_type: Type[TypeBaseResponse],
101134
input_source: Union[LocalInputSource, UrlInputSource],
102-
params: InferenceParameters,
103-
) -> InferenceResponse:
135+
params: BaseParameters,
136+
) -> TypeBaseResponse:
104137
"""
105138
Enqueues to an asynchronous endpoint and automatically polls for a response.
106139
107140
:param input_source: The document/source file to use. Can be local or remote.
108-
109141
:param params: Parameters to set when sending a file.
142+
:param response_type: The product class to use for the response object.
110143
111144
:return: A valid response instance of the given ``response_type``.
112145
"""
@@ -117,14 +150,15 @@ def enqueue_and_get_inference(
117150
params.polling_options.delay_sec,
118151
params.polling_options.max_retries,
119152
)
120-
enqueue_response = self.enqueue_inference(input_source, params)
153+
enqueue_response = self.enqueue_inference(input_source, params, True)
121154
logger.debug(
122-
"Successfully enqueued inference with job id: %s", enqueue_response.job.id
155+
"Successfully enqueued document with job id: %s", enqueue_response.job.id
123156
)
124157
sleep(params.polling_options.initial_delay_sec)
125158
try_counter = 0
126159
while try_counter < params.polling_options.max_retries:
127160
job_response = self.get_job(enqueue_response.job.id)
161+
assert isinstance(job_response, JobResponse)
128162
if job_response.job.status == CommonStatus.FAILED.value:
129163
if job_response.job.error:
130164
detail = job_response.job.error.detail
@@ -134,8 +168,31 @@ def enqueue_and_get_inference(
134168
f"Parsing failed for job {job_response.job.id}: {detail}"
135169
)
136170
if job_response.job.status == CommonStatus.PROCESSED.value:
137-
return self.get_inference(job_response.job.id)
171+
result = self.get_result(
172+
response_type or InferenceResponse, job_response.job.id
173+
)
174+
assert isinstance(result, response_type), (
175+
f'Invalid response type "{type(result)}"'
176+
)
177+
return result
138178
try_counter += 1
139179
sleep(params.polling_options.delay_sec)
140180

141181
raise MindeeError(f"Couldn't retrieve document after {try_counter + 1} tries.")
182+
183+
def enqueue_and_get_inference(
    self,
    input_source: Union[LocalInputSource, UrlInputSource],
    params: InferenceParameters,
) -> InferenceResponse:
    """
    [Deprecated] Use `enqueue_and_get_result` instead.

    Emits a ``DeprecationWarning`` and forwards the call to
    `enqueue_and_get_result` with `InferenceResponse` as the response type.

    :param input_source: The document/source file to use. Can be local or remote.
    :param params: Parameters to set when sending a file.
    :return: A valid inference response.
    """
    deprecation_message = (
        "enqueue_and_get_inference is deprecated; use enqueue_and_get_result instead"
    )
    # stacklevel=2 attributes the warning to the caller of this method.
    warnings.warn(deprecation_message, DeprecationWarning, stacklevel=2)

    result = self.enqueue_and_get_result(InferenceResponse, input_source, params)
    # Narrow the type for static checkers before handing the result back.
    assert isinstance(
        result, InferenceResponse
    ), f'Invalid response type "{type(result)}"'
    return result

mindee/error/mindee_http_error_v2.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import json
2-
from typing import Optional
2+
from typing import List, Optional
33

44
from mindee.parsing.common.string_dict import StringDict
55
from mindee.parsing.v2 import ErrorItem, ErrorResponse
@@ -18,7 +18,7 @@ def __init__(self, response: ErrorResponse) -> None:
1818
self.title = response.title
1919
self.code = response.code
2020
self.detail = response.detail
21-
self.errors: list[ErrorItem] = response.errors
21+
self.errors: List[ErrorItem] = response.errors
2222
super().__init__(
2323
f"HTTP {self.status} - {self.title} :: {self.code} - {self.detail}"
2424
)

mindee/input/__init__.py

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
from mindee.input.local_response import LocalResponse
2+
from mindee.input.base_parameters import BaseParameters
3+
from mindee.input.inference_parameters import InferenceParameters
4+
from mindee.v2.product.split.split_parameters import SplitParameters
25
from mindee.input.page_options import PageOptions
36
from mindee.input.polling_options import PollingOptions
47
from mindee.input.sources.base_64_input import Base64Input
@@ -11,15 +14,18 @@
1114
from mindee.input.workflow_options import WorkflowOptions
1215

1316
__all__ = [
17+
"Base64Input",
18+
"BaseParameters",
19+
"BytesInput",
20+
"FileInput",
1421
"InputType",
22+
"InferenceParameters",
1523
"LocalInputSource",
16-
"UrlInputSource",
24+
"LocalResponse",
25+
"PageOptions",
1726
"PathInput",
18-
"FileInput",
19-
"Base64Input",
20-
"BytesInput",
21-
"WorkflowOptions",
2227
"PollingOptions",
23-
"PageOptions",
24-
"LocalResponse",
28+
"UrlInputSource",
29+
"SplitParameters",
30+
"WorkflowOptions",
2531
]

0 commit comments

Comments
 (0)