From a13ca7bfebffb5f436476208a2f00f564f6b5700 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 10 Jul 2025 16:51:38 +0200 Subject: [PATCH 1/4] :recycle: fix syntaxes for V2, bump test lib --- mindee/client_v2.py | 12 ++++++------ mindee/parsing/v2/__init__.py | 2 +- mindee/parsing/v2/inference_options.py | 13 ++----------- mindee/parsing/v2/job.py | 2 ++ .../v2/{polling_response.py => job_response.py} | 2 +- mindee/parsing/v2/raw_text.py | 14 ++++++++++++++ tests/data | 2 +- tests/test_client_v2.py | 4 ++-- 8 files changed, 29 insertions(+), 22 deletions(-) rename mindee/parsing/v2/{polling_response.py => job_response.py} (91%) create mode 100644 mindee/parsing/v2/raw_text.py diff --git a/mindee/client_v2.py b/mindee/client_v2.py index 6d1e6890..c30706bb 100644 --- a/mindee/client_v2.py +++ b/mindee/client_v2.py @@ -15,7 +15,7 @@ is_valid_post_response, ) from mindee.parsing.v2.inference_response import InferenceResponse -from mindee.parsing.v2.polling_response import PollingResponse +from mindee.parsing.v2.job_response import JobResponse class ClientV2(ClientMixin): @@ -39,7 +39,7 @@ def __init__(self, api_key: Optional[str] = None) -> None: def enqueue( self, input_source: LocalInputSource, options: InferencePredictOptions - ) -> PollingResponse: + ) -> JobResponse: """ Enqueues a document to a given model. @@ -66,12 +66,12 @@ def enqueue( if not is_valid_post_response(response): handle_error_v2(dict_response) - return PollingResponse(dict_response) + return JobResponse(dict_response) def parse_queued( self, queue_id: str, - ) -> Union[InferenceResponse, PollingResponse]: + ) -> Union[InferenceResponse, JobResponse]: """ Parses a queued document. @@ -85,7 +85,7 @@ def parse_queued( dict_response = response.json() if "job" in dict_response: - return PollingResponse(dict_response) + return JobResponse(dict_response) return InferenceResponse(dict_response) def enqueue_and_parse( @@ -118,7 +118,7 @@ def enqueue_and_parse( queue_result.job.id, ) while retry_counter < options.polling_options.max_retries: - if not isinstance(poll_results, PollingResponse): + if not isinstance(poll_results, JobResponse): break if poll_results.job.status == "Failed": raise MindeeError(f"Parsing failed for job {poll_results.job.id}") diff --git a/mindee/parsing/v2/__init__.py b/mindee/parsing/v2/__init__.py index 812b3865..88c26ad8 100644 --- a/mindee/parsing/v2/__init__.py +++ b/mindee/parsing/v2/__init__.py @@ -13,4 +13,4 @@ from mindee.parsing.v2.inference_response import InferenceResponse from mindee.parsing.v2.inference_result import InferenceResult from mindee.parsing.v2.job import Job -from mindee.parsing.v2.polling_response import PollingResponse +from mindee.parsing.v2.job_response import JobResponse diff --git a/mindee/parsing/v2/inference_options.py b/mindee/parsing/v2/inference_options.py index 151acfcc..22df6880 100644 --- a/mindee/parsing/v2/inference_options.py +++ b/mindee/parsing/v2/inference_options.py @@ -1,23 +1,14 @@ from typing import List from mindee.parsing.common.string_dict import StringDict - - -class RawText: - """Raw text extracted from the document.""" - - page: int - content: str - - def __init__(self, raw_response: StringDict): - self.page = raw_response["page"] - self.content = raw_response["content"] +from mindee.parsing.v2.raw_text import RawText class InferenceOptions: """Optional information about the document.""" raw_texts: List[RawText] + """List of text found per page.""" def __init__(self, raw_response: StringDict): self.raw_texts = [RawText(raw_text) for raw_text in raw_response["raw_texts"]] diff --git a/mindee/parsing/v2/job.py b/mindee/parsing/v2/job.py index 53d21c91..b8a045c8 100644 --- a/mindee/parsing/v2/job.py +++ b/mindee/parsing/v2/job.py @@ -12,6 +12,8 @@ class Job: """Job ID.""" error: Optional[ErrorResponse] """Error response if any.""" + created_at: datetime + """Timestamp of the job creation.""" model_id: str """ID of the model.""" filename: str diff --git a/mindee/parsing/v2/polling_response.py b/mindee/parsing/v2/job_response.py similarity index 91% rename from mindee/parsing/v2/polling_response.py rename to mindee/parsing/v2/job_response.py index 929e3db3..ae1e3656 100644 --- a/mindee/parsing/v2/polling_response.py +++ b/mindee/parsing/v2/job_response.py @@ -3,7 +3,7 @@ from mindee.parsing.v2.job import Job -class PollingResponse(CommonResponse): +class JobResponse(CommonResponse): """Represent an inference response from Mindee V2 API.""" job: Job diff --git a/mindee/parsing/v2/raw_text.py b/mindee/parsing/v2/raw_text.py new file mode 100644 index 00000000..50113eb6 --- /dev/null +++ b/mindee/parsing/v2/raw_text.py @@ -0,0 +1,14 @@ +from mindee.parsing.common.string_dict import StringDict + + +class RawText: + """Raw text extracted from the document.""" + + page: int + """Page the raw text was found on.""" + content: str + """Content of the raw text.""" + + def __init__(self, raw_response: StringDict): + self.page = raw_response["page"] + self.content = raw_response["content"] diff --git a/tests/data b/tests/data index 2e278837..e2912fbd 160000 --- a/tests/data +++ b/tests/data @@ -1 +1 @@ -Subproject commit 2e2788376cd0dd6168f1917129588fab6089378d +Subproject commit e2912fbd362b7ccf595a5a8d6cc6a67f78901cde diff --git a/tests/test_client_v2.py b/tests/test_client_v2.py index e7c74f3d..1a382958 100644 --- a/tests/test_client_v2.py +++ b/tests/test_client_v2.py @@ -7,7 +7,7 @@ from mindee.error.mindee_http_error_v2 import MindeeHTTPErrorV2 from mindee.input import LocalInputSource, PathInput from mindee.mindee_http.base_settings import USER_AGENT -from mindee.parsing.v2 import Job, PollingResponse +from mindee.parsing.v2 import Job, JobResponse from tests.test_inputs import FILE_TYPES_DIR, V2_DATA_DIR from tests.utils import dummy_envvars @@ -137,7 +137,7 @@ def test_enqueue(custom_base_url_client): response = custom_base_url_client.parse_queued( "12345678-1234-1234-1234-123456789ABC" ) - assert isinstance(response, PollingResponse) + assert isinstance(response, JobResponse) assert isinstance(response.job, Job) assert response.job.id == "12345678-1234-1234-1234-123456789ABC" assert response.job.model_id == "87654321-4321-4321-4321-CBA987654321" From ce5afe0f8eb06f4b841350abdb319bc899882cd0 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 10 Jul 2025 18:00:15 +0200 Subject: [PATCH 2/4] update code sample --- docs/extras/code_samples/default_v2.txt | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/docs/extras/code_samples/default_v2.txt b/docs/extras/code_samples/default_v2.txt index 601fedc4..63b2b667 100644 --- a/docs/extras/code_samples/default_v2.txt +++ b/docs/extras/code_samples/default_v2.txt @@ -13,12 +13,21 @@ model_id = "MY_MODEL_ID" # Init a new client mindee_client = ClientV2(api_key) +# Set inference options +options = InferencePredictOptions( + # ID of the model, required. + model_id=model_id, + # If set to `True`, will enable Retrieval-Augmented Generation. + rag=False, +) + # Load a file from disk input_doc = mindee_client.source_from_path(input_path) -options = InferencePredictOptions(model_id=model_id) -# Parse the file. -response: InferenceResponse = mindee_client.enqueue_and_parse(input_doc, options) +# Upload the file +response: InferenceResponse = mindee_client.enqueue_and_parse( + input_doc, options +) # Print a brief summary of the parsed data -print(response.inference) +print(response.inference) \ No newline at end of file From abe24ce4c2668950074edb4d3d065a665ef139f3 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 10 Jul 2025 18:05:47 +0200 Subject: [PATCH 3/4] remove wrong line --- docs/extras/code_samples/default_v2.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/extras/code_samples/default_v2.txt b/docs/extras/code_samples/default_v2.txt index 63b2b667..b25fcbe4 100644 --- a/docs/extras/code_samples/default_v2.txt +++ b/docs/extras/code_samples/default_v2.txt @@ -2,9 +2,8 @@ # Install the Python client library by running: # pip install mindee # - from mindee import ClientV2, InferencePredictOptions -from mindee.parsing.v2 import InferenceResponse, PollingResponse +from mindee.parsing.v2 import InferenceResponse input_path = "/path/to/the/file.ext" api_key = "MY_API_KEY" From 3c3443616ceead31956eb64627a8842f4dd257e3 Mon Sep 17 00:00:00 2001 From: sebastianMindee <130448732+sebastianMindee@users.noreply.github.com> Date: Thu, 10 Jul 2025 18:07:41 +0200 Subject: [PATCH 4/4] remove more stuff --- docs/extras/code_samples/default_v2.txt | 4 ---- 1 file changed, 4 deletions(-) diff --git a/docs/extras/code_samples/default_v2.txt b/docs/extras/code_samples/default_v2.txt index b25fcbe4..d58e4343 100644 --- a/docs/extras/code_samples/default_v2.txt +++ b/docs/extras/code_samples/default_v2.txt @@ -1,7 +1,3 @@ -# -# Install the Python client library by running: -# pip install mindee -# from mindee import ClientV2, InferencePredictOptions from mindee.parsing.v2 import InferenceResponse