Skip to content

Commit 1aa5ba8

Browse files
✨ add support for workflows
1 parent 10803a5 commit 1aa5ba8

21 files changed

+539
-68
lines changed

.github/workflows/test-regression.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,3 +62,4 @@ jobs:
6262
notification_title: "Regression test workflow {workflow} is failing"
6363
env:
6464
SLACK_WEBHOOK_URL: ${{ secrets.PRODUCTION_ISSUES_SLACK_HOOK_URL }}
65+
WORKFLOW_ID: ${{ secrets.WORKFLOW_ID_SE_TESTS }}
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
from mindee import Client, WorkflowResponse

# Init a new client.
# Keyword arguments are passed with `=`; `api_key: "..."` is not valid Python.
mindee_client = Client(api_key="my-api-key")

workflow_id = "workflow-id"

# Load a file from disk
input_doc = mindee_client.source_from_path("/path/to/the/file.ext")

# Send the file to the workflow.
result: WorkflowResponse = mindee_client.execute_workflow(
    input_doc,
    workflow_id,
    # Optionally, add an alias and a priority to the workflow.
    # Using the priority requires:
    #   from mindee.parsing.common.execution_priority import ExecutionPriority
    # alias="my-alias",
    # priority=ExecutionPriority.LOW
)

# Print the ID of the execution to make sure it worked.
print(result.execution.id)

mindee/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,4 @@
44
from mindee.parsing.common.async_predict_response import AsyncPredictResponse, Job
55
from mindee.parsing.common.feedback_response import FeedbackResponse
66
from mindee.parsing.common.predict_response import PredictResponse
7+
from mindee.parsing.common.workflow_response import WorkflowResponse

mindee/client.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,16 @@
2222
)
2323
from mindee.mindee_http.endpoint import CustomEndpoint, Endpoint
2424
from mindee.mindee_http.mindee_api import MindeeApi
25+
from mindee.mindee_http.workflow_endpoint import WorkflowEndpoint
26+
from mindee.mindee_http.workflow_settings import WorkflowSettings
2527
from mindee.parsing.common.async_predict_response import AsyncPredictResponse
28+
from mindee.parsing.common.execution_priority import ExecutionPriority
2629
from mindee.parsing.common.feedback_response import FeedbackResponse
2730
from mindee.parsing.common.inference import Inference
2831
from mindee.parsing.common.predict_response import PredictResponse
2932
from mindee.parsing.common.string_dict import StringDict
33+
from mindee.parsing.common.workflow_response import WorkflowResponse
34+
from mindee.product import GeneratedV1
3035

3136
OTS_OWNER = "mindee"
3237

@@ -230,6 +235,42 @@ def parse_queued(
230235

231236
return self._get_queued_document(product_class, endpoint, queue_id)
232237

238+
def execute_workflow(
    self,
    input_source: Union[LocalInputSource, UrlInputSource],
    workflow_id: str,
    page_options: Optional[PageOptions] = None,
    alias: Optional[str] = None,
    priority: Optional[ExecutionPriority] = None,
    full_text: bool = False,
) -> WorkflowResponse:
    """
    Send the document to a workflow and return the workflow response.

    :param input_source: The document/source file to use.
        Has to be created beforehand.
    :param workflow_id: ID of the workflow.
    :param page_options: If set, remove pages from the document as specified.
        This is done before sending the file to the server.
        It is useful to avoid page limitations.
    :param alias: Optional alias for the document.
    :param priority: Optional priority for the document.
    :param full_text: Whether to include the full OCR text response in compatible APIs.
    :return: The response produced by ``_send_to_workflow``.
    """
    # Page operations are only applied to local PDF sources, and only when
    # page options were actually supplied.
    needs_page_processing = (
        isinstance(input_source, LocalInputSource)
        and page_options
        and input_source.is_pdf()
    )
    if needs_page_processing:
        input_source.process_pdf(
            page_options.operation,
            page_options.on_min_pages,
            page_options.page_indexes,
        )

    logger.debug("Sending document to workflow: %s", workflow_id)

    # GeneratedV1 is the product class handed to the workflow response.
    workflow_response = self._send_to_workflow(
        GeneratedV1, input_source, workflow_id, alias, priority, full_text
    )
    return workflow_response
273+
233274
def _validate_async_params(
234275
self, initial_delay_sec: float, delay_sec: float, max_retries: int
235276
) -> None:
@@ -438,6 +479,50 @@ def _get_queued_document(
438479

439480
return AsyncPredictResponse(product_class, queue_response.json())
440481

482+
def _send_to_workflow(
    self,
    product_class: Type[Inference],
    input_source: Union[LocalInputSource, UrlInputSource],
    workflow_id: str,
    alias: Optional[str] = None,
    priority: Optional[ExecutionPriority] = None,
    full_text: bool = False,
) -> WorkflowResponse:
    """
    Post a document to a workflow and wrap the server reply.

    :param product_class: The document class to use.
        The response object will be instantiated based on this parameter.
    :param input_source: The document/source file to use.
        Has to be created beforehand.
    :param workflow_id: ID of the workflow.
    :param alias: Optional alias for the document.
    :param priority: Priority for the document.
    :param full_text: Whether to include the full OCR text response in compatible APIs.
    :return: A ``WorkflowResponse`` built from the decoded JSON reply.
    :raises MindeeClientError: If ``input_source`` is ``None``.
    """
    if input_source is None:
        raise MindeeClientError("No input document provided")

    # A dedicated endpoint is built per call from the client's API key
    # and the requested workflow ID.
    settings = WorkflowSettings(api_key=self.api_key, workflow_id=workflow_id)
    endpoint = WorkflowEndpoint(settings)

    http_response = endpoint.workflow_execution_post(
        input_source, alias, priority, full_text
    )

    # The body is decoded before validation, as in the original ordering.
    payload = http_response.json()

    if is_valid_async_response(http_response):
        return WorkflowResponse(product_class, payload)

    # Invalid reply: raise whatever handle_error builds from the cleaned response.
    cleaned = clean_request_json(http_response)
    raise handle_error(
        str(product_class.endpoint_name),
        cleaned,
    )
525+
441526
def _initialize_ots_endpoint(self, product_class: Type[Inference]) -> Endpoint:
442527
if product_class.__name__ == "CustomV1":
443528
raise MindeeClientError("Missing endpoint specifications for custom build.")

mindee/mindee_http/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,5 @@
66
is_valid_async_response,
77
is_valid_sync_response,
88
)
9+
from mindee.mindee_http.workflow_endpoint import WorkflowEndpoint
10+
from mindee.mindee_http.workflow_settings import WorkflowSettings

mindee/mindee_http/base_endpoint.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
from abc import ABC
22

3-
from mindee.mindee_http.mindee_api import MindeeApi
3+
from mindee.mindee_http.base_settings import BaseSettings
44

55

66
class BaseEndpoint(ABC):
77
"""Base endpoint class for the Mindee API."""
88

9-
def __init__(self, settings: MindeeApi) -> None:
9+
def __init__(self, settings: BaseSettings) -> None:
1010
"""
1111
Base API endpoint class for all endpoints.
1212
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import os
2+
from dataclasses import dataclass
3+
from typing import Dict, Optional, Union
4+
5+
from mindee.logger import logger
6+
from mindee.versions import __version__, get_platform, python_version
7+
8+
API_KEY_ENV_NAME = "MINDEE_API_KEY"
9+
API_KEY_DEFAULT = ""
10+
11+
BASE_URL_ENV_NAME = "MINDEE_BASE_URL"
12+
BASE_URL_DEFAULT = "https://api.mindee.net/v1"
13+
14+
REQUEST_TIMEOUT_ENV_NAME = "MINDEE_REQUEST_TIMEOUT"
15+
TIMEOUT_DEFAULT = 120
16+
17+
PLATFORM = get_platform()
18+
USER_AGENT = f"mindee-api-python@v{__version__} python-v{python_version} {PLATFORM}"
19+
20+
21+
@dataclass
class BaseSettings:
    """
    Settings class relating to API requests.

    Holds the API key, the base URL and the request timeout.
    Construction applies defaults first, then any environment overrides.
    """

    api_key: Optional[str]
    """API Key for the client."""
    base_url: str
    request_timeout: int

    def __init__(self, api_key: Optional[str]):
        """
        Initialize the settings.

        :param api_key: API key to use. When empty or ``None``, the value of the
            API key environment variable is used instead, if it is set.
        """
        self._set_api_key(api_key)
        # Defaults are applied first so that set_from_env() can override them.
        self.request_timeout = TIMEOUT_DEFAULT
        self.set_base_url(BASE_URL_DEFAULT)
        self.set_from_env()

    @property
    def base_headers(self) -> Dict[str, str]:
        """Base headers to send with all API requests."""
        return {
            "Authorization": f"Token {self.api_key}",
            "User-Agent": USER_AGENT,
        }

    def _set_api_key(self, api_key: Optional[str]) -> None:
        """
        Set the API key, falling back to the environment variable.

        An explicitly provided, non-empty key always takes precedence; the
        environment value is only used when no key was given.

        :param api_key: API key provided by the caller, possibly empty or ``None``.
        """
        env_val = os.getenv(API_KEY_ENV_NAME, "")
        # `not api_key` already covers both None and the empty string.
        if env_val and not api_key:
            logger.debug("API key set from environment")
            self.api_key = env_val
            return
        self.api_key = api_key

    def set_from_env(self) -> None:
        """Set various parameters from environment variables, if present."""
        # Maps each environment variable name to the setter that consumes it.
        env_vars = {
            BASE_URL_ENV_NAME: self.set_base_url,
            REQUEST_TIMEOUT_ENV_NAME: self.set_timeout,
        }
        for name, func in env_vars.items():
            env_val = os.getenv(name, "")
            # Unset and empty variables are both ignored.
            if env_val:
                func(env_val)
                logger.debug("Value was set from env: %s", name)

    def set_timeout(self, value: Union[str, int]) -> None:
        """
        Set the timeout for all requests.

        :param value: Timeout value; converted with ``int()``.
        :raises ValueError: If ``value`` cannot be converted to an integer.
        """
        self.request_timeout = int(value)

    def set_base_url(self, value: str) -> None:
        """
        Set the base URL for all requests.

        :param value: Base URL, stored as given.
        """
        self.base_url = value

mindee/mindee_http/endpoint.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
class Endpoint(BaseEndpoint):
1414
"""Generic API endpoint for a product."""
1515

16+
settings: MindeeApi
17+
1618
def __init__(
1719
self, url_name: str, owner: str, version: str, settings: MindeeApi
1820
) -> None:

mindee/mindee_http/mindee_api.py

Lines changed: 4 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,22 @@
1-
import os
21
from dataclasses import dataclass
3-
from typing import Dict, Optional, Union
2+
from typing import Optional
43

54
from mindee.error.mindee_error import MindeeApiError
6-
from mindee.logger import logger
7-
from mindee.versions import __version__, get_platform, python_version
8-
9-
API_KEY_ENV_NAME = "MINDEE_API_KEY"
10-
API_KEY_DEFAULT = ""
11-
12-
BASE_URL_ENV_NAME = "MINDEE_BASE_URL"
13-
BASE_URL_DEFAULT = "https://api.mindee.net/v1"
14-
15-
REQUEST_TIMEOUT_ENV_NAME = "MINDEE_REQUEST_TIMEOUT"
16-
TIMEOUT_DEFAULT = 120
17-
18-
PLATFORM = get_platform()
19-
USER_AGENT = f"mindee-api-python@v{__version__} python-v{python_version} {PLATFORM}"
5+
from mindee.mindee_http.base_settings import API_KEY_ENV_NAME, BaseSettings
206

217

228
@dataclass
23-
class MindeeApi:
9+
class MindeeApi(BaseSettings):
2410
"""Settings class relating to API requests."""
2511

26-
api_key: Optional[str]
27-
"""API Key for the client."""
28-
base_url: str
29-
request_timeout: int
30-
3112
def __init__(
3213
self,
3314
api_key: Optional[str],
3415
endpoint_name: str,
3516
account_name: str,
3617
version: str,
3718
):
38-
self._set_api_key(api_key)
19+
super().__init__(api_key)
3920
if not self.api_key or len(self.api_key) == 0:
4021
raise MindeeApiError(
4122
(
@@ -48,44 +29,4 @@ def __init__(
4829
self.endpoint_name = endpoint_name
4930
self.account_name = account_name
5031
self.version = version
51-
self.request_timeout = TIMEOUT_DEFAULT
52-
self.set_base_url(BASE_URL_DEFAULT)
53-
self.set_from_env()
5432
self.url_root = f"{self.base_url}/products/{self.account_name}/{self.endpoint_name}/v{self.version}"
55-
56-
@property
57-
def base_headers(self) -> Dict[str, str]:
58-
"""Base headers to send with all API requests."""
59-
return {
60-
"Authorization": f"Token {self.api_key}",
61-
"User-Agent": USER_AGENT,
62-
}
63-
64-
def _set_api_key(self, api_key: Optional[str]) -> None:
65-
"""Set the endpoint's API key from an environment variable, if present."""
66-
env_val = os.getenv(API_KEY_ENV_NAME, "")
67-
if env_val and (not api_key or len(api_key) == 0):
68-
logger.debug("API key set from environment")
69-
self.api_key = env_val
70-
return
71-
self.api_key = api_key
72-
73-
def set_from_env(self) -> None:
74-
"""Set various parameters from environment variables, if present."""
75-
env_vars = {
76-
BASE_URL_ENV_NAME: self.set_base_url,
77-
REQUEST_TIMEOUT_ENV_NAME: self.set_timeout,
78-
}
79-
for name, func in env_vars.items():
80-
env_val = os.getenv(name, "")
81-
if env_val:
82-
func(env_val)
83-
logger.debug("Value was set from env: %s", name)
84-
85-
def set_timeout(self, value: Union[str, int]) -> None:
86-
"""Set the timeout for all requests."""
87-
self.request_timeout = int(value)
88-
89-
def set_base_url(self, value: str) -> None:
90-
"""Set the base URL for all requests."""
91-
self.base_url = value

mindee/mindee_http/response_validation.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,14 +39,15 @@ def is_valid_async_response(response: requests.Response) -> bool:
3939
):
4040
return False
4141
# Async errors.
42-
if "job" not in response_json:
42+
if "job" not in response_json and "execution" not in response_json:
4343
return False
4444
if (
4545
"job" in response_json
4646
and "error" in response_json["job"]
4747
and response_json["job"]["error"]
4848
):
4949
return False
50+
5051
return True
5152

5253

0 commit comments

Comments
 (0)