From baebce8f3bee245cdc8aba1a107a4e14d320e524 Mon Sep 17 00:00:00 2001 From: sebastianMindee Date: Thu, 21 Nov 2024 18:26:34 +0100 Subject: [PATCH 1/8] :sparkles: add support for workflows --- .github/workflows/test-regression.yml | 1 + .../code_samples/workflow_execution.txt | 21 +++++ mindee/__init__.py | 1 + mindee/client.py | 87 +++++++++++++++++++ mindee/mindee_http/__init__.py | 2 + mindee/mindee_http/base_endpoint.py | 4 +- mindee/mindee_http/base_settings.py | 71 +++++++++++++++ mindee/mindee_http/endpoint.py | 2 + mindee/mindee_http/mindee_api.py | 67 +------------- mindee/mindee_http/response_validation.py | 3 +- mindee/mindee_http/workflow_endpoint.py | 72 +++++++++++++++ mindee/mindee_http/workflow_settings.py | 27 ++++++ mindee/parsing/common/execution.py | 81 +++++++++++++++++ mindee/parsing/common/execution_file.py | 17 ++++ mindee/parsing/common/execution_priority.py | 9 ++ mindee/parsing/common/workflow_response.py | 20 +++++ tests/test_code_samples.sh | 2 +- tests/utils.py | 2 +- tests/workflows/__init__.py | 0 tests/workflows/test_workflow.py | 81 +++++++++++++++++ tests/workflows/test_workflow_integration.py | 39 +++++++++ 21 files changed, 541 insertions(+), 68 deletions(-) create mode 100644 docs/extras/code_samples/workflow_execution.txt create mode 100644 mindee/mindee_http/base_settings.py create mode 100644 mindee/mindee_http/workflow_endpoint.py create mode 100644 mindee/mindee_http/workflow_settings.py create mode 100644 mindee/parsing/common/execution.py create mode 100644 mindee/parsing/common/execution_file.py create mode 100644 mindee/parsing/common/execution_priority.py create mode 100644 mindee/parsing/common/workflow_response.py create mode 100644 tests/workflows/__init__.py create mode 100644 tests/workflows/test_workflow.py create mode 100644 tests/workflows/test_workflow_integration.py diff --git a/.github/workflows/test-regression.yml b/.github/workflows/test-regression.yml index d742cc55..d2df1a5b 100644 --- 
a/.github/workflows/test-regression.yml +++ b/.github/workflows/test-regression.yml @@ -62,3 +62,4 @@ jobs: notification_title: "Regression test workflow {workflow} is failing" env: SLACK_WEBHOOK_URL: ${{ secrets.PRODUCTION_ISSUES_SLACK_HOOK_URL }} + WORKFLOW_ID: ${{ secrets.WORKFLOW_ID_SE_TESTS }} diff --git a/docs/extras/code_samples/workflow_execution.txt b/docs/extras/code_samples/workflow_execution.txt new file mode 100644 index 00000000..29bcdddb --- /dev/null +++ b/docs/extras/code_samples/workflow_execution.txt @@ -0,0 +1,21 @@ +from mindee import Client, WorkflowResponse + +# Init a new client +mindee_client = Client(api_key="my-api-key") + +workflow_id = "workflow-id" + +# Load a file from disk +input_doc = mindee_client.source_from_path("/path/to/the/file.ext") + +# Send the file to the workflow. +result: WorkflowResponse = mindee_client.execute_workflow( + input_doc, + workflow_id, + # Optionally, add an alias and a priority to the workflow. + # alias="my-alias", + # priority=ExecutionPriority.LOW +) + +# Print the ID of the execution to make sure it worked. 
+print(result.execution.id) diff --git a/mindee/__init__.py b/mindee/__init__.py index bf129c3f..27647254 100644 --- a/mindee/__init__.py +++ b/mindee/__init__.py @@ -4,3 +4,4 @@ from mindee.parsing.common.async_predict_response import AsyncPredictResponse, Job from mindee.parsing.common.feedback_response import FeedbackResponse from mindee.parsing.common.predict_response import PredictResponse +from mindee.parsing.common.workflow_response import WorkflowResponse diff --git a/mindee/client.py b/mindee/client.py index 3edd23e0..b79f2195 100644 --- a/mindee/client.py +++ b/mindee/client.py @@ -22,11 +22,18 @@ is_valid_async_response, is_valid_sync_response, ) +from mindee.mindee_http.endpoint import CustomEndpoint, Endpoint +from mindee.mindee_http.mindee_api import MindeeApi +from mindee.mindee_http.workflow_endpoint import WorkflowEndpoint +from mindee.mindee_http.workflow_settings import WorkflowSettings from mindee.parsing.common.async_predict_response import AsyncPredictResponse +from mindee.parsing.common.execution_priority import ExecutionPriority from mindee.parsing.common.feedback_response import FeedbackResponse from mindee.parsing.common.inference import Inference from mindee.parsing.common.predict_response import PredictResponse from mindee.parsing.common.string_dict import StringDict +from mindee.parsing.common.workflow_response import WorkflowResponse +from mindee.product import GeneratedV1 OTS_OWNER = "mindee" @@ -230,6 +237,42 @@ def parse_queued( return self._get_queued_document(product_class, endpoint, queue_id) + def execute_workflow( + self, + input_source: Union[LocalInputSource, UrlInputSource], + workflow_id: str, + page_options: Optional[PageOptions] = None, + alias: Optional[str] = None, + priority: Optional[ExecutionPriority] = None, + full_text: bool = False, + ) -> WorkflowResponse: + """ + Send the document to an asynchronous endpoint and return its ID in the queue. + + :param input_source: The document/source file to use. 
+ Has to be created beforehand. + :param workflow_id: ID of the workflow. + :param page_options: If set, remove pages from the document as specified. This is done before sending the file\ + to the server. It is useful to avoid page limitations. + :param alias: Optional alias for the document. + :param priority: Optional priority for the document. + :param full_text: Whether to include the full OCR text response in compatible APIs. + :return: + """ + if isinstance(input_source, LocalInputSource): + if page_options and input_source.is_pdf(): + input_source.process_pdf( + page_options.operation, + page_options.on_min_pages, + page_options.page_indexes, + ) + + logger.debug("Sending document to workflow: %s", workflow_id) + + return self._send_to_workflow( + GeneratedV1, input_source, workflow_id, alias, priority, full_text + ) + def _validate_async_params( self, initial_delay_sec: float, delay_sec: float, max_retries: int ) -> None: @@ -438,6 +481,50 @@ def _get_queued_document( return AsyncPredictResponse(product_class, queue_response.json()) + def _send_to_workflow( + self, + product_class: Type[Inference], + input_source: Union[LocalInputSource, UrlInputSource], + workflow_id: str, + alias: Optional[str] = None, + priority: Optional[ExecutionPriority] = None, + full_text: bool = False, + ) -> WorkflowResponse: + """ + Sends a document to a workflow. + + :param product_class: The document class to use. + The response object will be instantiated based on this parameter. + + :param input_source: The document/source file to use. + Has to be created beforehand. + :param workflow_id: ID of the workflow. + :param alias: Optional alias for the document. + :param priority: Priority for the document. + :param full_text: Whether to include the full OCR text response in compatible APIs. 
+ :return: + """ + if input_source is None: + raise MindeeClientError("No input document provided") + + workflow_endpoint = WorkflowEndpoint( + WorkflowSettings(api_key=self.api_key, workflow_id=workflow_id) + ) + + response = workflow_endpoint.workflow_execution_post( + input_source, alias, priority, full_text + ) + + dict_response = response.json() + + if not is_valid_async_response(response): + clean_response = clean_request_json(response) + raise handle_error( + str(product_class.endpoint_name), + clean_response, + ) + return WorkflowResponse(product_class, dict_response) + def _initialize_ots_endpoint(self, product_class: Type[Inference]) -> Endpoint: if product_class.__name__ == "CustomV1": raise MindeeClientError("Missing endpoint specifications for custom build.") diff --git a/mindee/mindee_http/__init__.py b/mindee/mindee_http/__init__.py index 4cc858db..964258ef 100644 --- a/mindee/mindee_http/__init__.py +++ b/mindee/mindee_http/__init__.py @@ -6,3 +6,5 @@ is_valid_async_response, is_valid_sync_response, ) +from mindee.mindee_http.workflow_endpoint import WorkflowEndpoint +from mindee.mindee_http.workflow_settings import WorkflowSettings diff --git a/mindee/mindee_http/base_endpoint.py b/mindee/mindee_http/base_endpoint.py index fbae13b8..51c75f71 100644 --- a/mindee/mindee_http/base_endpoint.py +++ b/mindee/mindee_http/base_endpoint.py @@ -1,12 +1,12 @@ from abc import ABC -from mindee.mindee_http.mindee_api import MindeeApi +from mindee.mindee_http.base_settings import BaseSettings class BaseEndpoint(ABC): """Base endpoint class for the Mindee API.""" - def __init__(self, settings: MindeeApi) -> None: + def __init__(self, settings: BaseSettings) -> None: """ Base API endpoint class for all endpoints. 
diff --git a/mindee/mindee_http/base_settings.py b/mindee/mindee_http/base_settings.py new file mode 100644 index 00000000..c83e0869 --- /dev/null +++ b/mindee/mindee_http/base_settings.py @@ -0,0 +1,71 @@ +import os +from dataclasses import dataclass +from typing import Dict, Optional, Union + +from mindee.logger import logger +from mindee.versions import __version__, get_platform, python_version + +API_KEY_ENV_NAME = "MINDEE_API_KEY" +API_KEY_DEFAULT = "" + +BASE_URL_ENV_NAME = "MINDEE_BASE_URL" +BASE_URL_DEFAULT = "https://api.mindee.net/v1" + +REQUEST_TIMEOUT_ENV_NAME = "MINDEE_REQUEST_TIMEOUT" +TIMEOUT_DEFAULT = 120 + +PLATFORM = get_platform() +USER_AGENT = f"mindee-api-python@v{__version__} python-v{python_version} {PLATFORM}" + + +@dataclass +class BaseSettings: + """Settings class relating to API requests.""" + + api_key: Optional[str] + """API Key for the client.""" + base_url: str + request_timeout: int + + def __init__(self, api_key: Optional[str]): + self._set_api_key(api_key) + self.request_timeout = TIMEOUT_DEFAULT + self.set_base_url(BASE_URL_DEFAULT) + self.set_from_env() + + @property + def base_headers(self) -> Dict[str, str]: + """Base headers to send with all API requests.""" + return { + "Authorization": f"Token {self.api_key}", + "User-Agent": USER_AGENT, + } + + def _set_api_key(self, api_key: Optional[str]) -> None: + """Set the endpoint's API key from an environment variable, if present.""" + env_val = os.getenv(API_KEY_ENV_NAME, "") + if env_val and (not api_key or len(api_key) == 0): + logger.debug("API key set from environment") + self.api_key = env_val + return + self.api_key = api_key + + def set_from_env(self) -> None: + """Set various parameters from environment variables, if present.""" + env_vars = { + BASE_URL_ENV_NAME: self.set_base_url, + REQUEST_TIMEOUT_ENV_NAME: self.set_timeout, + } + for name, func in env_vars.items(): + env_val = os.getenv(name, "") + if env_val: + func(env_val) + logger.debug("Value was set from env: %s", 
name) + + def set_timeout(self, value: Union[str, int]) -> None: + """Set the timeout for all requests.""" + self.request_timeout = int(value) + + def set_base_url(self, value: str) -> None: + """Set the base URL for all requests.""" + self.base_url = value diff --git a/mindee/mindee_http/endpoint.py b/mindee/mindee_http/endpoint.py index 5d510d9c..fdbd2ae7 100644 --- a/mindee/mindee_http/endpoint.py +++ b/mindee/mindee_http/endpoint.py @@ -13,6 +13,8 @@ class Endpoint(BaseEndpoint): """Generic API endpoint for a product.""" + settings: MindeeApi + def __init__( self, url_name: str, owner: str, version: str, settings: MindeeApi ) -> None: diff --git a/mindee/mindee_http/mindee_api.py b/mindee/mindee_http/mindee_api.py index b93b772e..33bfc7d9 100644 --- a/mindee/mindee_http/mindee_api.py +++ b/mindee/mindee_http/mindee_api.py @@ -1,33 +1,14 @@ -import os from dataclasses import dataclass -from typing import Dict, Optional, Union +from typing import Optional from mindee.error.mindee_error import MindeeApiError -from mindee.logger import logger -from mindee.versions import __version__, get_platform, python_version - -API_KEY_ENV_NAME = "MINDEE_API_KEY" -API_KEY_DEFAULT = "" - -BASE_URL_ENV_NAME = "MINDEE_BASE_URL" -BASE_URL_DEFAULT = "https://api.mindee.net/v1" - -REQUEST_TIMEOUT_ENV_NAME = "MINDEE_REQUEST_TIMEOUT" -TIMEOUT_DEFAULT = 120 - -PLATFORM = get_platform() -USER_AGENT = f"mindee-api-python@v{__version__} python-v{python_version} {PLATFORM}" +from mindee.mindee_http.base_settings import API_KEY_ENV_NAME, BaseSettings @dataclass -class MindeeApi: +class MindeeApi(BaseSettings): """Settings class relating to API requests.""" - api_key: Optional[str] - """API Key for the client.""" - base_url: str - request_timeout: int - def __init__( self, api_key: Optional[str], @@ -35,7 +16,7 @@ def __init__( account_name: str, version: str, ): - self._set_api_key(api_key) + super().__init__(api_key) if not self.api_key or len(self.api_key) == 0: raise MindeeApiError( ( @@ 
-48,44 +29,4 @@ def __init__( self.endpoint_name = endpoint_name self.account_name = account_name self.version = version - self.request_timeout = TIMEOUT_DEFAULT - self.set_base_url(BASE_URL_DEFAULT) - self.set_from_env() self.url_root = f"{self.base_url}/products/{self.account_name}/{self.endpoint_name}/v{self.version}" - - @property - def base_headers(self) -> Dict[str, str]: - """Base headers to send with all API requests.""" - return { - "Authorization": f"Token {self.api_key}", - "User-Agent": USER_AGENT, - } - - def _set_api_key(self, api_key: Optional[str]) -> None: - """Set the endpoint's API key from an environment variable, if present.""" - env_val = os.getenv(API_KEY_ENV_NAME, "") - if env_val and (not api_key or len(api_key) == 0): - logger.debug("API key set from environment") - self.api_key = env_val - return - self.api_key = api_key - - def set_from_env(self) -> None: - """Set various parameters from environment variables, if present.""" - env_vars = { - BASE_URL_ENV_NAME: self.set_base_url, - REQUEST_TIMEOUT_ENV_NAME: self.set_timeout, - } - for name, func in env_vars.items(): - env_val = os.getenv(name, "") - if env_val: - func(env_val) - logger.debug("Value was set from env: %s", name) - - def set_timeout(self, value: Union[str, int]) -> None: - """Set the timeout for all requests.""" - self.request_timeout = int(value) - - def set_base_url(self, value: str) -> None: - """Set the base URL for all requests.""" - self.base_url = value diff --git a/mindee/mindee_http/response_validation.py b/mindee/mindee_http/response_validation.py index 5976a803..e261df91 100644 --- a/mindee/mindee_http/response_validation.py +++ b/mindee/mindee_http/response_validation.py @@ -39,7 +39,7 @@ def is_valid_async_response(response: requests.Response) -> bool: ): return False # Async errors. 
- if "job" not in response_json: + if "job" not in response_json and "execution" not in response_json: return False if ( "job" in response_json @@ -47,6 +47,7 @@ def is_valid_async_response(response: requests.Response) -> bool: and response_json["job"]["error"] ): return False + return True diff --git a/mindee/mindee_http/workflow_endpoint.py b/mindee/mindee_http/workflow_endpoint.py new file mode 100644 index 00000000..a902389b --- /dev/null +++ b/mindee/mindee_http/workflow_endpoint.py @@ -0,0 +1,72 @@ +from typing import Optional, Union + +import requests + +from mindee.input import LocalInputSource, UrlInputSource +from mindee.mindee_http import BaseEndpoint +from mindee.mindee_http.workflow_settings import WorkflowSettings +from mindee.parsing.common.execution_priority import ExecutionPriority + + +class WorkflowEndpoint(BaseEndpoint): + """Workflow endpoint.""" + + settings: WorkflowSettings + + def __init__(self, settings: WorkflowSettings) -> None: + """ + Workflow Endpoint. + + :param settings: Settings object. + """ + super().__init__(settings) + + def workflow_execution_post( + self, + input_source: Union[LocalInputSource, UrlInputSource], + alias: Optional[str] = None, + priority: Optional[ExecutionPriority] = None, + full_text: bool = False, + ): + """ + Sends the document to the workflow. + + :param input_source: The document/source file to use. + Has to be created beforehand. + :param alias: Optional alias for the document. + :param priority: Priority for the document. + :param full_text: Whether to include the full OCR text response in compatible APIs. 
+ :return: + """ + data = {} + + if alias: + data["alias"] = alias + if priority: + data["priority"] = priority.value + + params = {} + if full_text: + params["full_text_ocr"] = "true" + + if isinstance(input_source, UrlInputSource): + data["document"] = input_source.url + response = requests.post( + self.settings.url_root, + headers=self.settings.base_headers, + data=data, + params=params, + timeout=self.settings.request_timeout, + ) + else: + files = {"document": input_source.read_contents(True)} + response = requests.post( + self.settings.url_root, + files=files, + headers=self.settings.base_headers, + data=data, + params=params, + timeout=self.settings.request_timeout, + ) + + return response diff --git a/mindee/mindee_http/workflow_settings.py b/mindee/mindee_http/workflow_settings.py new file mode 100644 index 00000000..74668920 --- /dev/null +++ b/mindee/mindee_http/workflow_settings.py @@ -0,0 +1,27 @@ +from dataclasses import dataclass +from typing import Optional + +from mindee.error.mindee_error import MindeeApiError +from mindee.mindee_http.base_settings import API_KEY_ENV_NAME, BaseSettings + + +@dataclass +class WorkflowSettings(BaseSettings): + """Settings class relating to workflow requests.""" + + def __init__( + self, + api_key: Optional[str], + workflow_id: str, + ): + super().__init__(api_key) + if not self.api_key or len(self.api_key) == 0: + raise MindeeApiError( + ( + f"Missing API key for workflow '{workflow_id}'," + " check your Client configuration.\n" + "You can set this using the " + f"'{API_KEY_ENV_NAME}' environment variable." 
+ ) + ) + self.url_root = f"{self.base_url}/workflows/{workflow_id}/executions" diff --git a/mindee/parsing/common/execution.py b/mindee/parsing/common/execution.py new file mode 100644 index 00000000..f55352b7 --- /dev/null +++ b/mindee/parsing/common/execution.py @@ -0,0 +1,81 @@ +from datetime import datetime +from typing import Generic, Optional, Type + +from mindee.parsing.common import Inference, Page, StringDict +from mindee.parsing.common.execution_file import ExecutionFile +from mindee.parsing.common.execution_priority import ExecutionPriority +from mindee.parsing.common.prediction import TypePrediction +from mindee.product import GeneratedV1Document + + +class Execution(Generic[TypePrediction]): + """Workflow execution class.""" + + batch_name: str + """Identifier for the batch to which the execution belongs.""" + + created_at: Optional[datetime] + """The time at which the execution started.""" + + file: ExecutionFile + """File representation within a workflow execution.""" + + id: str + """Identifier for the execution.""" + + inference: Optional[Inference[TypePrediction, Page[TypePrediction]]] + """Deserialized inference object.""" + + priority: Optional["ExecutionPriority"] + """Priority of the execution.""" + + reviewed_at: Optional[datetime] + """The time at which the file was tagged as reviewed.""" + + available_at: Optional[datetime] + """The time at which the file was uploaded to a workflow.""" + + reviewed_prediction: Optional["GeneratedV1Document"] + """Reviewed fields and values.""" + + status: str + """Execution Status.""" + + type: Optional[str] + """Execution type.""" + + uploaded_at: Optional[datetime] + """The time at which the file was uploaded to a workflow.""" + + workflow_id: str + """Identifier for the workflow.""" + + def __init__(self, inference_type: Type[Inference], json_response: StringDict): + self.batch_name = json_response["batch_name"] + self.created_at = self.parse_date(json_response.get("created_at", None)) + self.file = 
ExecutionFile(json_response["file"]) + self.id = json_response["id"] + self.inference = ( + inference_type(json_response["inference"]) + if json_response["inference"] + else None + ) + self.priority = json_response.get("priority", None) + self.reviewed_at = self.parse_date(json_response.get("reviewed_at", None)) + self.available_at = self.parse_date(json_response.get("available_at", None)) + self.reviewed_prediction = ( + GeneratedV1Document(json_response["reviewed_prediction"]) + if json_response["reviewed_prediction"] + else None + ) + self.status = json_response["status"] + self.type = json_response.get("type", None) + self.uploaded_at = self.parse_date(json_response.get("uploaded_at", None)) + self.workflow_id = json_response["workflow_id"] + + @staticmethod + def parse_date(date_string: Optional[str]) -> Optional[datetime]: + """Shorthand to parse the date, if present.""" + if not date_string: + return None + return datetime.fromisoformat(date_string) diff --git a/mindee/parsing/common/execution_file.py b/mindee/parsing/common/execution_file.py new file mode 100644 index 00000000..de9dcc75 --- /dev/null +++ b/mindee/parsing/common/execution_file.py @@ -0,0 +1,17 @@ +from typing import Optional + +from mindee.parsing.common import StringDict + + +class ExecutionFile: + """Execution File class.""" + + name: Optional[str] + """File name.""" + + alias: Optional[str] + """File alias.""" + + def __init__(self, json_response: StringDict): + self.name = json_response["name"] + self.alias = json_response["alias"] diff --git a/mindee/parsing/common/execution_priority.py b/mindee/parsing/common/execution_priority.py new file mode 100644 index 00000000..9bb2ccca --- /dev/null +++ b/mindee/parsing/common/execution_priority.py @@ -0,0 +1,9 @@ +from enum import Enum + + +class ExecutionPriority(Enum): + """Available priorities for workflow executions.""" + + LOW = "low" + MEDIUM = "medium" + HIGH = "high" diff --git a/mindee/parsing/common/workflow_response.py 
b/mindee/parsing/common/workflow_response.py new file mode 100644 index 00000000..d5316ef7 --- /dev/null +++ b/mindee/parsing/common/workflow_response.py @@ -0,0 +1,20 @@ +from typing import Generic, Type + +from mindee.parsing.common import Inference, StringDict +from mindee.parsing.common.api_response import ApiResponse +from mindee.parsing.common.execution import Execution +from mindee.parsing.common.prediction import TypePrediction + + +class WorkflowResponse(Generic[TypePrediction], ApiResponse): + """Wrapper for the response returned when a document is sent to a workflow.""" + + execution: Execution + """ + Workflow execution described by the response. + Deserialized from the "execution" entry of the raw response. + """ + + def __init__(self, inference_type: Type[Inference], raw_response: StringDict): + super().__init__(raw_response) + self.execution = Execution(inference_type, raw_response["execution"]) diff --git a/tests/test_code_samples.sh b/tests/test_code_samples.sh index c902a084..32878785 100755 --- a/tests/test_code_samples.sh +++ b/tests/test_code_samples.sh @@ -6,7 +6,7 @@ ACCOUNT=$1 ENDPOINT=$2 API_KEY=$3 -for f in $(find ./docs/extras/code_samples -maxdepth 1 -name "*.txt" | sort -h) +for f in $(find ./docs/extras/code_samples -maxdepth 1 -name "*.txt" -not -name "workflow_execution.txt" | sort -h) do echo echo "###############################################" diff --git a/tests/utils.py b/tests/utils.py index 3245adfa..6e1af706 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,7 +1,7 @@ from difflib import SequenceMatcher from pathlib import Path -from mindee.mindee_http.mindee_api import ( +from mindee.mindee_http.base_settings import ( API_KEY_ENV_NAME, BASE_URL_ENV_NAME, REQUEST_TIMEOUT_ENV_NAME, diff --git a/tests/workflows/__init__.py b/tests/workflows/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/workflows/test_workflow.py b/tests/workflows/test_workflow.py new file mode 100644 index 
00000000..526eeee9 --- /dev/null +++ 
b/tests/workflows/test_workflow.py @@ -0,0 +1,81 @@ +import json +from pathlib import Path + +import pytest + +from mindee.parsing.common.workflow_response import WorkflowResponse +from mindee.product.generated.generated_v1 import GeneratedV1 + +WORKFLOW_DIR = Path("./tests/data") / "workflows" + + +@pytest.fixture +def success_workflow() -> WorkflowResponse: + file_path = WORKFLOW_DIR / "success.json" + with open(file_path, "r", encoding="utf-8") as open_file: + json_data = json.load(open_file) + return WorkflowResponse(GeneratedV1, json_data) + + +@pytest.fixture +def success_low_priority_workflow() -> WorkflowResponse: + file_path = WORKFLOW_DIR / "success_low_priority.json" + with open(file_path, "r", encoding="utf-8") as open_file: + json_data = json.load(open_file) + return WorkflowResponse(GeneratedV1, json_data) + + +def test_deserialize_workflow(success_workflow: WorkflowResponse): + assert success_workflow is not None + assert success_workflow.api_request is not None + assert success_workflow.execution.batch_name is None + assert success_workflow.execution.created_at is None + assert success_workflow.execution.file.alias is None + assert success_workflow.execution.file.name == "default_sample.jpg" + assert success_workflow.execution.id == "8c75c035-e083-4e77-ba3b-7c3598bd1d8a" + assert success_workflow.execution.inference is None + assert success_workflow.execution.priority == "medium" + assert success_workflow.execution.reviewed_at is None + assert success_workflow.execution.reviewed_prediction is None + assert success_workflow.execution.status == "processing" + assert success_workflow.execution.type == "manual" + assert ( + success_workflow.execution.uploaded_at.strftime("%Y-%m-%dT%H:%M:%S.%f") + == "2024-11-13T13:02:31.699190" + ) + assert ( + success_workflow.execution.workflow_id == "07ebf237-ff27-4eee-b6a2-425df4a5cca6" + ) + + +def test_deserialize_workflow_with_priority_and_alias( + success_low_priority_workflow: WorkflowResponse, +): + assert 
success_low_priority_workflow is not None + assert success_low_priority_workflow.api_request is not None + assert success_low_priority_workflow.execution.batch_name is None + assert success_low_priority_workflow.execution.created_at is None + assert ( + success_low_priority_workflow.execution.file.alias == "low-priority-sample-test" + ) + assert success_low_priority_workflow.execution.file.name == "default_sample.jpg" + assert ( + success_low_priority_workflow.execution.id + == "b743e123-e18c-4b62-8a07-811a4f72afd3" + ) + assert success_low_priority_workflow.execution.inference is None + assert success_low_priority_workflow.execution.priority == "low" + assert success_low_priority_workflow.execution.reviewed_at is None + assert success_low_priority_workflow.execution.reviewed_prediction is None + assert success_low_priority_workflow.execution.status == "processing" + assert success_low_priority_workflow.execution.type == "manual" + assert ( + success_low_priority_workflow.execution.uploaded_at.strftime( + "%Y-%m-%dT%H:%M:%S.%f" + ) + == "2024-11-13T13:17:01.315179" + ) + assert ( + success_low_priority_workflow.execution.workflow_id + == "07ebf237-ff27-4eee-b6a2-425df4a5cca6" + ) diff --git a/tests/workflows/test_workflow_integration.py b/tests/workflows/test_workflow_integration.py new file mode 100644 index 00000000..d378fa34 --- /dev/null +++ b/tests/workflows/test_workflow_integration.py @@ -0,0 +1,39 @@ +import os +from datetime import datetime + +import pytest + +from mindee import Client +from mindee.parsing.common.execution_priority import ExecutionPriority +from tests.product import PRODUCT_DATA_DIR + + +@pytest.fixture +def mindee_client(): + return Client() + + +@pytest.fixture +def workflow_id(): + return os.getenv("WORKFLOW_ID", "") + + +@pytest.fixture +def input_path(): + return PRODUCT_DATA_DIR / "financial_document" / "default_sample.jpg" + + +@pytest.mark.integration +def test_workflow(mindee_client: Client, workflow_id: str, input_path: str): + 
input_source = mindee_client.source_from_path(str(input_path)) + current_date_time = datetime.now().strftime("%Y-%m-%d-%H:%M:%S") + alias = f"python-{current_date_time}" + priority = ExecutionPriority.LOW + + response = mindee_client.execute_workflow( + input_source, workflow_id, alias=alias, priority=priority + ) + + assert response.api_request.status_code == 202 + assert response.execution.file.alias == f"python-{current_date_time}" + assert response.execution.priority == "low" From 27720202de861e2b98ee4bd70975667041dfacda Mon Sep 17 00:00:00 2001 From: sebastianMindee Date: Thu, 21 Nov 2024 18:28:37 +0100 Subject: [PATCH 2/8] add hook notification --- .github/workflows/test-code-samples.yml | 2 +- .github/workflows/test-integration.yml | 11 +++++++++++ .github/workflows/test-regression.yml | 2 +- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-code-samples.yml b/.github/workflows/test-code-samples.yml index d4c1a8ad..bf4d4a12 100644 --- a/.github/workflows/test-code-samples.yml +++ b/.github/workflows/test-code-samples.yml @@ -52,6 +52,6 @@ jobs: with: status: ${{ job.status }} notify_when: "failure" - notification_title: "{workflow} is failing" + notification_title: "Code sample test '{workflow}' is failing" env: SLACK_WEBHOOK_URL: ${{ secrets.PRODUCTION_ISSUES_SLACK_HOOK_URL }} diff --git a/.github/workflows/test-integration.yml b/.github/workflows/test-integration.yml index 4b0bfd39..642493a2 100644 --- a/.github/workflows/test-integration.yml +++ b/.github/workflows/test-integration.yml @@ -51,3 +51,14 @@ jobs: MINDEE_API_KEY: ${{ secrets.MINDEE_API_KEY_SE_TESTS }} run: | pytest -m integration + + + - name: Notify Slack Action on Failure + uses: ravsamhq/notify-slack-action@2.3.0 + if: ${{ always() && github.ref_name == 'main' }} + with: + status: ${{ job.status }} + notify_when: "failure" + notification_title: "Integration test '{workflow}' is failing" + env: + SLACK_WEBHOOK_URL: ${{ 
secrets.PRODUCTION_ISSUES_SLACK_HOOK_URL }} \ No newline at end of file diff --git a/.github/workflows/test-regression.yml b/.github/workflows/test-regression.yml index d2df1a5b..1f2de87d 100644 --- a/.github/workflows/test-regression.yml +++ b/.github/workflows/test-regression.yml @@ -59,7 +59,7 @@ jobs: with: status: ${{ job.status }} notify_when: "failure" - notification_title: "Regression test workflow {workflow} is failing" + notification_title: "Regression test workflow '{workflow}' is failing" env: SLACK_WEBHOOK_URL: ${{ secrets.PRODUCTION_ISSUES_SLACK_HOOK_URL }} WORKFLOW_ID: ${{ secrets.WORKFLOW_ID_SE_TESTS }} From 75a213c58d10d251ad2f2b1366cae52477a13e6f Mon Sep 17 00:00:00 2001 From: sebastianMindee Date: Thu, 21 Nov 2024 18:31:42 +0100 Subject: [PATCH 3/8] fix cyclic import --- mindee/mindee_http/workflow_endpoint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mindee/mindee_http/workflow_endpoint.py b/mindee/mindee_http/workflow_endpoint.py index a902389b..02503bee 100644 --- a/mindee/mindee_http/workflow_endpoint.py +++ b/mindee/mindee_http/workflow_endpoint.py @@ -3,7 +3,7 @@ import requests from mindee.input import LocalInputSource, UrlInputSource -from mindee.mindee_http import BaseEndpoint +from mindee.mindee_http.base_endpoint import BaseEndpoint from mindee.mindee_http.workflow_settings import WorkflowSettings from mindee.parsing.common.execution_priority import ExecutionPriority From 8f41a0a3695b82312f3baea156ee7ebfffc48079 Mon Sep 17 00:00:00 2001 From: sebastianMindee Date: Thu, 21 Nov 2024 18:36:21 +0100 Subject: [PATCH 4/8] fix env var location --- .github/workflows/test-integration.yml | 1 + .github/workflows/test-regression.yml | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-integration.yml b/.github/workflows/test-integration.yml index 642493a2..1e172547 100644 --- a/.github/workflows/test-integration.yml +++ b/.github/workflows/test-integration.yml @@ -49,6 +49,7 @@ jobs: - 
name: Run Integration Testing env: MINDEE_API_KEY: ${{ secrets.MINDEE_API_KEY_SE_TESTS }} + WORKFLOW_ID: ${{ secrets.WORKFLOW_ID_SE_TESTS }} run: | pytest -m integration diff --git a/.github/workflows/test-regression.yml b/.github/workflows/test-regression.yml index 1f2de87d..5ef5691f 100644 --- a/.github/workflows/test-regression.yml +++ b/.github/workflows/test-regression.yml @@ -62,4 +62,3 @@ jobs: notification_title: "Regression test workflow '{workflow}' is failing" env: SLACK_WEBHOOK_URL: ${{ secrets.PRODUCTION_ISSUES_SLACK_HOOK_URL }} - WORKFLOW_ID: ${{ secrets.WORKFLOW_ID_SE_TESTS }} From dd9ae3eb1141e3b9b385edced6fa4ed90e0d144f Mon Sep 17 00:00:00 2001 From: sebastianMindee Date: Fri, 22 Nov 2024 10:34:50 +0100 Subject: [PATCH 5/8] add exports to init --- mindee/parsing/common/__init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/mindee/parsing/common/__init__.py b/mindee/parsing/common/__init__.py index 7d5bf8ed..4707e85d 100644 --- a/mindee/parsing/common/__init__.py +++ b/mindee/parsing/common/__init__.py @@ -2,6 +2,9 @@ from mindee.parsing.common.api_response import ApiResponse from mindee.parsing.common.async_predict_response import AsyncPredictResponse from mindee.parsing.common.document import Document +from mindee.parsing.common.execution import Execution +from mindee.parsing.common.execution_file import ExecutionFile +from mindee.parsing.common.execution_priority import ExecutionPriority from mindee.parsing.common.extras import CropperExtra, Extras from mindee.parsing.common.feedback_response import FeedbackResponse from mindee.parsing.common.inference import Inference, TypeInference @@ -18,3 +21,4 @@ format_for_display, line_separator, ) +from mindee.parsing.common.workflow_response import WorkflowResponse From cc1e30bcfb3432b810b70f5f0be23afae4e49b25 Mon Sep 17 00:00:00 2001 From: sebastianMindee Date: Fri, 22 Nov 2024 17:32:43 +0100 Subject: [PATCH 6/8] fix syntax --- mindee/client.py | 2 -- mindee/parsing/common/execution.py | 6 
++++-- mindee/parsing/common/execution_file.py | 2 +- mindee/parsing/common/workflow_response.py | 3 ++- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/mindee/client.py b/mindee/client.py index b79f2195..199d4c7e 100644 --- a/mindee/client.py +++ b/mindee/client.py @@ -22,8 +22,6 @@ is_valid_async_response, is_valid_sync_response, ) -from mindee.mindee_http.endpoint import CustomEndpoint, Endpoint -from mindee.mindee_http.mindee_api import MindeeApi from mindee.mindee_http.workflow_endpoint import WorkflowEndpoint from mindee.mindee_http.workflow_settings import WorkflowSettings from mindee.parsing.common.async_predict_response import AsyncPredictResponse diff --git a/mindee/parsing/common/execution.py b/mindee/parsing/common/execution.py index f55352b7..6e3d7da5 100644 --- a/mindee/parsing/common/execution.py +++ b/mindee/parsing/common/execution.py @@ -1,11 +1,13 @@ from datetime import datetime from typing import Generic, Optional, Type -from mindee.parsing.common import Inference, Page, StringDict from mindee.parsing.common.execution_file import ExecutionFile from mindee.parsing.common.execution_priority import ExecutionPriority +from mindee.parsing.common.inference import Inference +from mindee.parsing.common.page import Page from mindee.parsing.common.prediction import TypePrediction -from mindee.product import GeneratedV1Document +from mindee.parsing.common.string_dict import StringDict +from mindee.product.generated.generated_v1 import GeneratedV1Document class Execution(Generic[TypePrediction]): diff --git a/mindee/parsing/common/execution_file.py b/mindee/parsing/common/execution_file.py index de9dcc75..3c728ddb 100644 --- a/mindee/parsing/common/execution_file.py +++ b/mindee/parsing/common/execution_file.py @@ -1,6 +1,6 @@ from typing import Optional -from mindee.parsing.common import StringDict +from mindee.parsing.common.string_dict import StringDict class ExecutionFile: diff --git a/mindee/parsing/common/workflow_response.py 
b/mindee/parsing/common/workflow_response.py index d5316ef7..c0f01105 100644 --- a/mindee/parsing/common/workflow_response.py +++ b/mindee/parsing/common/workflow_response.py @@ -1,9 +1,10 @@ from typing import Generic, Type -from mindee.parsing.common import Inference, StringDict from mindee.parsing.common.api_response import ApiResponse from mindee.parsing.common.execution import Execution +from mindee.parsing.common.inference import Inference from mindee.parsing.common.prediction import TypePrediction +from mindee.parsing.common.string_dict import StringDict class WorkflowResponse(Generic[TypePrediction], ApiResponse): From 509e0906f4cccc0eb06d29a4f5bdbbead8e96811 Mon Sep 17 00:00:00 2001 From: sebastianMindee Date: Tue, 26 Nov 2024 10:53:32 +0100 Subject: [PATCH 7/8] add public url parameter to workflows --- mindee/client.py | 31 ++++++++------------ mindee/input/__init__.py | 1 + mindee/input/workflow_options.py | 28 ++++++++++++++++++ mindee/mindee_http/workflow_endpoint.py | 25 +++++++--------- tests/workflows/test_workflow_integration.py | 6 ++-- 5 files changed, 55 insertions(+), 36 deletions(-) create mode 100644 mindee/input/workflow_options.py diff --git a/mindee/client.py b/mindee/client.py index 199d4c7e..48956e8d 100644 --- a/mindee/client.py +++ b/mindee/client.py @@ -4,6 +4,7 @@ from mindee.error.mindee_error import MindeeClientError, MindeeError from mindee.error.mindee_http_error import handle_error +from mindee.input import WorkflowOptions from mindee.input.local_response import LocalResponse from mindee.input.page_options import PageOptions from mindee.input.sources import ( @@ -25,7 +26,6 @@ from mindee.mindee_http.workflow_endpoint import WorkflowEndpoint from mindee.mindee_http.workflow_settings import WorkflowSettings from mindee.parsing.common.async_predict_response import AsyncPredictResponse -from mindee.parsing.common.execution_priority import ExecutionPriority from mindee.parsing.common.feedback_response import FeedbackResponse from 
mindee.parsing.common.inference import Inference from mindee.parsing.common.predict_response import PredictResponse @@ -239,10 +239,8 @@ def execute_workflow( self, input_source: Union[LocalInputSource, UrlInputSource], workflow_id: str, + options: Optional[WorkflowOptions] = None, page_options: Optional[PageOptions] = None, - alias: Optional[str] = None, - priority: Optional[ExecutionPriority] = None, - full_text: bool = False, ) -> WorkflowResponse: """ Send the document to an asynchronous endpoint and return its ID in the queue. @@ -252,9 +250,7 @@ def execute_workflow( :param workflow_id: ID of the workflow. :param page_options: If set, remove pages from the document as specified. This is done before sending the file\ to the server. It is useful to avoid page limitations. - :param alias: Optional alias for the document. - :param priority: Optional priority for the document. - :param full_text: Whether to include the full OCR text response in compatible APIs. + :param options: Options for the workflow. :return: """ if isinstance(input_source, LocalInputSource): @@ -267,9 +263,12 @@ def execute_workflow( logger.debug("Sending document to workflow: %s", workflow_id) - return self._send_to_workflow( - GeneratedV1, input_source, workflow_id, alias, priority, full_text - ) + if not options: + options = WorkflowOptions( + alias=None, priority=None, full_text=False, public_url=None + ) + + return self._send_to_workflow(GeneratedV1, input_source, workflow_id, options) def _validate_async_params( self, initial_delay_sec: float, delay_sec: float, max_retries: int @@ -484,9 +483,7 @@ def _send_to_workflow( product_class: Type[Inference], input_source: Union[LocalInputSource, UrlInputSource], workflow_id: str, - alias: Optional[str] = None, - priority: Optional[ExecutionPriority] = None, - full_text: bool = False, + options: WorkflowOptions, ) -> WorkflowResponse: """ Sends a document to a workflow. 
@@ -497,9 +494,7 @@ def _send_to_workflow( :param input_source: The document/source file to use. Has to be created beforehand. :param workflow_id: ID of the workflow. - :param alias: Optional alias for the document. - :param priority: Priority for the document. - :param full_text: Whether to include the full OCR text response in compatible APIs. + :param options: Options for the workflow. :return: """ if input_source is None: @@ -509,9 +504,7 @@ def _send_to_workflow( WorkflowSettings(api_key=self.api_key, workflow_id=workflow_id) ) - response = workflow_endpoint.workflow_execution_post( - input_source, alias, priority, full_text - ) + response = workflow_endpoint.workflow_execution_post(input_source, options) dict_response = response.json() diff --git a/mindee/input/__init__.py b/mindee/input/__init__.py index 81744be8..008d880d 100644 --- a/mindee/input/__init__.py +++ b/mindee/input/__init__.py @@ -9,3 +9,4 @@ PathInput, UrlInputSource, ) +from mindee.input.workflow_options import WorkflowOptions diff --git a/mindee/input/workflow_options.py b/mindee/input/workflow_options.py new file mode 100644 index 00000000..32b56d45 --- /dev/null +++ b/mindee/input/workflow_options.py @@ -0,0 +1,28 @@ +from typing import Optional + +from mindee.parsing.common import ExecutionPriority + + +class WorkflowOptions: + """Options to pass to a workflow execution.""" + + alias: Optional[str] + """Alias for the document.""" + priority: Optional[ExecutionPriority] + """Priority of the document.""" + full_text: bool + """Whether to include the full OCR text response in compatible APIs.""" + public_url: Optional[str] + """A unique, encrypted URL for accessing the document validation interface without requiring authentication.""" + + def __init__( + self, + alias: Optional[str] = None, + priority: Optional[ExecutionPriority] = None, + full_text: Optional[bool] = False, + public_url: Optional[str] = None, + ): + self.alias = alias + self.priority = priority + self.full_text = 
full_text if full_text else False + self.public_url = public_url diff --git a/mindee/mindee_http/workflow_endpoint.py b/mindee/mindee_http/workflow_endpoint.py index 02503bee..4fe26d87 100644 --- a/mindee/mindee_http/workflow_endpoint.py +++ b/mindee/mindee_http/workflow_endpoint.py @@ -1,11 +1,10 @@ -from typing import Optional, Union +from typing import Union import requests -from mindee.input import LocalInputSource, UrlInputSource +from mindee.input import LocalInputSource, UrlInputSource, WorkflowOptions from mindee.mindee_http.base_endpoint import BaseEndpoint from mindee.mindee_http.workflow_settings import WorkflowSettings -from mindee.parsing.common.execution_priority import ExecutionPriority class WorkflowEndpoint(BaseEndpoint): @@ -24,29 +23,27 @@ def __init__(self, settings: WorkflowSettings) -> None: def workflow_execution_post( self, input_source: Union[LocalInputSource, UrlInputSource], - alias: Optional[str] = None, - priority: Optional[ExecutionPriority] = None, - full_text: bool = False, + options: WorkflowOptions, ): """ Sends the document to the workflow. :param input_source: The document/source file to use. Has to be created beforehand. - :param alias: Optional alias for the document. - :param priority: Priority for the document. - :param full_text: Whether to include the full OCR text response in compatible APIs. + :param options: Options for the workflow. 
:return: """ data = {} - if alias: - data["alias"] = alias - if priority: - data["priority"] = priority.value + if options.alias: + data["alias"] = options.alias + if options.priority: + data["priority"] = options.priority.value + if options.public_url: + data["public_url"] = options.public_url params = {} - if full_text: + if options.full_text: params["full_text_ocr"] = "true" if isinstance(input_source, UrlInputSource): diff --git a/tests/workflows/test_workflow_integration.py b/tests/workflows/test_workflow_integration.py index d378fa34..3d963393 100644 --- a/tests/workflows/test_workflow_integration.py +++ b/tests/workflows/test_workflow_integration.py @@ -4,6 +4,7 @@ import pytest from mindee import Client +from mindee.input import WorkflowOptions from mindee.parsing.common.execution_priority import ExecutionPriority from tests.product import PRODUCT_DATA_DIR @@ -29,10 +30,9 @@ def test_workflow(mindee_client: Client, workflow_id: str, input_path: str): current_date_time = datetime.now().strftime("%Y-%m-%d-%H:%M:%S") alias = f"python-{current_date_time}" priority = ExecutionPriority.LOW + options = WorkflowOptions(alias=alias, priority=priority) - response = mindee_client.execute_workflow( - input_source, workflow_id, alias=alias, priority=priority - ) + response = mindee_client.execute_workflow(input_source, workflow_id, options) assert response.api_request.status_code == 202 assert response.execution.file.alias == f"python-{current_date_time}" From 37ab31a0717061def3bf032eb8461301aa29af18 Mon Sep 17 00:00:00 2001 From: sebastianMindee Date: Tue, 26 Nov 2024 15:40:07 +0100 Subject: [PATCH 8/8] apply suggestions --- docs/extras/code_samples/workflow_execution.txt | 1 + mindee/client.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/extras/code_samples/workflow_execution.txt b/docs/extras/code_samples/workflow_execution.txt index 29bcdddb..567352cc 100644 --- a/docs/extras/code_samples/workflow_execution.txt +++ 
b/docs/extras/code_samples/workflow_execution.txt @@ -1,4 +1,5 @@ from mindee import Client, WorkflowResponse +from mindee.parsing.common import ExecutionPriority # Init a new client mindee_client = Client(api_key: "my-api-key") diff --git a/mindee/client.py b/mindee/client.py index 48956e8d..54111be0 100644 --- a/mindee/client.py +++ b/mindee/client.py @@ -243,7 +243,7 @@ def execute_workflow( page_options: Optional[PageOptions] = None, ) -> WorkflowResponse: """ - Send the document to an asynchronous endpoint and return its ID in the queue. + Send the document to a workflow execution. :param input_source: The document/source file to use. Has to be created beforehand.