Skip to content

Commit f48bf22

Browse files
authored
Merge pull request #1013 from UiPath/feat/ixp-lrwf
feat: add support for ixp extraction non-blocking
2 parents d5bfdee + 69d8cf1 commit f48bf22

File tree

9 files changed

+516
-22
lines changed

9 files changed

+516
-22
lines changed

src/uipath/platform/context_grounding/_context_grounding_service.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ def __init__(
7272
self._buckets_service = buckets_service
7373
super().__init__(config=config, execution_context=execution_context)
7474

75+
# 2.3.0 prefix trace name with contextgrounding
7576
@traced(name="add_to_index", run_type="uipath")
7677
@resource_override(resource_type="index")
7778
def add_to_index(
@@ -127,6 +128,7 @@ def add_to_index(
127128
if ingest_data:
128129
self.ingest_data(index, folder_key=folder_key, folder_path=folder_path)
129130

131+
# 2.3.0 prefix trace name with contextgrounding
130132
@traced(name="add_to_index", run_type="uipath")
131133
@resource_override(resource_type="index")
132134
async def add_to_index_async(

src/uipath/platform/documents/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
FileContent,
1919
ProjectType,
2020
Reference,
21+
StartExtractionResponse,
2122
ValidateClassificationAction,
2223
ValidateExtractionAction,
2324
ValidationAction,
@@ -41,4 +42,5 @@
4142
"ClassificationResult",
4243
"ClassificationResponse",
4344
"FileContent",
45+
"StartExtractionResponse",
4446
]

src/uipath/platform/documents/_documents_service.py

Lines changed: 221 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from ..._utils import Endpoint
1010
from ...tracing import traced
1111
from ..common import BaseService, FolderContext, UiPathApiConfig, UiPathExecutionContext
12+
from ..errors import ExtractionNotCompleteException
1213
from .documents import (
1314
ActionPriority,
1415
ClassificationResponse,
@@ -17,6 +18,7 @@
1718
ExtractionResponseIXP,
1819
FileContent,
1920
ProjectType,
21+
StartExtractionResponse,
2022
ValidateClassificationAction,
2123
ValidateExtractionAction,
2224
)
@@ -119,7 +121,9 @@ class DocumentsService(FolderContext, BaseService):
119121
"""
120122

121123
def __init__(
122-
self, config: UiPathApiConfig, execution_context: UiPathExecutionContext
124+
self,
125+
config: UiPathApiConfig,
126+
execution_context: UiPathExecutionContext,
123127
) -> None:
124128
super().__init__(config=config, execution_context=execution_context)
125129

@@ -433,7 +437,7 @@ def _start_extraction(
433437
tag: Optional[str],
434438
document_type_id: str,
435439
document_id: str,
436-
) -> str:
440+
) -> StartExtractionResponse:
437441
if project_type == ProjectType.PRETRAINED:
438442
url = Endpoint(
439443
f"/du_/api/framework/projects/{project_id}/extractors/{document_type_id}/extraction/start"
@@ -443,22 +447,29 @@ def _start_extraction(
443447
f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{document_type_id}/extraction/start"
444448
)
445449

446-
return self.request(
450+
operation_id = self.request(
447451
"POST",
448452
url=url,
449453
params={"api-version": 1.1},
450454
headers=self._get_common_headers(),
451455
json={"documentId": document_id},
452456
).json()["operationId"]
453457

458+
return StartExtractionResponse(
459+
operation_id=operation_id,
460+
document_id=document_id,
461+
project_id=project_id,
462+
tag=tag,
463+
)
464+
454465
async def _start_extraction_async(
    self,
    project_id: str,
    project_type: ProjectType,
    tag: Optional[str],
    document_type_id: str,
    document_id: str,
) -> StartExtractionResponse:
    """Start an extraction operation for an already digitized document (async).

    Args:
        project_id (str): ID of the project that owns the document.
        project_type (ProjectType): Selects which endpoint is called;
            ``ProjectType.PRETRAINED`` uses the ``extractors`` route, every
            other project type uses the tagged ``document-types`` route.
        tag (Optional[str]): Tag of the published project version. Only
            interpolated into the URL on the non-pretrained route.
        document_type_id (str): Extractor / document type identifier.
        document_id (str): ID of the digitized document to extract from.

    Returns:
        StartExtractionResponse: The ``operationId`` returned by the service
        together with the ``document_id``, ``project_id`` and ``tag`` that
        were passed in.
    """
    if project_type == ProjectType.PRETRAINED:
        url = Endpoint(
            f"/du_/api/framework/projects/{project_id}/extractors/{document_type_id}/extraction/start"
        )
    else:
        url = Endpoint(
            f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{document_type_id}/extraction/start"
        )

    # This is a coroutine: the request must go through the awaitable
    # `request_async`. Calling the synchronous `self.request` here would
    # block the event loop for the duration of the HTTP round trip.
    response = await self.request_async(
        "POST",
        url=url,
        params={"api-version": 1.1},
        headers=self._get_common_headers(),
        json={"documentId": document_id},
    )
    operation_id = response.json()["operationId"]

    return StartExtractionResponse(
        operation_id=operation_id,
        document_id=document_id,
        project_id=project_id,
        tag=tag,
    )
496+
481497
def _wait_for_operation(
482498
self,
483499
result_getter: Callable[[], Tuple[Any, Optional[Any], Optional[Any]]],
@@ -887,6 +903,189 @@ async def classify_async(
887903
operation_id=operation_id,
888904
)
889905

906+
@traced(name="documents_start_ixp_extraction", run_type="uipath")
def start_ixp_extraction(
    self,
    project_name: str,
    tag: str,
    file: Optional[FileContent] = None,
    file_path: Optional[str] = None,
) -> StartExtractionResponse:
    """Kick off an IXP extraction and return without waiting for its result.

    The project name is resolved to a project ID, the document is submitted
    for digitization, and an extraction operation is started for it. The
    returned response carries the identifiers needed to fetch the result
    later, e.g. with `retrieve_ixp_extraction_result`.

    Args:
        project_name (str): Name of the IXP project.
        tag (str): Tag of the published project version (e.g., "staging").
        file (FileContent, optional): The document file to be processed.
        file_path (str, optional): Path to the document file to be processed.

    Note:
        Exactly one of `file` and `file_path` must be provided; the check is
        delegated to `_exactly_one_must_be_provided`.

    Returns:
        StartExtractionResponse: Contains the operation_id, document_id,
        project_id, and tag.

    Examples:
        ```python
        start_response = uipath.documents.start_ixp_extraction(
            project_name="MyIXPProjectName",
            tag="staging",
            file_path="path/to/document.pdf",
        )
        # start_response.operation_id can be used to fetch the result later
        ```
    """
    _exactly_one_must_be_provided(file=file, file_path=file_path)

    ixp_project_id = self._get_project_id_by_name(project_name, ProjectType.IXP)
    digitized_document_id = self._start_digitization(
        project_id=ixp_project_id,
        file=file,
        file_path=file_path,
    )

    # IXP calls in this service always pass the all-zero UUID as document type.
    start_response = self._start_extraction(
        project_id=ixp_project_id,
        project_type=ProjectType.IXP,
        tag=tag,
        document_type_id=str(UUID(int=0)),
        document_id=digitized_document_id,
    )
    return start_response
959+
960+
@traced(name="documents_start_ixp_extraction_async", run_type="uipath")
async def start_ixp_extraction_async(
    self,
    project_name: str,
    tag: str,
    file: Optional[FileContent] = None,
    file_path: Optional[str] = None,
) -> StartExtractionResponse:
    """Asynchronous version of the [`start_ixp_extraction`][uipath.platform.documents._documents_service.DocumentsService.start_ixp_extraction] method.

    Resolves the project ID, starts digitization and then starts the
    extraction, awaiting each step in turn. Exactly one of `file` and
    `file_path` must be provided.
    """
    _exactly_one_must_be_provided(file=file, file_path=file_path)

    ixp_project_id = await self._get_project_id_by_name_async(
        project_name, ProjectType.IXP
    )
    digitized_document_id = await self._start_digitization_async(
        project_id=ixp_project_id,
        file=file,
        file_path=file_path,
    )

    # IXP calls in this service always pass the all-zero UUID as document type.
    start_response = await self._start_extraction_async(
        project_id=ixp_project_id,
        project_type=ProjectType.IXP,
        tag=tag,
        document_type_id=str(UUID(int=0)),
        document_id=digitized_document_id,
    )
    return start_response
988+
989+
@traced(name="documents_retrieve_ixp_extraction_result", run_type="uipath")
def retrieve_ixp_extraction_result(
    self,
    project_id: str,
    tag: str,
    operation_id: str,
) -> ExtractionResponseIXP:
    """Retrieve the result of an IXP extraction operation (single-shot, non-blocking).

    This method retrieves the result of an IXP extraction that was previously
    started with `start_ixp_extraction`. It does not poll - it makes a single
    request and returns the result if available, or raises if it is not.

    Args:
        project_id (str): The ID of the IXP project.
        tag (str): The tag of the published project version.
        operation_id (str): The operation ID returned from `start_ixp_extraction`.

    Returns:
        ExtractionResponseIXP: The extraction response containing the extracted data.

    Raises:
        ExtractionNotCompleteException: If the reported status is
            "NotStarted" or "Running".
        RuntimeError: If the operation is no longer pending but the response
            carries no result payload.

    Examples:
        ```python
        # After receiving a callback/webhook that extraction is complete:
        result = service.retrieve_ixp_extraction_result(
            project_id=start_response.project_id,
            tag=start_response.tag,
            operation_id=start_response.operation_id,
        )
        ```
    """
    # IXP calls in this service always pass the all-zero UUID as document type.
    document_type_id = str(UUID(int=0))

    url = Endpoint(
        f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{document_type_id}/extraction/result/{operation_id}"
    )

    result = self.request(
        method="GET",
        url=url,
        params={"api-version": "1.1"},
        headers=self._get_common_headers(),
    ).json()

    status = result.get("status")
    if status in ("NotStarted", "Running"):
        raise ExtractionNotCompleteException(
            operation_id=operation_id,
            status=status,
        )

    payload = result.get("result")
    if payload is None:
        # Without this guard the enrichment below fails with an opaque
        # "'NoneType' object does not support item assignment".
        raise RuntimeError(
            f"IXP extraction operation '{operation_id}' returned no result "
            f"(status: {status!r})."
        )

    # Enrich a copy with the request context the response model expects,
    # leaving the decoded HTTP payload untouched.
    extraction_response = {
        **payload,
        "projectId": project_id,
        "tag": tag,
        "documentTypeId": document_type_id,
        "projectType": ProjectType.IXP,
    }

    return ExtractionResponseIXP.model_validate(extraction_response)
1050+
1051+
@traced(name="documents_retrieve_ixp_extraction_result_async", run_type="uipath")
async def retrieve_ixp_extraction_result_async(
    self,
    project_id: str,
    tag: str,
    operation_id: str,
) -> ExtractionResponseIXP:
    """Asynchronous version of the [`retrieve_ixp_extraction_result`][uipath.platform.documents._documents_service.DocumentsService.retrieve_ixp_extraction_result] method.

    Makes a single awaited GET request; it does not poll.

    Raises:
        ExtractionNotCompleteException: If the reported status is
            "NotStarted" or "Running".
        RuntimeError: If the operation is no longer pending but the response
            carries no result payload.
    """
    # IXP calls in this service always pass the all-zero UUID as document type.
    document_type_id = str(UUID(int=0))

    url = Endpoint(
        f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{document_type_id}/extraction/result/{operation_id}"
    )

    response = await self.request_async(
        method="GET",
        url=url,
        params={"api-version": "1.1"},
        headers=self._get_common_headers(),
    )
    result = response.json()

    status = result.get("status")
    if status in ("NotStarted", "Running"):
        raise ExtractionNotCompleteException(
            operation_id=operation_id,
            status=status,
        )

    payload = result.get("result")
    if payload is None:
        # Without this guard the enrichment below fails with an opaque
        # "'NoneType' object does not support item assignment".
        raise RuntimeError(
            f"IXP extraction operation '{operation_id}' returned no result "
            f"(status: {status!r})."
        )

    # Enrich a copy with the request context the response model expects,
    # leaving the decoded HTTP payload untouched.
    extraction_response = {
        **payload,
        "projectId": project_id,
        "tag": tag,
        "documentTypeId": document_type_id,
        "projectType": ProjectType.IXP,
    }

    return ExtractionResponseIXP.model_validate(extraction_response)
1088+
8901089
@traced(name="documents_extract", run_type="uipath")
8911090
def extract(
8921091
self,
@@ -989,7 +1188,7 @@ def extract(
9891188
tag=tag,
9901189
document_type_id=document_type_id,
9911190
document_id=document_id,
992-
)
1191+
).operation_id
9931192

9941193
return self._wait_for_extraction(
9951194
project_id=project_id,
@@ -1042,13 +1241,15 @@ async def extract_async(
10421241
classification_result=classification_result,
10431242
)
10441243

1045-
operation_id = await self._start_extraction_async(
1046-
project_id=project_id,
1047-
project_type=project_type,
1048-
tag=tag,
1049-
document_type_id=document_type_id,
1050-
document_id=document_id,
1051-
)
1244+
operation_id = (
1245+
await self._start_extraction_async(
1246+
project_id=project_id,
1247+
project_type=project_type,
1248+
tag=tag,
1249+
document_type_id=document_type_id,
1250+
document_id=document_id,
1251+
)
1252+
).operation_id
10521253

10531254
return await self._wait_for_extraction_async(
10541255
project_id=project_id,

0 commit comments

Comments
 (0)