99from ..._utils import Endpoint
1010from ...tracing import traced
1111from ..common import BaseService , FolderContext , UiPathApiConfig , UiPathExecutionContext
12+ from ..errors import ExtractionNotCompleteException
1213from .documents import (
1314 ActionPriority ,
1415 ClassificationResponse ,
1718 ExtractionResponseIXP ,
1819 FileContent ,
1920 ProjectType ,
21+ StartExtractionResponse ,
2022 ValidateClassificationAction ,
2123 ValidateExtractionAction ,
2224)
@@ -119,7 +121,9 @@ class DocumentsService(FolderContext, BaseService):
119121 """
120122
def __init__(
    self,
    config: UiPathApiConfig,
    execution_context: UiPathExecutionContext,
) -> None:
    """Initialize the service by forwarding both arguments to the base initializer.

    Args:
        config (UiPathApiConfig): API configuration passed to ``super().__init__``.
        execution_context (UiPathExecutionContext): Execution context passed to
            ``super().__init__``.
    """
    super().__init__(config=config, execution_context=execution_context)
125129
@@ -433,7 +437,7 @@ def _start_extraction(
433437 tag : Optional [str ],
434438 document_type_id : str ,
435439 document_id : str ,
436- ) -> str :
440+ ) -> StartExtractionResponse :
437441 if project_type == ProjectType .PRETRAINED :
438442 url = Endpoint (
439443 f"/du_/api/framework/projects/{ project_id } /extractors/{ document_type_id } /extraction/start"
@@ -443,22 +447,29 @@ def _start_extraction(
443447 f"/du_/api/framework/projects/{ project_id } /{ tag } /document-types/{ document_type_id } /extraction/start"
444448 )
445449
446- return self .request (
450+ operation_id = self .request (
447451 "POST" ,
448452 url = url ,
449453 params = {"api-version" : 1.1 },
450454 headers = self ._get_common_headers (),
451455 json = {"documentId" : document_id },
452456 ).json ()["operationId" ]
453457
458+ return StartExtractionResponse (
459+ operation_id = operation_id ,
460+ document_id = document_id ,
461+ project_id = project_id ,
462+ tag = tag ,
463+ )
464+
async def _start_extraction_async(
    self,
    project_id: str,
    project_type: ProjectType,
    tag: Optional[str],
    document_type_id: str,
    document_id: str,
) -> StartExtractionResponse:
    """Start an extraction operation without blocking the event loop.

    Posts ``{"documentId": document_id}`` to the extraction ``start`` endpoint
    and wraps the returned ``operationId`` together with the identifiers that
    were used to start it.

    Args:
        project_id (str): ID of the project, used in the endpoint path.
        project_type (ProjectType): ``ProjectType.PRETRAINED`` selects the
            ``extractors`` endpoint; any other value selects the tagged
            ``document-types`` endpoint.
        tag (Optional[str]): Project version tag; only part of the URL for
            the ``document-types`` endpoint.
        document_type_id (str): Extractor / document type ID used in the path.
        document_id (str): ID sent as ``documentId`` in the request body.

    Returns:
        StartExtractionResponse: The operation ID plus ``document_id``,
        ``project_id`` and ``tag``.
    """
    if project_type == ProjectType.PRETRAINED:
        url = Endpoint(
            f"/du_/api/framework/projects/{project_id}/extractors/{document_type_id}/extraction/start"
        )
    else:
        # NOTE(review): the header of this branch falls outside the reviewed
        # excerpt; it is reconstructed as the plain fallback for every
        # non-pretrained project type -- confirm against the full file.
        url = Endpoint(
            f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{document_type_id}/extraction/start"
        )

    # Must be the awaited async transport: calling the synchronous
    # ``self.request`` here would block the event loop for the whole HTTP
    # round-trip.
    response = await self.request_async(
        "POST",
        url=url,
        params={"api-version": 1.1},
        headers=self._get_common_headers(),
        json={"documentId": document_id},
    )
    operation_id = response.json()["operationId"]

    return StartExtractionResponse(
        operation_id=operation_id,
        document_id=document_id,
        project_id=project_id,
        tag=tag,
    )
496+
481497 def _wait_for_operation (
482498 self ,
483499 result_getter : Callable [[], Tuple [Any , Optional [Any ], Optional [Any ]]],
@@ -887,6 +903,189 @@ async def classify_async(
887903 operation_id = operation_id ,
888904 )
889905
@traced(name="documents_start_ixp_extraction", run_type="uipath")
def start_ixp_extraction(
    self,
    project_name: str,
    tag: str,
    file: Optional[FileContent] = None,
    file_path: Optional[str] = None,
) -> StartExtractionResponse:
    """Start an IXP extraction process without waiting for results (non-blocking).

    This method resolves the project ID from its name, submits the document
    for digitization and starts the extraction, returning as soon as the
    extraction operation has been started. The result can be fetched later
    with `retrieve_ixp_extraction_result`.

    Args:
        project_name (str): Name of the IXP project.
        tag (str): Tag of the published project version (e.g., "staging").
        file (FileContent, optional): The document file to be processed.
        file_path (str, optional): Path to the document file to be processed.

    Note:
        Either `file` or `file_path` must be provided, but not both.

    Returns:
        StartExtractionResponse: Contains the operation_id, document_id, project_id, and tag.

    Examples:
        ```python
        start_response = uipath.documents.start_ixp_extraction(
            project_name="MyIXPProjectName",
            tag="staging",
            file_path="path/to/document.pdf",
        )
        # start_response.operation_id can be used to retrieve the result later
        ```
    """
    _exactly_one_must_be_provided(file=file, file_path=file_path)

    project_id = self._get_project_id_by_name(project_name, ProjectType.IXP)

    document_id = self._start_digitization(
        project_id=project_id,
        file=file,
        file_path=file_path,
    )

    return self._start_extraction(
        project_id=project_id,
        project_type=ProjectType.IXP,
        tag=tag,
        # The IXP calls in this service pass the all-zero UUID as the
        # document type id.
        document_type_id=str(UUID(int=0)),
        document_id=document_id,
    )
959+
@traced(name="documents_start_ixp_extraction_async", run_type="uipath")
async def start_ixp_extraction_async(
    self,
    project_name: str,
    tag: str,
    file: Optional[FileContent] = None,
    file_path: Optional[str] = None,
) -> StartExtractionResponse:
    """Asynchronous version of the [`start_ixp_extraction`][uipath.platform.documents._documents_service.DocumentsService.start_ixp_extraction] method.

    Resolves the project ID from ``project_name``, starts digitization of the
    supplied document and then starts the extraction, awaiting each step.

    Args:
        project_name (str): Name of the IXP project.
        tag (str): Tag of the published project version.
        file (FileContent, optional): The document file to be processed.
        file_path (str, optional): Path to the document file to be processed.

    Returns:
        StartExtractionResponse: The value produced by ``_start_extraction_async``.
    """
    _exactly_one_must_be_provided(file=file, file_path=file_path)

    ixp_project_id = await self._get_project_id_by_name_async(
        project_name, ProjectType.IXP
    )
    digitized_document_id = await self._start_digitization_async(
        project_id=ixp_project_id,
        file=file,
        file_path=file_path,
    )

    # The IXP calls in this service pass the all-zero UUID as the document
    # type id.
    started = await self._start_extraction_async(
        project_id=ixp_project_id,
        project_type=ProjectType.IXP,
        tag=tag,
        document_type_id=str(UUID(int=0)),
        document_id=digitized_document_id,
    )
    return started
988+
@traced(name="documents_retrieve_ixp_extraction_result", run_type="uipath")
def retrieve_ixp_extraction_result(
    self,
    project_id: str,
    tag: str,
    operation_id: str,
) -> ExtractionResponseIXP:
    """Retrieve the result of an IXP extraction operation (single-shot, non-blocking).

    This method retrieves the result of an IXP extraction that was previously started
    with `start_ixp_extraction`. It does not poll - it makes a single request and
    returns the result if available, or raises an exception if not complete.

    Args:
        project_id (str): The ID of the IXP project.
        tag (str): The tag of the published project version.
        operation_id (str): The operation ID returned from `start_ixp_extraction`.

    Returns:
        ExtractionResponseIXP: The extraction response containing the extracted data.

    Raises:
        ExtractionNotCompleteException: If the reported status is
            ``"NotStarted"`` or ``"Running"``.
        RuntimeError: If the operation is not pending but the response
            carries no ``result`` payload.

    Examples:
        ```python
        # After receiving a callback/webhook that extraction is complete:
        result = service.retrieve_ixp_extraction_result(
            project_id=start_response.project_id,
            tag=start_response.tag,
            operation_id=start_response.operation_id,
        )
        ```
    """
    # The IXP calls in this service pass the all-zero UUID as the document
    # type id.
    document_type_id = str(UUID(int=0))

    url = Endpoint(
        f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{document_type_id}/extraction/result/{operation_id}"
    )

    result = self.request(
        method="GET",
        url=url,
        params={"api-version": "1.1"},
        headers=self._get_common_headers(),
    ).json()

    status = result.get("status")
    if status in ("NotStarted", "Running"):
        raise ExtractionNotCompleteException(
            operation_id=operation_id,
            status=status,
        )

    extraction_response = result.get("result")
    if extraction_response is None:
        # Without this guard the item assignments below fail with an opaque
        # "'NoneType' object does not support item assignment" TypeError.
        raise RuntimeError(
            f"Extraction operation {operation_id} returned no result "
            f"(status: {status})"
        )

    # Add the request identifiers to the payload before validating it.
    extraction_response["projectId"] = project_id
    extraction_response["tag"] = tag
    extraction_response["documentTypeId"] = document_type_id
    extraction_response["projectType"] = ProjectType.IXP

    return ExtractionResponseIXP.model_validate(extraction_response)
1050+
@traced(name="documents_retrieve_ixp_extraction_result_async", run_type="uipath")
async def retrieve_ixp_extraction_result_async(
    self,
    project_id: str,
    tag: str,
    operation_id: str,
) -> ExtractionResponseIXP:
    """Asynchronous version of the [`retrieve_ixp_extraction_result`][uipath.platform.documents._documents_service.DocumentsService.retrieve_ixp_extraction_result] method.

    Makes a single awaited GET request for the operation result; it does not poll.

    Args:
        project_id (str): The ID of the IXP project.
        tag (str): The tag of the published project version.
        operation_id (str): The operation ID of the extraction to look up.

    Returns:
        ExtractionResponseIXP: The validated extraction response.

    Raises:
        ExtractionNotCompleteException: If the reported status is
            ``"NotStarted"`` or ``"Running"``.
        RuntimeError: If the operation is not pending but the response
            carries no ``result`` payload.
    """
    # The IXP calls in this service pass the all-zero UUID as the document
    # type id.
    document_type_id = str(UUID(int=0))

    url = Endpoint(
        f"/du_/api/framework/projects/{project_id}/{tag}/document-types/{document_type_id}/extraction/result/{operation_id}"
    )

    response = await self.request_async(
        method="GET",
        url=url,
        params={"api-version": "1.1"},
        headers=self._get_common_headers(),
    )
    result = response.json()

    status = result.get("status")
    if status in ("NotStarted", "Running"):
        raise ExtractionNotCompleteException(
            operation_id=operation_id,
            status=status,
        )

    extraction_response = result.get("result")
    if extraction_response is None:
        # Without this guard the item assignments below fail with an opaque
        # "'NoneType' object does not support item assignment" TypeError.
        raise RuntimeError(
            f"Extraction operation {operation_id} returned no result "
            f"(status: {status})"
        )

    # Add the request identifiers to the payload before validating it.
    extraction_response["projectId"] = project_id
    extraction_response["tag"] = tag
    extraction_response["documentTypeId"] = document_type_id
    extraction_response["projectType"] = ProjectType.IXP

    return ExtractionResponseIXP.model_validate(extraction_response)
1088+
8901089 @traced (name = "documents_extract" , run_type = "uipath" )
8911090 def extract (
8921091 self ,
@@ -989,7 +1188,7 @@ def extract(
9891188 tag = tag ,
9901189 document_type_id = document_type_id ,
9911190 document_id = document_id ,
992- )
1191+ ). operation_id
9931192
9941193 return self ._wait_for_extraction (
9951194 project_id = project_id ,
@@ -1042,13 +1241,15 @@ async def extract_async(
10421241 classification_result = classification_result ,
10431242 )
10441243
1045- operation_id = await self ._start_extraction_async (
1046- project_id = project_id ,
1047- project_type = project_type ,
1048- tag = tag ,
1049- document_type_id = document_type_id ,
1050- document_id = document_id ,
1051- )
1244+ operation_id = (
1245+ await self ._start_extraction_async (
1246+ project_id = project_id ,
1247+ project_type = project_type ,
1248+ tag = tag ,
1249+ document_type_id = document_type_id ,
1250+ document_id = document_id ,
1251+ )
1252+ ).operation_id
10521253
10531254 return await self ._wait_for_extraction_async (
10541255 project_id = project_id ,
0 commit comments