# AWS service clients, created once at import time so every invocation in the
# same process reuses them.
ssm_client = boto3.client('ssm')
bedrock_client = boto3.client('bedrock-data-automation')

# Review-portal URL taken from the environment; empty string when the variable
# is not set.
SAGEMAKER_A2I_REVIEW_PORTAL_URL = os.environ.get('SAGEMAKER_A2I_REVIEW_PORTAL_URL', '')

# Human-in-the-loop switch. This is a bool: True only when ENABLE_HITL is set
# to "true" (case-insensitive), False otherwise (including when unset).
# Test it by truthiness (`if enable_hitl:`); comparing it with the string
# 'true' is always False.
enable_hitl = os.environ.get('ENABLE_HITL', 'false').lower() == 'true'
3435
3536def get_confidence_threshold_from_config (document : Document ) -> float :
3637 """
@@ -43,16 +44,16 @@ def get_confidence_threshold_from_config(document: Document) -> float:
4344 float: The confidence threshold as a decimal (0.0-1.0)
4445 """
4546 try :
46- config = get_config (document )
47- assessment_config = config .get ('assessment' , {})
48- threshold_value = float (assessment_config .get ('default_confidence_threshold' , 0.8 ))
47+ config = get_config ()
48+ threshold_value = float (config ['assessment' ]['default_confidence_threshold' ])
4949
5050 # Validate that the threshold is in the expected 0.0-1.0 range
5151 if threshold_value < 0.0 or threshold_value > 1.0 :
5252 logger .warning (f"Invalid confidence threshold value { threshold_value } . Must be between 0.0 and 1.0. Using default: 0.80" )
5353 return 0.80
5454
5555 logger .info (f"Retrieved confidence threshold from configuration: { threshold_value } " )
56+ threshold_value = 0.8
5657 return threshold_value
5758 except Exception as e :
5859 logger .warning (f"Failed to retrieve confidence threshold from configuration: { e } " )
@@ -870,11 +871,13 @@ def process_segments(
870871 bp_confidence = custom_output ["matched_blueprint" ]["confidence" ]
871872
872873 # Check if any key-value or blueprint confidence is below threshold
873- low_confidence = any (
874+ low_confidence = ( any (
874875 kv ['confidence' ] < confidence_threshold
875876 for page_num in page_indices
876877 for kv in pagespecific_details ['key_value_details' ].get (str (page_num ), [])
877- ) or float (bp_confidence ) < confidence_threshold
878+ ) or float (bp_confidence ) < confidence_threshold ) and enable_hitl == 'true'
879+
880+ logger .info (f"HITL STatus Low confidence { low_confidence } " )
878881
879882 item .update ({
880883 "page_array" : page_indices ,
@@ -931,7 +934,7 @@ def process_segments(
931934 page_array = list (range (start_page , end_page + 1 ))
932935 item .update ({
933936 "page_array" : page_array ,
934- "hitl_triggered" : True ,
937+ "hitl_triggered" : enable_hitl ,
935938 "extraction_bp_name" : "None" ,
936939 "extracted_result" : std_output
937940 })
@@ -941,30 +944,31 @@ def process_segments(
941944 record_number = record_number ,
942945 bp_match = segment .get ('custom_output_status' ),
943946 extraction_bp_name = "None" ,
944- hitl_triggered = True ,
947+ hitl_triggered = enable_hitl ,
945948 page_array = page_array ,
946949 review_portal_url = SAGEMAKER_A2I_REVIEW_PORTAL_URL
947950 )
948951
949- hitl_triggered = True
950- for page_number in range (start_page , end_page + 1 ):
951- ImageUri = f"s3://{ output_bucket } /{ object_key } /pages/{ page_number } /image.jpg"
952- try :
953- human_loop_response = start_human_loop (
954- execution_id = execution_id ,
955- kv_pairs = [],
956- source_image_uri = ImageUri ,
957- bounding_boxes = [],
958- blueprintName = "" ,
959- bp_confidence = 0.00 ,
960- confidenceThreshold = confidence_threshold ,
961- page_id = page_number ,
962- page_indices = page_array ,
963- record_number = record_number
964- )
965- logger .info (f"Triggered human loop for page { page_number } : { human_loop_response } " )
966- except Exception as e :
967- logger .error (f"Failed to start human loop for page { page_number } : { str (e )} " )
952+ hitl_triggered = enable_hitl
953+ if enable_hitl :
954+ for page_number in range (start_page , end_page + 1 ):
955+ ImageUri = f"s3://{ output_bucket } /{ object_key } /pages/{ page_number } /image.jpg"
956+ try :
957+ human_loop_response = start_human_loop (
958+ execution_id = execution_id ,
959+ kv_pairs = [],
960+ source_image_uri = ImageUri ,
961+ bounding_boxes = [],
962+ blueprintName = "" ,
963+ bp_confidence = 0.00 ,
964+ confidenceThreshold = confidence_threshold ,
965+ page_id = page_number ,
966+ page_indices = page_array ,
967+ record_number = record_number
968+ )
969+ logger .info (f"Triggered human loop for page { page_number } : { human_loop_response } " )
970+ except Exception as e :
971+ logger .error (f"Failed to start human loop for page { page_number } : { str (e )} " )
968972
969973 document .hitl_metadata .append (hitl_metadata )
970974
@@ -1102,53 +1106,51 @@ def handler(event, context):
11021106
11031107 # Process HITL if enabled
11041108 hitl_triggered = "false"
1105- enable_hitl = os .environ .get ('ENABLE_HITL' , 'false' ).lower () == 'true'
11061109
1107- if enable_hitl :
1108- try :
1109- # Use the confidence threshold already calculated above
1110- metdatafile_path = '/' .join (bda_result_prefix .split ('/' )[:- 1 ])
1111- job_metadata_key = f'{ metdatafile_path } /job_metadata.json'
1112- execution_id = event .get ("execution_arn" , "" ).split (':' )[- 1 ]
1113- logger .info (f"HITL execution ID: { execution_id } " )
1110+ try :
1111+ # Use the confidence threshold already calculated above
1112+ metdatafile_path = '/' .join (bda_result_prefix .split ('/' )[:- 1 ])
1113+ job_metadata_key = f'{ metdatafile_path } /job_metadata.json'
1114+ execution_id = event .get ("execution_arn" , "" ).split (':' )[- 1 ]
1115+ logger .info (f"HITL execution ID: { execution_id } " )
11141116
1115- try :
1116- jobmetadata_file = s3_client .get_object (Bucket = bda_result_bucket , Key = job_metadata_key )
1117- job_metadata = json .loads (jobmetadata_file ['Body' ].read ())
1118- if 'output_metadata' in job_metadata :
1119- output_metadata = job_metadata ['output_metadata' ]
1120- if isinstance (output_metadata , list ):
1121- for asset in output_metadata :
1122- document , hitl_result = process_segments (
1123- input_bucket ,
1124- output_bucket ,
1125- object_key ,
1126- asset .get ('segment_metadata' , []),
1127- confidence_threshold ,
1128- execution_id ,
1129- document
1130- )
1131- if hitl_result :
1132- hitl_triggered = "true"
1133- elif isinstance (output_metadata , dict ):
1134- for asset_id , asset in output_metadata .items ():
1135- document , hitl_result = process_segments (
1136- input_bucket ,
1137- output_bucket ,
1138- object_key ,
1139- asset .get ('segment_metadata' , []),
1140- confidence_threshold ,
1141- execution_id ,
1142- document
1143- )
1144- if hitl_result :
1145- hitl_triggered = "true"
1146- else :
1147- logger .error ("Unexpected output_metadata format in job_metadata.json" )
1148- except Exception as e :
1149- logger .error (f"Error processing job_metadata.json: { str (e )} " )
1117+ try :
1118+ jobmetadata_file = s3_client .get_object (Bucket = bda_result_bucket , Key = job_metadata_key )
1119+ job_metadata = json .loads (jobmetadata_file ['Body' ].read ())
1120+ if 'output_metadata' in job_metadata :
1121+ output_metadata = job_metadata ['output_metadata' ]
1122+ if isinstance (output_metadata , list ):
1123+ for asset in output_metadata :
1124+ document , hitl_result = process_segments (
1125+ input_bucket ,
1126+ output_bucket ,
1127+ object_key ,
1128+ asset .get ('segment_metadata' , []),
1129+ confidence_threshold ,
1130+ execution_id ,
1131+ document
1132+ )
1133+ if hitl_result :
1134+ hitl_triggered = "true"
1135+ elif isinstance (output_metadata , dict ):
1136+ for asset_id , asset in output_metadata .items ():
1137+ document , hitl_result = process_segments (
1138+ input_bucket ,
1139+ output_bucket ,
1140+ object_key ,
1141+ asset .get ('segment_metadata' , []),
1142+ confidence_threshold ,
1143+ execution_id ,
1144+ document
1145+ )
1146+ if hitl_result :
1147+ hitl_triggered = "true"
1148+ else :
1149+ logger .error ("Unexpected output_metadata format in job_metadata.json" )
11501150 except Exception as e :
1151- logger .error (f"Error in HITL processing: { str (e )} " )
1151+ logger .error (f"Error processing job_metadata.json: { str (e )} " )
1152+ except Exception as e :
1153+ logger .error (f"Error in HITL processing: { str (e )} " )
11521154
11531155 # Record metrics for processed pages
11541156 metrics .put_metric ('ProcessedDocuments' , 1 )
0 commit comments