Skip to content

Commit 66a57f2

Browse files
author
Bob Strahan
committed
Add evaluation step to all processing patterns with EVALUATING status and UI support
1 parent cc2f0d6 commit 66a57f2

File tree

12 files changed

+189
-84
lines changed

12 files changed

+189
-84
lines changed

lib/idp_common_pkg/idp_common/models.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,7 @@ class Status(Enum):
2727
POSTPROCESSING = "POSTPROCESSING" # Document summarization
2828
HITL_IN_PROGRESS = "HITL_IN_PROGRESS" # Human-in-the-loop review in progress
2929
SUMMARIZING = "SUMMARIZING" # Document summarization
30+
EVALUATING = "EVALUATING" # Document evaluation
3031
COMPLETED = "COMPLETED" # All processing completed
3132
FAILED = "FAILED" # Processing failed
3233

patterns/pattern-1/statemachine/workflow.asl.json

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -167,6 +167,33 @@
167167
"BackoffRate": 2
168168
}
169169
],
170+
"Next": "EvaluationStep"
171+
},
172+
"EvaluationStep": {
173+
"Type": "Task",
174+
"Resource": "${EvaluationLambdaArn}",
175+
"Parameters": {
176+
"execution_arn.$": "$$.Execution.Id",
177+
"document.$": "$.Result.document"
178+
},
179+
"ResultPath": "$.Result",
180+
"Retry": [
181+
{
182+
"ErrorEquals": [
183+
"Lambda.ServiceException",
184+
"Lambda.AWSLambdaException",
185+
"Lambda.SdkClientException",
186+
"Lambda.TooManyRequestsException",
187+
"ServiceQuotaExceededException",
188+
"ThrottlingException",
189+
"ProvisionedThroughputExceededException",
190+
"RequestLimitExceeded"
191+
],
192+
"IntervalSeconds": 2,
193+
"MaxAttempts": 10,
194+
"BackoffRate": 2
195+
}
196+
],
170197
"Next": "WorkflowComplete"
171198
},
172199
"WorkflowComplete": {

patterns/pattern-1/template.yaml

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -94,6 +94,10 @@ Parameters:
9494
ConfigLibraryHash:
9595
Type: String
9696
Description: "Hash token from config library to force updates when config library changes"
97+
98+
EvaluationFunctionArn:
99+
Type: String
100+
Description: "ARN of the Evaluation Lambda function"
97101

98102
EnableHITL:
99103
Type: String
@@ -362,6 +366,11 @@ Resources:
362366
type: object
363367
sectionLabel: Evaluation Inference
364368
properties:
369+
enabled:
370+
type: boolean
371+
description: Enable or disable evaluation processing
372+
default: true
373+
order: 0
365374
llm_method:
366375
type: object
367376
properties:
@@ -648,6 +657,7 @@ Resources:
648657
HITLWaitFunctionArn: !GetAtt HITLWaitFunction.Arn
649658
HITLStatusUpdateFunctionArn: !GetAtt HITLStatusUpdateFunction.Arn
650659
SummarizationLambdaArn: !GetAtt SummarizationFunction.Arn
660+
EvaluationLambdaArn: !Ref EvaluationFunctionArn
651661
EnableHITL: !Ref EnableHITL
652662
OutputBucket: !Ref OutputBucket
653663
WorkingBucket: !Ref WorkingBucket
@@ -670,6 +680,8 @@ Resources:
670680
FunctionName: !Ref HITLWaitFunction
671681
- LambdaInvokePolicy:
672682
FunctionName: !Ref HITLStatusUpdateFunction
683+
- LambdaInvokePolicy:
684+
FunctionName: !Ref EvaluationFunctionArn
673685
- CloudWatchLogsFullAccess
674686

675687
StateMachineLogGroup:

patterns/pattern-2/statemachine/workflow.asl.json

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -252,6 +252,34 @@
252252
"BackoffRate": 2
253253
}
254254
],
255+
"Next": "EvaluationStep"
256+
},
257+
"EvaluationStep": {
258+
"Type": "Task",
259+
"Resource": "${EvaluationLambdaArn}",
260+
"Parameters": {
261+
"execution_arn.$": "$$.Execution.Id",
262+
"document.$": "$"
263+
},
264+
"ResultPath": "$",
265+
"Retry": [
266+
{
267+
"ErrorEquals": [
268+
"Sandbox.Timedout",
269+
"Lambda.ServiceException",
270+
"Lambda.AWSLambdaException",
271+
"Lambda.SdkClientException",
272+
"Lambda.TooManyRequestsException",
273+
"ServiceQuotaExceededException",
274+
"ThrottlingException",
275+
"ProvisionedThroughputExceededException",
276+
"RequestLimitExceeded"
277+
],
278+
"IntervalSeconds": 2,
279+
"MaxAttempts": 10,
280+
"BackoffRate": 2
281+
}
282+
],
255283
"Next": "WorkflowComplete"
256284
},
257285
"WorkflowComplete": {

patterns/pattern-2/template.yaml

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -100,6 +100,10 @@ Parameters:
100100
Type: String
101101
Description: "Hash token from config library to force updates when config library changes"
102102

103+
EvaluationFunctionArn:
104+
Type: String
105+
Description: "ARN of the Evaluation Lambda function"
106+
103107
EnableXRayTracing:
104108
Type: String
105109
Default: 'false'
@@ -1108,6 +1112,11 @@ Resources:
11081112
type: object
11091113
sectionLabel: Evaluation Inference
11101114
properties:
1115+
enabled:
1116+
type: boolean
1117+
description: Enable or disable evaluation processing
1118+
default: true
1119+
order: 0
11111120
llm_method:
11121121
type: object
11131122
properties:
@@ -2330,6 +2339,7 @@ Resources:
23302339
HITLWaitFunctionArn: !GetAtt HITLWaitFunction.Arn
23312340
HITLStatusUpdateFunctionArn: !GetAtt HITLStatusUpdateFunction.Arn
23322341
SummarizationLambdaArn: !GetAtt SummarizationFunction.Arn
2342+
EvaluationLambdaArn: !Ref EvaluationFunctionArn
23332343
OutputBucket: !Ref OutputBucket
23342344
Logging:
23352345
Level: ALL
@@ -2355,6 +2365,8 @@ Resources:
23552365
FunctionName: !Ref HITLStatusUpdateFunction
23562366
- LambdaInvokePolicy:
23572367
FunctionName: !Ref SummarizationFunction
2368+
- LambdaInvokePolicy:
2369+
FunctionName: !Ref EvaluationFunctionArn
23582370
- CloudWatchLogsFullAccess
23592371

23602372
StateMachineLogGroup:

patterns/pattern-3/statemachine/workflow.asl.json

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -187,6 +187,34 @@
187187
"BackoffRate": 2
188188
}
189189
],
190+
"Next": "EvaluationStep"
191+
},
192+
"EvaluationStep": {
193+
"Type": "Task",
194+
"Resource": "${EvaluationLambdaArn}",
195+
"Parameters": {
196+
"execution_arn.$": "$$.Execution.Id",
197+
"document.$": "$"
198+
},
199+
"ResultPath": "$",
200+
"Retry": [
201+
{
202+
"ErrorEquals": [
203+
"Sandbox.Timedout",
204+
"Lambda.ServiceException",
205+
"Lambda.AWSLambdaException",
206+
"Lambda.SdkClientException",
207+
"Lambda.TooManyRequestsException",
208+
"ServiceQuotaExceededException",
209+
"ThrottlingException",
210+
"ProvisionedThroughputExceededException",
211+
"RequestLimitExceeded"
212+
],
213+
"IntervalSeconds": 2,
214+
"MaxAttempts": 10,
215+
"BackoffRate": 2
216+
}
217+
],
190218
"Next": "WorkflowComplete"
191219
},
192220
"WorkflowComplete": {

patterns/pattern-3/template.yaml

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -92,6 +92,10 @@ Parameters:
9292
Type: String
9393
Description: "Hash token from config library to force updates when config library changes"
9494

95+
EvaluationFunctionArn:
96+
Type: String
97+
Description: "ARN of the Evaluation Lambda function"
98+
9599
PermissionsBoundaryArn:
96100
Type: String
97101
Default: ""
@@ -700,6 +704,11 @@ Resources:
700704
type: object
701705
sectionLabel: Evaluation Inference
702706
properties:
707+
enabled:
708+
type: boolean
709+
description: Enable or disable evaluation processing
710+
default: true
711+
order: 0
703712
llm_method:
704713
type: object
705714
properties:
@@ -1452,6 +1461,7 @@ Resources:
14521461
AssessmentFunctionArn: !GetAtt AssessmentFunction.Arn
14531462
ProcessResultsLambdaArn: !GetAtt ProcessResultsFunction.Arn
14541463
SummarizationLambdaArn: !GetAtt SummarizationFunction.Arn
1464+
EvaluationLambdaArn: !Ref EvaluationFunctionArn
14551465
OutputBucket: !Ref OutputBucket
14561466
Logging:
14571467
Level: ALL
@@ -1471,6 +1481,8 @@ Resources:
14711481
FunctionName: !Ref ProcessResultsFunction
14721482
- LambdaInvokePolicy:
14731483
FunctionName: !Ref SummarizationFunction
1484+
- LambdaInvokePolicy:
1485+
FunctionName: !Ref EvaluationFunctionArn
14741486
- CloudWatchLogsFullAccess
14751487

14761488
StateMachineLogGroup:

src/lambda/evaluation_function/index.py

Lines changed: 39 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -60,10 +60,10 @@ def update_document_evaluation_status(document: Document, status: EvaluationStat
6060

6161
def extract_document_from_event(event: Dict[str, Any]) -> Optional[Document]:
6262
"""
63-
Extract document from Lambda event
63+
Extract document from Lambda event (state machine format)
6464
6565
Args:
66-
event: Lambda event
66+
event: Lambda event containing document data
6767
6868
Returns:
6969
Document object or None if not found
@@ -72,17 +72,16 @@ def extract_document_from_event(event: Dict[str, Any]) -> Optional[Document]:
7272
ValueError: If document cannot be extracted from event
7373
"""
7474
try:
75-
output_data = json.loads(event['detail']['output'])
75+
# State machine format: event['document'] contains the document data
76+
document_data = event.get('document')
7677

77-
if not output_data:
78-
raise ValueError("No output data found in event")
78+
if not document_data:
79+
raise ValueError("No document data found in event")
7980

80-
# Get document from the final processing step
81+
# Get document from state machine format
8182
working_bucket = os.environ.get('WORKING_BUCKET')
82-
# look for document_data in either output_data.Result.document (Pattern-1) or output_data (others)
83-
document_data = output_data.get('Result',{}).get('document', output_data)
8483
document = Document.load_document(document_data, working_bucket, logger)
85-
logger.info(f"Successfully loaded actual document with {len(document.pages)} pages and {len(document.sections)} sections")
84+
logger.info(f"Successfully loaded document with {len(document.pages)} pages and {len(document.sections)} sections")
8685
return document
8786
except Exception as e:
8887
logger.error(f"Error extracting document from event: {str(e)}")
@@ -154,34 +153,43 @@ def handler(event, context):
154153
context: Lambda context
155154
156155
Returns:
157-
Response with evaluation results
156+
Document in state machine format: {'document': document.serialize_document()}
158157
"""
159158
actual_document = None
160159
start_time = time.time()
161160

162161
try:
163-
logger.info(f"Starting evaluation process with event: {json.dumps(event, indent=2)}")
162+
logger.info(f"Starting evaluation process")
164163

165164
# Extract document from event
166165
actual_document = extract_document_from_event(event)
167166

168-
# Update document status to RUNNING
167+
# Load configuration and check if evaluation is enabled
168+
config = get_config()
169+
evaluation_enabled = config.get('evaluation', {}).get('enabled', True)
170+
171+
if not evaluation_enabled:
172+
logger.info("Evaluation is disabled in configuration, skipping evaluation")
173+
# Return document unchanged
174+
return {'document': actual_document.serialize_document()}
175+
176+
# Set document status to EVALUATING before processing
177+
actual_document.status = Status.EVALUATING
178+
document_service.update_document(actual_document)
179+
180+
# Update document evaluation status to RUNNING
169181
update_document_evaluation_status(actual_document, EvaluationStatus.RUNNING)
170182

171183
# Load baseline document
172184
expected_document = load_baseline_document(actual_document.input_key)
173185

174186
# If no baseline document is found, update status and exit
175187
if not expected_document:
176-
update_document_evaluation_status(actual_document, EvaluationStatus.NO_BASELINE)
177-
return create_response(
178-
200,
179-
'Evaluation skipped - no baseline data available',
180-
{'document_key': actual_document.input_key}
181-
)
188+
actual_document = update_document_evaluation_status(actual_document, EvaluationStatus.NO_BASELINE)
189+
logger.info("Evaluation skipped - no baseline data available")
190+
return {'document': actual_document.serialize_document()}
182191

183-
# Load configuration and create evaluation service
184-
config = get_config()
192+
# Create evaluation service
185193
evaluation_service = evaluation.EvaluationService(config=config)
186194

187195
# Run evaluation
@@ -196,8 +204,8 @@ def handler(event, context):
196204
if evaluated_document.errors:
197205
error_msg = f"Evaluation encountered errors: {evaluated_document.errors}"
198206
logger.error(error_msg)
199-
update_document_evaluation_status(evaluated_document, EvaluationStatus.FAILED)
200-
return create_response(500, 'Evaluation failed', {'error': error_msg})
207+
evaluated_document = update_document_evaluation_status(evaluated_document, EvaluationStatus.FAILED)
208+
return {'document': evaluated_document.serialize_document()}
201209

202210
# Save evaluation results to reporting bucket for analytics using the SaveReportingData Lambda
203211
try:
@@ -224,18 +232,11 @@ def handler(event, context):
224232
# Continue execution - don't fail the entire function if reporting fails
225233

226234
# Update document evaluation status to COMPLETED
227-
update_document_evaluation_status(evaluated_document, EvaluationStatus.COMPLETED)
228-
logger.info("Evaluation process completed successfully")
229-
230-
# Return success response
231-
return create_response(
232-
200,
233-
'Evaluation completed successfully',
234-
{
235-
'report_location': evaluated_document.evaluation_report_uri,
236-
'execution_time': time.time() - start_time
237-
}
238-
)
235+
evaluated_document = update_document_evaluation_status(evaluated_document, EvaluationStatus.COMPLETED)
236+
logger.info(f"Evaluation process completed successfully in {time.time() - start_time:.2f} seconds")
237+
238+
# Return document in state machine format
239+
return {'document': evaluated_document.serialize_document()}
239240

240241
except Exception as e:
241242
error_msg = f"Error in lambda_handler: {str(e)}"
@@ -244,8 +245,10 @@ def handler(event, context):
244245
# Update document status to FAILED if we have the document
245246
if actual_document:
246247
try:
247-
update_document_evaluation_status(actual_document, EvaluationStatus.FAILED)
248+
actual_document = update_document_evaluation_status(actual_document, EvaluationStatus.FAILED)
249+
return {'document': actual_document.serialize_document()}
248250
except Exception as update_error:
249251
logger.error(f"Failed to update evaluation status: {str(update_error)}")
250252

251-
return create_response(500, 'Evaluation failed', {'error': error_msg})
253+
# Re-raise exception to let Step Functions handle the error
254+
raise

src/ui/src/components/step-function-flow/FlowDiagram.jsx

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,11 @@ const isStepDisabled = (stepName, config) => {
2222
return config.assessment?.enabled === false;
2323
}
2424

25+
// Check if this is an evaluation step
26+
if (stepNameLower.includes('evaluation') || stepNameLower.includes('evaluate')) {
27+
return config.evaluation?.enabled === false;
28+
}
29+
2530
return false;
2631
};
2732

@@ -238,6 +243,9 @@ FlowDiagram.propTypes = {
238243
assessment: PropTypes.shape({
239244
enabled: PropTypes.bool,
240245
}),
246+
evaluation: PropTypes.shape({
247+
enabled: PropTypes.bool,
248+
}),
241249
}),
242250
};
243251

0 commit comments

Comments
 (0)