Skip to content

Commit 6c23a65

Browse files
author
Bob Strahan
committed
Merge branch 'develop' of ssh.gitlab.aws.dev:genaiic-reusable-assets/engagement-artifacts/genaiic-idp-accelerator into develop
2 parents 963e3c9 + 6c13259 commit 6c23a65

File tree

6 files changed

+835
-10
lines changed

6 files changed

+835
-10
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,13 @@ SPDX-License-Identifier: MIT-0
99

1010
## [0.4.3]
1111

12+
1213
### Fixed
1314

1415
- Add ServiceUnavailableException to retryable exceptions in statemachine to better defend against processing failure due to quota overload
16+
- Evaluation Configuration Robustness
17+
- Improved JSON Schema error messages with actionable diagnostics when configuration issues occur
18+
- Added automatic type coercion for numeric constraints (e.g., `maxItems: "7"` → `maxItems: 7`) to handle common YAML parsing quirks gracefully
1519
- Fix #133 - Cast topK to int to defend against transient ValidationException exceptions
1620
- Fix #132 - TRACKING_TABLE environment variable needed in EvaluationFunction
1721
- Fix #131 - HITL functions broken post docker migration

VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
0.4.3-wip3
1+
0.4.3-wip4

lib/idp_common_pkg/idp_common/evaluation/service.py

Lines changed: 87 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -378,17 +378,95 @@ def _get_stickler_model(
378378
schema = stickler_config["schema"]
379379
model_name = stickler_config["model_name"]
380380

381-
logger.info(f"Creating Stickler model for class: {document_class}")
382-
383-
# Use JsonSchemaFieldConverter to handle the full JSON Schema natively
384-
from stickler.structured_object_evaluator.models.json_schema_field_converter import (
385-
JsonSchemaFieldConverter,
381+
# Enhanced logging: Log schema details before creating model
382+
logger.info(
383+
f"Creating Stickler model for class: {document_class}\n"
384+
f" Schema summary:\n"
385+
f" - Properties: {list(schema.get('properties', {}).keys())}\n"
386+
f" - Required fields: {schema.get('required', [])}\n"
387+
f" - Schema ID: {schema.get('$id', 'N/A')}\n"
388+
f" - Model name: {model_name}"
386389
)
387390

388-
converter = JsonSchemaFieldConverter(schema)
389-
field_definitions = converter.convert_properties_to_fields(
390-
schema.get("properties", {}), schema.get("required", [])
391-
)
391+
# Log expected and actual data structure for troubleshooting
392+
if expected_data:
393+
logger.info(
394+
f" Expected data keys for {document_class}: {list(expected_data.keys())}"
395+
)
396+
397+
try:
398+
# Use JsonSchemaFieldConverter to handle the full JSON Schema natively
399+
from stickler.structured_object_evaluator.models.json_schema_field_converter import (
400+
JsonSchemaFieldConverter,
401+
)
402+
403+
logger.debug(f"Converting schema properties for {document_class}")
404+
405+
converter = JsonSchemaFieldConverter(schema)
406+
field_definitions = converter.convert_properties_to_fields(
407+
schema.get("properties", {}), schema.get("required", [])
408+
)
409+
410+
logger.info(
411+
f"Successfully converted schema for {document_class} with {len(field_definitions)} fields"
412+
)
413+
414+
except Exception as e:
415+
# Enhanced error handling with user guidance
416+
import json
417+
import re
418+
419+
error_message = str(e)
420+
421+
# Check if it's a JSON Schema validation error
422+
if (
423+
"jsonschema.exceptions.SchemaError" in str(type(e))
424+
or "Invalid JSON Schema" in error_message
425+
):
426+
# Try to extract the problematic field from the error
427+
field_match = re.search(
428+
r"On schema\['properties'\]\['([^']+)'\]", error_message
429+
)
430+
field_name = field_match.group(1) if field_match else "unknown"
431+
432+
# Parse for constraint information
433+
constraint_match = re.search(
434+
r"\['([^']+)'\]\s*:\s*'([^']+)'", error_message
435+
)
436+
constraint = (
437+
constraint_match.group(1) if constraint_match else "unknown"
438+
)
439+
bad_value = constraint_match.group(2) if constraint_match else "unknown"
440+
441+
# Build helpful error message
442+
helpful_message = (
443+
f"Invalid JSON Schema for document class '{document_class}'.\n\n"
444+
f"Problem detected:\n"
445+
f" Field: {field_name}\n"
446+
f" Constraint: {constraint}\n"
447+
f" Current value: '{bad_value}' (type: {type(bad_value).__name__})\n\n"
448+
f"Common fixes:\n"
449+
f" 1. If '{constraint}' should be a number, remove quotes in your config:\n"
450+
f" {constraint}: '{bad_value}' → {constraint}: {bad_value}\n"
451+
f" 2. Check your config YAML for field '{field_name}' in class '{document_class}'\n"
452+
f" 3. Ensure all numeric constraints (maxItems, minItems, minimum, maximum, etc.) are numbers, not strings\n\n"
453+
f"Original error: {error_message}"
454+
)
455+
456+
logger.error(helpful_message)
457+
logger.error(
458+
f"Full schema that caused the error:\n{json.dumps(schema, indent=2, default=str)}"
459+
)
460+
raise ValueError(helpful_message) from e
461+
else:
462+
# Re-raise other errors with schema details
463+
logger.error(
464+
f"Unexpected error creating Stickler model for {document_class}: {error_message}"
465+
)
466+
logger.error(
467+
f"Schema being processed:\n{json.dumps(schema, indent=2, default=str)}"
468+
)
469+
raise
392470

393471
# Create the model using Pydantic's create_model
394472
from pydantic import create_model

lib/idp_common_pkg/idp_common/evaluation/stickler_mapper.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,83 @@ def _coerce_to_float(cls, value: Any, field_name: str = "") -> float:
294294
f"Field '{field_name}': Expected numeric value, got {type(value).__name__}"
295295
)
296296

297+
@classmethod
298+
def _coerce_json_schema_types(
299+
cls, schema: Dict[str, Any], field_path: str = ""
300+
) -> None:
301+
"""
302+
Coerce string values to proper JSON Schema types.
303+
304+
This fixes common issues where numeric constraints are provided as strings
305+
instead of numbers (e.g., maxItems: '7' should be maxItems: 7).
306+
307+
Args:
308+
schema: Schema to coerce (modified in-place)
309+
field_path: Current path for error messages
310+
"""
311+
if not isinstance(schema, dict):
312+
return
313+
314+
# Numeric constraints that must be integers
315+
INTEGER_CONSTRAINTS = [
316+
"maxItems",
317+
"minItems",
318+
"maxLength",
319+
"minLength",
320+
"maxProperties",
321+
"minProperties",
322+
"multipleOf",
323+
]
324+
325+
# Numeric constraints that must be numbers (int or float)
326+
NUMBER_CONSTRAINTS = [
327+
"minimum",
328+
"maximum",
329+
"exclusiveMinimum",
330+
"exclusiveMaximum",
331+
]
332+
333+
for key, value in list(schema.items()):
334+
# Coerce integer constraints
335+
if key in INTEGER_CONSTRAINTS and isinstance(value, str):
336+
try:
337+
schema[key] = int(value)
338+
logger.info(
339+
f"Field '{field_path}': Coerced {key} from string '{value}' to integer {schema[key]}"
340+
)
341+
except ValueError:
342+
logger.error(
343+
f"Field '{field_path}': Cannot coerce {key}='{value}' to integer. "
344+
f"This will cause validation errors."
345+
)
346+
347+
# Coerce number constraints
348+
elif key in NUMBER_CONSTRAINTS and isinstance(value, str):
349+
try:
350+
schema[key] = float(value)
351+
logger.info(
352+
f"Field '{field_path}': Coerced {key} from string '{value}' to float {schema[key]}"
353+
)
354+
except ValueError:
355+
logger.error(
356+
f"Field '{field_path}': Cannot coerce {key}='{value}' to number. "
357+
f"This will cause validation errors."
358+
)
359+
360+
# Recursively process nested schemas
361+
if SCHEMA_PROPERTIES in schema:
362+
for prop_name, prop_schema in schema[SCHEMA_PROPERTIES].items():
363+
prop_path = f"{field_path}.{prop_name}" if field_path else prop_name
364+
cls._coerce_json_schema_types(prop_schema, prop_path)
365+
366+
if SCHEMA_ITEMS in schema:
367+
items_path = f"{field_path}[]" if field_path else "items"
368+
cls._coerce_json_schema_types(schema[SCHEMA_ITEMS], items_path)
369+
370+
if "$defs" in schema:
371+
for def_name, def_schema in schema["$defs"].items():
372+
cls._coerce_json_schema_types(def_schema, f"$defs.{def_name}")
373+
297374
@classmethod
298375
def _translate_extensions_in_schema(
299376
cls, schema: Dict[str, Any], field_path: str = ""
@@ -323,6 +400,9 @@ def _translate_extensions_in_schema(
323400
if not isinstance(schema, dict):
324401
return schema
325402

403+
# Coerce types FIRST, before any other processing
404+
cls._coerce_json_schema_types(schema, field_path)
405+
326406
# If this is an object with properties but no required array, add empty one
327407
# This makes all fields optional, allowing None values
328408
if schema.get(SCHEMA_TYPE) == TYPE_OBJECT and SCHEMA_PROPERTIES in schema:

0 commit comments

Comments
 (0)