Skip to content

Commit b9d4181

Browse files
authored
Improve Document Schema builder, Temperature and Top P passing (#148)
* enum fixes for the schema builder * passing in top_p and temperature for agentic * better labels for document schema builder * improve retry mechanism * fix undefined items * type fix * improve typing
1 parent c936650 commit b9d4181

File tree

6 files changed

+196
-66
lines changed

6 files changed

+196
-66
lines changed

lib/idp_common_pkg/idp_common/extraction/agentic_idp.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1002,7 +1002,11 @@ async def structured_output_async(
10021002
# Track token usage
10031003
token_usage = _initialize_token_usage()
10041004
agent = Agent(
1005-
model=BedrockModel(**model_config), # pyright: ignore[reportArgumentType]
1005+
model=BedrockModel(
1006+
**model_config,
1007+
temperature=config.extraction.temperature,
1008+
top_p=config.extraction.top_p,
1009+
), # pyright: ignore[reportArgumentType]
10061010
tools=tools,
10071011
system_prompt=final_system_prompt,
10081012
state={
@@ -1092,7 +1096,7 @@ async def structured_output_async(
10921096
)
10931097

10941098
review_response = await invoke_agent_with_retry(
1095-
agent=agent, input=review_prompt
1099+
agent=agent, input=[review_prompt]
10961100
)
10971101
logger.debug("Review response received", extra={"review_completed": True})
10981102

lib/idp_common_pkg/idp_common/utils/bedrock_utils.py

Lines changed: 66 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,42 @@
2020
InvokeModelResponseTypeDef,
2121
)
2222

23+
# Optional import for strands-agents (may not be installed in all environments)
24+
try:
25+
from strands.types.exceptions import ModelThrottledException
26+
27+
_STRANDS_AVAILABLE = True
28+
except ImportError:
29+
_STRANDS_AVAILABLE = False
30+
# Create a placeholder exception class that will never match
31+
ModelThrottledException = type("ModelThrottledException", (Exception,), {}) # type: ignore[misc, assignment]
32+
2333
# Configure logger
2434
logger = logging.getLogger(__name__)
2535
logger.setLevel(os.environ.get("LOG_LEVEL", "INFO"))
2636

37+
# Default retryable error codes (matched against ClientError codes and exception messages)
38+
DEFAULT_RETRYABLE_ERRORS = {
39+
"ThrottlingException",
40+
"throttlingException",
41+
"ModelThrottledException", # Strands wrapper for throttling
42+
"ModelErrorException",
43+
"ValidationException",
44+
"ServiceQuotaExceededException",
45+
"RequestLimitExceeded",
46+
"TooManyRequestsException",
47+
"ServiceUnavailableException",
48+
"serviceUnavailableException", # lowercase variant from EventStreamError
49+
"RequestTimeout",
50+
"RequestTimeoutException",
51+
}
52+
53+
# Default retryable exception types (caught by isinstance check)
54+
# Only include ModelThrottledException if strands is available
55+
DEFAULT_RETRYABLE_EXCEPTION_TYPES: tuple[type[Exception], ...] = (
56+
(ModelThrottledException,) if _STRANDS_AVAILABLE else ()
57+
)
58+
2759

2860
def async_exponential_backoff_retry[T, **P](
2961
max_retries: int = 5,
@@ -32,23 +64,13 @@ def async_exponential_backoff_retry[T, **P](
3264
exponential_base: float = 2.0,
3365
jitter: float = 0.1,
3466
retryable_errors: set[str] | None = None,
67+
retryable_exception_types: tuple[type[Exception], ...] | None = None,
3568
) -> Callable[[Callable[P, Awaitable[T]]], Callable[P, Awaitable[T]]]:
36-
if not retryable_errors:
37-
retryable_errors = set(
38-
[
39-
"ThrottlingException",
40-
"throttlingException",
41-
"ModelErrorException",
42-
"ValidationException",
43-
"ServiceQuotaExceededException",
44-
"RequestLimitExceeded",
45-
"TooManyRequestsException",
46-
"ServiceUnavailableException",
47-
"serviceUnavailableException", # lowercase variant from EventStreamError
48-
"RequestTimeout",
49-
"RequestTimeoutException",
50-
]
51-
)
69+
# Use defaults if not provided
70+
if retryable_errors is None:
71+
retryable_errors = DEFAULT_RETRYABLE_ERRORS
72+
if retryable_exception_types is None:
73+
retryable_exception_types = DEFAULT_RETRYABLE_EXCEPTION_TYPES
5274

5375
def decorator(func: Callable[P, Awaitable[T]]) -> Callable[P, Awaitable[T]]:
5476
@wraps(func)
@@ -104,7 +126,34 @@ def log_bedrock_invocation_error(error: Exception, attempt_num: int):
104126
await asyncio.sleep(sleep_time)
105127
delay = min(delay * exponential_base, max_delay)
106128
except Exception as e:
107-
# Log bedrock invocation details for non-ClientError exceptions too
129+
# Check if this is a retryable exception type (e.g., Strands ModelThrottledException)
130+
is_retryable_type = retryable_exception_types and isinstance(
131+
e, retryable_exception_types
132+
)
133+
134+
# Also check if exception name or message contains retryable error patterns
135+
exception_name = type(e).__name__
136+
exception_str = str(e)
137+
is_retryable_name = exception_name in retryable_errors or any(
138+
err in exception_str for err in retryable_errors
139+
)
140+
141+
if (
142+
is_retryable_type or is_retryable_name
143+
) and attempt < max_retries - 1:
144+
# Log and retry
145+
log_bedrock_invocation_error(e, attempt + 1)
146+
jitter_value = random.uniform(-jitter, jitter)
147+
sleep_time = max(0.1, delay * (1 + jitter_value))
148+
logger.warning(
149+
f"{exception_name}: {exception_str} encountered in {func.__name__}. "
150+
f"Retrying in {sleep_time:.2f} seconds. Attempt {attempt + 1}/{max_retries}"
151+
)
152+
await asyncio.sleep(sleep_time)
153+
delay = min(delay * exponential_base, max_delay)
154+
continue
155+
156+
# Log bedrock invocation details for non-retryable exceptions
108157
log_bedrock_invocation_error(e, attempt + 1)
109158
raise
110159

src/ui/src/components/json-schema-builder/SchemaCanvas.jsx

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,12 +97,16 @@ const SortableAttributeItem = ({
9797
};
9898

9999
const getConstBadge = () => {
100-
if (attribute.const === undefined) return null;
100+
// Check both attribute level and items level (for simple arrays)
101+
const hasConst = attribute.const !== undefined || (attribute.type === 'array' && attribute.items?.const !== undefined);
102+
if (!hasConst) return null;
101103
return <Badge color="blue">const</Badge>;
102104
};
103105

104106
const getEnumBadge = () => {
105-
if (!attribute.enum) return null;
107+
// Check both attribute level and items level (for simple arrays)
108+
const hasEnum = attribute.enum || (attribute.type === 'array' && attribute.items?.enum);
109+
if (!hasEnum) return null;
106110
return <Badge color="blue">enum</Badge>;
107111
};
108112

src/ui/src/components/json-schema-builder/constraints/StringConstraints.jsx

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ const StringConstraints = ({ attribute, onUpdate }) => {
88

99
return (
1010
<>
11-
<Header variant="h4">String Constraints</Header>
11+
<Header variant="h4">String Constraints (JSON Schema)</Header>
1212

1313
<FormField label="Pattern (regex)" description="Regular expression pattern to validate the extracted string format">
1414
<Input
@@ -18,7 +18,11 @@ const StringConstraints = ({ attribute, onUpdate }) => {
1818
/>
1919
</FormField>
2020

21-
<FormField label="Format" description="Standard format to validate against (e.g., date, email, uri)">
21+
<FormField
22+
label="Format (JSON Schema)"
23+
description="JSON Schema built-in format validation. Values must match the specified format exactly."
24+
constraintText="Select a format to enforce validation on extracted values"
25+
>
2226
<Select
2327
selectedOption={FORMAT_OPTIONS.find((opt) => opt.value === (attribute.format || '')) || FORMAT_OPTIONS[0]}
2428
onChange={({ detail }) => onUpdate({ format: detail.selectedOption.value || undefined })}

src/ui/src/components/json-schema-builder/constraints/ValueConstraints.jsx

Lines changed: 90 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,26 +8,53 @@ const ValueConstraints = ({ attribute, onUpdate }) => {
88
const [constInput, setConstInput] = useState('');
99
const [enumInput, setEnumInput] = useState('');
1010

11+
// For arrays with simple item types (not $ref), enum/const should be on items, not the array itself
12+
const isSimpleArray = attribute.type === 'array' && attribute.items && !attribute.items.$ref;
13+
const effectiveType = isSimpleArray ? attribute.items?.type : attribute.type;
14+
15+
// Get enum value from the correct location (items for simple arrays, attribute otherwise)
16+
const currentEnum = isSimpleArray ? attribute.items?.enum : attribute.enum;
17+
const currentConst = isSimpleArray ? attribute.items?.const : attribute.const;
18+
1119
// Initialize local state from attribute values
1220
useEffect(() => {
13-
setConstInput(formatValueForInput(attribute.const));
14-
}, [attribute.const]);
21+
setConstInput(formatValueForInput(currentConst));
22+
}, [currentConst]);
1523

1624
// Initialize enum input as empty (it's only shown when no enum exists yet)
1725
useEffect(() => {
18-
if (!attribute.enum || attribute.enum.length === 0) {
26+
if (!currentEnum || currentEnum.length === 0) {
1927
setEnumInput('');
2028
}
21-
}, [attribute.enum]);
29+
}, [currentEnum]);
30+
31+
// Helper to update enum/const at the correct level (items for simple arrays)
32+
const updateValueConstraint = (updates) => {
33+
if (isSimpleArray) {
34+
// Place enum/const inside items for simple arrays
35+
// Need to handle undefined values by explicitly removing keys
36+
const newItems = { ...attribute.items };
37+
Object.keys(updates).forEach((key) => {
38+
if (updates[key] === undefined) {
39+
delete newItems[key];
40+
} else {
41+
newItems[key] = updates[key];
42+
}
43+
});
44+
onUpdate({ items: newItems });
45+
} else {
46+
onUpdate(updates);
47+
}
48+
};
2249

2350
// Handle Const field blur - parse and update parent state
2451
const handleConstBlur = () => {
2552
if (!constInput) {
26-
onUpdate({ const: undefined });
53+
updateValueConstraint({ const: undefined });
2754
return;
2855
}
29-
const parsed = parseInputValue(constInput, attribute.type);
30-
onUpdate({ const: parsed });
56+
const parsed = parseInputValue(constInput, effectiveType);
57+
updateValueConstraint({ const: parsed });
3158
};
3259

3360
// Handle Enum field blur - parse and update parent state
@@ -36,66 +63,101 @@ const ValueConstraints = ({ attribute, onUpdate }) => {
3663
if (value) {
3764
try {
3865
const parsed = JSON.parse(`[${value}]`);
39-
onUpdate({ enum: parsed });
66+
updateValueConstraint({ enum: parsed });
4067
} catch {
4168
const enumValues = value
4269
.split(',')
4370
.map((v) => v.trim())
4471
.filter((v) => v);
45-
onUpdate({ enum: enumValues.length > 0 ? enumValues : undefined });
72+
updateValueConstraint({ enum: enumValues.length > 0 ? enumValues : undefined });
4673
}
4774
// Clear the input after successful processing
4875
setEnumInput('');
4976
}
5077
};
5178

79+
// Get placeholder examples based on effective type
80+
const getEnumPlaceholder = () => {
81+
switch (effectiveType) {
82+
case 'number':
83+
case 'integer':
84+
return 'e.g., 1, 2, 3';
85+
case 'boolean':
86+
return 'e.g., true, false';
87+
default:
88+
return 'e.g., active, pending, completed';
89+
}
90+
};
91+
92+
const getConstPlaceholder = () => {
93+
switch (effectiveType) {
94+
case 'number':
95+
case 'integer':
96+
return 'e.g., 42';
97+
case 'boolean':
98+
return 'e.g., true';
99+
default:
100+
return 'e.g., active';
101+
}
102+
};
103+
104+
// Build description with JSON Schema context
105+
const enumDescription = isSimpleArray
106+
? 'Allowed values for each item in the array (JSON Schema enum). Comma-separated list.'
107+
: 'Allowed values for this field (JSON Schema enum). Comma-separated list.';
108+
109+
const constDescription = isSimpleArray
110+
? 'Each item in the array must be exactly this value (JSON Schema const).'
111+
: 'Field must be exactly this value (JSON Schema const).';
112+
52113
return (
53114
<>
54-
<Header variant="h4">Value Constraints</Header>
115+
<Header variant="h4">Value Constraints (JSON Schema)</Header>
55116

56-
<FormField label="Const (Single Constant Value)" description="Field must be exactly this value">
117+
<FormField label="Const (Single Constant Value)" description={constDescription} constraintText={`Example: ${getConstPlaceholder()}`}>
57118
<Input
58119
value={constInput}
59120
onChange={({ detail }) => setConstInput(detail.value)}
60121
onBlur={handleConstBlur}
61-
placeholder='e.g., "active", 42, or JSON value'
62-
disabled={attribute.enum && attribute.enum.length > 0}
122+
placeholder={getConstPlaceholder()}
123+
disabled={currentEnum && currentEnum.length > 0}
63124
/>
64125
</FormField>
65126

66127
<FormField
67-
label="Enum Values (Multiple Allowed Values)"
68-
description="Comma-separated list of allowed values (mutually exclusive with const)"
128+
label="Enum (Allowed Values)"
129+
description={enumDescription}
130+
constraintText={`Example: ${getEnumPlaceholder()} - Values are comma-separated`}
69131
>
70-
{attribute.enum && attribute.enum.length > 0 ? (
132+
{currentEnum && currentEnum.length > 0 ? (
71133
<SpaceBetween size="xs">
72134
<TokenGroup
73-
items={attribute.enum.map((val) => ({
135+
items={currentEnum.map((val) => ({
74136
label: typeof val === 'object' ? JSON.stringify(val) : String(val),
75137
dismissLabel: `Remove ${val}`,
76138
}))}
77139
onDismiss={({ detail: { itemIndex } }) => {
78-
const newEnum = [...(attribute.enum || [])];
140+
const newEnum = [...(currentEnum || [])];
79141
newEnum.splice(itemIndex, 1);
80-
onUpdate({ enum: newEnum.length > 0 ? newEnum : undefined });
142+
updateValueConstraint({ enum: newEnum.length > 0 ? newEnum : undefined });
81143
}}
82144
/>
83145
<Button
84146
variant="link"
85147
onClick={() => {
86-
onUpdate({ enum: undefined });
148+
updateValueConstraint({ enum: undefined });
87149
}}
88150
>
89151
Clear all enum values
90152
</Button>
91153
</SpaceBetween>
92154
) : (
93155
<Input
94-
placeholder="value1, value2, value3"
156+
placeholder={getEnumPlaceholder()}
95157
value={enumInput}
96158
onChange={({ detail }) => setEnumInput(detail.value)}
97159
onBlur={handleEnumBlur}
98-
disabled={attribute.const !== undefined}
160+
disabled={currentConst !== undefined}
99161
/>
100162
)}
101163
</FormField>
@@ -108,6 +170,12 @@ ValueConstraints.propTypes = {
108170
type: PropTypes.string,
109171
const: PropTypes.oneOfType([PropTypes.string, PropTypes.number, PropTypes.bool, PropTypes.object, PropTypes.array]),
110172
enum: PropTypes.arrayOf(PropTypes.oneOfType([PropTypes.string, PropTypes.number, PropTypes.bool, PropTypes.object, PropTypes.array])),
173+
items: PropTypes.shape({
174+
type: PropTypes.string,
175+
$ref: PropTypes.string,
176+
const: PropTypes.oneOfType([PropTypes.string, PropTypes.number, PropTypes.bool, PropTypes.object, PropTypes.array]),
177+
enum: PropTypes.arrayOf(PropTypes.oneOfType([PropTypes.string, PropTypes.number, PropTypes.bool, PropTypes.object, PropTypes.array])),
178+
}),
111179
}).isRequired,
112180
onUpdate: PropTypes.func.isRequired,
113181
};

0 commit comments

Comments
 (0)