Skip to content

Commit 0c4baa0

Browse files
committed
config update for analyzer agent
1 parent 8e15f2b commit 0c4baa0

File tree

5 files changed

+167
-116
lines changed

5 files changed

+167
-116
lines changed

lib/idp_common_pkg/idp_common/agents/error_analyzer/agent.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,14 @@
66
"""
77

88
import logging
9-
from typing import Any, Dict
9+
from typing import Optional
1010

1111
import boto3
1212
import strands
1313

14+
from idp_common.config import get_config
15+
1416
from ..common.strands_bedrock_model import create_strands_bedrock_model
15-
from .config import get_error_analyzer_config
1617
from .tools import (
1718
cloudwatch_document_logs,
1819
cloudwatch_logs,
@@ -29,9 +30,7 @@
2930

3031

3132
def create_error_analyzer_agent(
32-
config: Dict[str, Any] = None,
33-
session: boto3.Session = None,
34-
pattern_config: Dict[str, Any] = None,
33+
session: Optional[boto3.Session] = None,
3534
**kwargs,
3635
) -> strands.Agent:
3736
"""
@@ -44,7 +43,7 @@ def create_error_analyzer_agent(
4443
pattern_config: No longer a named parameter (configuration is now loaded via get_config); if passed, it is only captured by **kwargs
4544
**kwargs: Additional arguments
4645
"""
47-
config = get_error_analyzer_config(pattern_config)
46+
config = get_config(as_model=True)
4847

4948
# Create session if not provided
5049
if session is None:
@@ -63,9 +62,11 @@ def create_error_analyzer_agent(
6362
xray_performance_analysis,
6463
]
6564
bedrock_model = create_strands_bedrock_model(
66-
model_id=config["model_id"], boto_session=session
65+
model_id=config.agents.error_analyzer.model_id, boto_session=session
6766
)
6867

6968
return strands.Agent(
70-
tools=tools, system_prompt=config["system_prompt"], model=bedrock_model
69+
tools=tools,
70+
system_prompt=config.agents.error_analyzer.system_prompt,
71+
model=bedrock_model,
7172
)

lib/idp_common_pkg/idp_common/agents/error_analyzer/config.py

Lines changed: 1 addition & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -6,99 +6,11 @@
66
"""
77

88
import logging
9-
from typing import Any, Dict, List
10-
11-
from ..common.config import configure_logging, get_environment_config
9+
from typing import Any, Dict
1210

1311
logger = logging.getLogger(__name__)
1412

1513

16-
def get_error_analyzer_config(pattern_config: Dict[str, Any] = None) -> Dict[str, Any]:
17-
"""
18-
Builds complete error analyzer configuration from environment and patterns.
19-
Get error analyzer configuration with defaults and overrides.
20-
21-
Returns:
22-
Dict containing complete error analyzer configuration
23-
"""
24-
from ... import get_config
25-
26-
# Start with base environment and context limits
27-
config = get_environment_config(["CLOUDWATCH_LOG_GROUP_PREFIX", "AWS_STACK_NAME"])
28-
config.update(get_context_limits())
29-
30-
# Load and apply agent configuration
31-
full_config = get_config()
32-
agent_config = full_config.get("agents", {}).get("error_analyzer", {})
33-
34-
if not agent_config:
35-
raise ValueError("error_analyzer configuration not found")
36-
37-
# Apply agent settings with defaults
38-
config.update(
39-
{
40-
"model_id": agent_config.get(
41-
"model_id", "anthropic.claude-3-sonnet-20240229-v1:0"
42-
),
43-
"system_prompt": agent_config.get("system_prompt"),
44-
"error_patterns": get_default_error_patterns(),
45-
"aws_capabilities": get_aws_service_capabilities(),
46-
}
47-
)
48-
49-
# Apply parameters with type conversion
50-
params = agent_config.get("parameters", {})
51-
config["max_log_events"] = safe_int_conversion(params.get("max_log_events"), 5)
52-
config["time_range_hours_default"] = safe_int_conversion(
53-
params.get("time_range_hours_default"), 24
54-
)
55-
56-
# Apply UI overrides for context limits - UI config takes precedence
57-
if pattern_config and "max_log_events" in pattern_config:
58-
config["max_log_events"] = safe_int_conversion(
59-
pattern_config["max_log_events"], config["max_log_events"]
60-
)
61-
62-
# Validate required fields
63-
if not config.get("system_prompt"):
64-
raise ValueError("system_prompt is required")
65-
66-
configure_logging(
67-
log_level=config.get("log_level"),
68-
strands_log_level=config.get("strands_log_level"),
69-
)
70-
71-
return config
72-
73-
74-
def get_default_error_patterns() -> List[str]:
75-
"""Returns standard error patterns for CloudWatch log filtering."""
76-
return [
77-
"ERROR",
78-
"CRITICAL",
79-
"FATAL",
80-
"Exception",
81-
"Traceback",
82-
"Failed",
83-
"Timeout",
84-
"AccessDenied",
85-
"ThrottlingException",
86-
]
87-
88-
89-
def get_context_limits() -> Dict[str, int]:
90-
"""Returns default resource and context size constraints."""
91-
return {
92-
"max_log_events": 5,
93-
"max_log_message_length": 400,
94-
"max_events_per_log_group": 5,
95-
"max_log_groups": 20,
96-
"max_stepfunction_timeline_events": 3,
97-
"max_stepfunction_error_length": 400,
98-
"time_range_hours_default": 24,
99-
}
100-
101-
10214
def get_aws_service_capabilities() -> Dict[str, Any]:
10315
"""Returns AWS service integration metadata and descriptions."""
10416
return {
@@ -161,12 +73,3 @@ def truncate_message(message: str, max_length: int = 200) -> str:
16173
if len(message) <= max_length:
16274
return message
16375
return message[:max_length] + "... [truncated]"
164-
165-
166-
def get_config_with_fallback() -> Dict[str, Any]:
167-
"""Gets error analyzer config with graceful fallback to defaults."""
168-
try:
169-
return get_error_analyzer_config()
170-
except Exception as e:
171-
logger.warning(f"Failed to load config, using defaults: {e}")
172-
return get_context_limits()

lib/idp_common_pkg/idp_common/agents/error_analyzer/tools/stepfunction_tool.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,11 @@
1111
import boto3
1212
from strands import tool
1313

14+
from idp_common.config import get_config
15+
1416
from ..config import (
1517
create_error_response,
1618
create_response,
17-
get_config_with_fallback,
1819
)
1920

2021
logger = logging.getLogger(__name__)
@@ -98,8 +99,11 @@ def _analyze_execution_timeline(events: List[Dict[str, Any]]) -> Dict[str, Any]:
9899
return {"error": "No execution events available"}
99100

100101
# Cache config values once
101-
config = get_config_with_fallback()
102-
max_timeline_events = config.get("max_stepfunction_timeline_events", 3)
102+
config = get_config(as_model=True)
103+
104+
max_timeline_events = (
105+
config.agents.error_analyzer.parameters.max_stepfunction_timeline_events
106+
)
103107

104108
timeline = []
105109
failure_point = None

lib/idp_common_pkg/idp_common/config/__init__.py

Lines changed: 38 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
ExtractionConfig,
1616
ClassificationConfig,
1717
AssessmentConfig,
18+
SchemaConfig,
1819
SummarizationConfig,
1920
OCRConfig,
2021
AgenticConfig,
@@ -42,9 +43,19 @@ def __init__(self, table_name=None):
4243
self.manager = ConfigurationManager(table_name)
4344
logger.info(f"Initialized ConfigurationReader with ConfigurationManager")
4445

46+
@overload
def get_configuration(
    self, config_type: str, *, as_dict: Literal[True] = True
) -> Optional[Dict[str, Any]]:
    # Typing-only stub for the dict-returning form. The default mirrors the
    # implementation's ``as_dict=True`` so that a call which omits ``as_dict``
    # still matches an overload under a type checker.
    ...
50+
51+
@overload
52+
def get_configuration(
53+
self, config_type: str, *, as_dict: Literal[False]
54+
) -> Optional[Union[IDPConfig, SchemaConfig]]: ...
55+
56+
def get_configuration(
57+
self, config_type: str, *, as_dict: bool = True
58+
) -> Optional[Union[Dict[str, Any], IDPConfig, SchemaConfig]]:
4859
"""
4960
Retrieve a configuration item from DynamoDB with automatic migration
5061
@@ -91,9 +102,17 @@ def simple_merge(
91102
merged = deepcopy(default)
92103
return deep_update(merged, custom)
93104

105+
@overload
106+
def get_merged_configuration(self, *, as_model: Literal[True]) -> IDPConfig: ...
107+
108+
@overload
def get_merged_configuration(
    self, *, as_model: Literal[False] = False
) -> Dict[str, Any]:
    # Typing-only stub for the dict-returning form. The default mirrors the
    # implementation's ``as_model=False`` so that a call which omits
    # ``as_model`` still matches an overload under a type checker.
    ...
112+
113+
def get_merged_configuration(
114+
self, *, as_model: bool = False
115+
) -> Union[IDPConfig, Dict[str, Any]]:
97116
"""
98117
Get and merge Default and Custom configurations with automatic migration
99118
@@ -139,7 +158,21 @@ def get_merged_configuration(
139158
raise
140159

141160

142-
def get_config(table_name=None, as_model: bool = False):
161+
@overload
162+
def get_config(
163+
table_name: Optional[str] = None, *, as_model: Literal[True]
164+
) -> IDPConfig: ...
165+
166+
167+
@overload
def get_config(
    table_name: Optional[str] = None, *, as_model: Literal[False] = False
) -> Dict[str, Any]:
    # Typing-only stub for the dict-returning form. The default mirrors the
    # implementation's ``as_model=False`` so that a bare ``get_config()`` call
    # still matches an overload under a type checker.
    ...
171+
172+
173+
def get_config(
174+
table_name: Optional[str] = None, *, as_model: bool = False
175+
) -> Union[IDPConfig, Dict[str, Any]]:
143176
"""
144177
Get the merged configuration using the environment variable for table name
145178

lib/idp_common_pkg/idp_common/config/models.py

Lines changed: 112 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -304,10 +304,16 @@ def parse_max_workers(cls, v: Any) -> int:
304304

305305
class ErrorAnalyzerParameters(BaseModel):
306306
"""Error analyzer parameters configuration"""
307-
307+
308308
max_log_events: int = Field(default=5, gt=0, description="Maximum number of log events to retrieve")
309309
time_range_hours_default: int = Field(default=24, gt=0, description="Default time range in hours for log searches")
310310

311+
max_log_message_length : int = 400
312+
max_events_per_log_group : int = 5
313+
max_log_groups : int = 20
314+
max_stepfunction_timeline_events : int = 3
315+
max_stepfunction_error_length : int = 400
316+
311317
@field_validator("max_log_events", "time_range_hours_default", mode="before")
312318
@classmethod
313319
def parse_int(cls, v: Any) -> int:
@@ -333,11 +339,115 @@ class ErrorAnalyzerConfig(BaseModel):
333339
description="Error analyzer parameters"
334340
)
335341

342+
error_patterns: list[str] = [
343+
"ERROR",
344+
"CRITICAL",
345+
"FATAL",
346+
"Exception",
347+
"Traceback",
348+
"Failed",
349+
"Timeout",
350+
"AccessDenied",
351+
"ThrottlingException",
352+
]
353+
system_prompt: str = Field(
354+
default="""
355+
You are an intelligent error analysis agent for the GenAI IDP system with access to specialized diagnostic tools.
356+
357+
GENERAL TROUBLESHOOTING WORKFLOW:
358+
1. Identify document status from DynamoDB
359+
2. Find any errors reported during Step Function execution
360+
3. Collect relevant logs from CloudWatch
361+
4. Identify any performance issues from X-Ray traces
362+
5. Provide root cause analysis based on the collected information
363+
364+
TOOL SELECTION STRATEGY:
365+
- If user provides a filename: Use cloudwatch_document_logs and dynamodb_status for document-specific analysis
366+
- For system-wide issues: Use cloudwatch_logs and dynamodb_query
367+
- For execution context: Use lambda_lookup or stepfunction_details
368+
- For distributed tracing: Use xray_trace or xray_performance_analysis
369+
370+
ALWAYS format your response with exactly these three sections in this order:
371+
372+
## Root Cause
373+
Identify the specific underlying technical reason why the error occurred. Focus on the primary cause, not symptoms.
374+
375+
## Recommendations
376+
Provide specific, actionable steps to resolve the issue. Limit to top three recommendations only.
377+
378+
<details>
379+
<summary><strong>Evidence</strong></summary>
380+
381+
Format evidence with source information. Include relevant data from tool responses:
382+
383+
**For CloudWatch logs:**
384+
**Log Group:** [full log_group name]
385+
**Log Stream:** [full log_stream name]
386+
```
387+
[ERROR] timestamp message
388+
```
389+
390+
**For other sources (DynamoDB, Step Functions, X-Ray):**
391+
**Source:** [service name and resource]
392+
```
393+
Relevant data from tool response
394+
```
395+
396+
</details>
397+
398+
FORMATTING RULES:
399+
- Use the exact three-section structure above
400+
- Make Evidence section collapsible using HTML details tags
401+
- Include relevant data from all tool responses (CloudWatch, DynamoDB, Step Functions, X-Ray)
402+
- For CloudWatch: Show complete log group and log stream names without truncation
403+
- Present evidence data in code blocks with appropriate source labels
404+
405+
ANALYSIS GUIDELINES:
406+
- Use multiple tools for comprehensive analysis when needed
407+
- Start with document-specific tools for targeted queries
408+
- Use system-wide tools for pattern analysis
409+
- Combine DynamoDB status with CloudWatch logs for complete picture
410+
- Leverage X-Ray for distributed system issues
411+
412+
ROOT CAUSE DETERMINATION:
413+
1. Document Status: Check dynamodb_status first
414+
2. Execution Details: Use stepfunction_details for workflow failures
415+
3. Log Analysis: Use cloudwatch_document_logs or cloudwatch_logs for error details
416+
4. Distributed Tracing: Use xray_performance_analysis for service interaction issues
417+
5. Context: Use lambda_lookup for execution environment
418+
419+
RECOMMENDATION GUIDELINES:
420+
For code-related issues or system bugs:
421+
- Do not suggest code modifications
422+
- Include error details, timestamps, and context
423+
424+
For configuration-related issues:
425+
- Direct users to UI configuration panel
426+
- Specify exact configuration section and parameter names
427+
428+
For operational issues:
429+
- Provide immediate troubleshooting steps
430+
- Include preventive measures
431+
432+
TIME RANGE PARSING:
433+
- recent: 1 hour
434+
- last week: 168 hours
435+
- last day: 24 hours
436+
- No time specified: 24 hours (default)
437+
438+
IMPORTANT: Do not include any search quality reflections, search quality scores, or meta-analysis sections in your response. Only provide the three required sections: Root Cause, Recommendations, and Evidence.""",
439+
description="System prompt for error analyzer"
440+
)
441+
parameters: ErrorAnalyzerParameters = Field(
442+
default_factory=ErrorAnalyzerParameters,
443+
description="Error analyzer parameters"
444+
)
445+
336446

337447
class AgentsConfig(BaseModel):
    """Agents configuration.

    Groups the per-agent settings sections; the only section declared here
    is the error analyzer agent.
    """

    # Built through default_factory (the same pattern the error analyzer's
    # ``parameters`` field uses) so the default section is constructed when an
    # AgentsConfig is created instead of once at class-definition time.
    error_analyzer: ErrorAnalyzerConfig = Field(default_factory=ErrorAnalyzerConfig)
341451

342452

343453
class PricingUnit(BaseModel):

0 commit comments

Comments
 (0)