Skip to content

Commit 8edde88

Browse files
committed
Merge branch 'bugfix/agent-serviceunavailable-improvements' into 'develop'
Agent Chat Service Unavailable Error handling. See merge request genaiic-reusable-assets/engagement-artifacts/genaiic-idp-accelerator!455
2 parents 2a7b1f9 + d84b769 commit 8edde88

File tree

7 files changed

+880
-84
lines changed

7 files changed

+880
-84
lines changed
Lines changed: 349 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,349 @@
1+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
# SPDX-License-Identifier: MIT-0
3+
4+
"""
5+
Bedrock Error Message Handler
6+
7+
Provides user-friendly error messages and actionable recommendations for Bedrock service errors.
8+
Converts technical error codes and exceptions into clear, understandable messages for end users.
9+
"""
10+
11+
import logging
12+
import re
13+
from dataclasses import dataclass
14+
from typing import Dict, Optional
15+
16+
import botocore.exceptions
17+
18+
logger = logging.getLogger(__name__)
19+
20+
21+
@dataclass
class BedrockErrorInfo:
    """
    User-facing description of a single Bedrock failure.

    Attributes:
        error_type: Short category label for the error (for example
            "service_unavailable" or "rate_limit").
        user_message: Plain-language explanation shown to the end user.
        technical_details: Lower-level description kept for debugging.
        retry_recommended: True when retrying the operation is advised.
        retry_delay_seconds: Suggested wait before retrying, or None when
            no delay is suggested.
        action_recommendations: Steps the user can take, or None when
            there are none.
        is_transient: True when the problem is expected to be temporary.
    """

    # Required fields: every error must state these explicitly.
    error_type: str
    user_message: str
    technical_details: str
    retry_recommended: bool

    # Optional fields: defaults describe a transient error with no
    # specific delay or follow-up actions.
    retry_delay_seconds: Optional[int] = None
    action_recommendations: Optional[list[str]] = None
    is_transient: bool = True
43+
44+
45+
class BedrockErrorMessageHandler:
    """
    Handles conversion of Bedrock errors to user-friendly messages.

    All methods are classmethods; the class holds no instance state. The
    typical entry points are ``get_error_info`` (structured result),
    ``format_error_for_frontend`` (plain dict with camelCase keys) and
    ``is_retryable_error`` (boolean shortcut).
    """

    # Matches the first parenthesised word in a message such as
    # "An error occurred (errorCode) when calling...".
    _ERROR_CODE_PATTERN = re.compile(r"\((\w+)\)")

    # Once this many retries have been made, retrying is no longer
    # recommended and the user message switches to the "ongoing issues" text
    # for any higher count.
    _MAX_RETRY_ATTEMPTS = 3

    # Error type mappings for different Bedrock error codes.
    # Treat the entries as read-only templates: get_error_info() always
    # returns a fresh BedrockErrorInfo built from them.
    ERROR_MAPPINGS: Dict[str, BedrockErrorInfo] = {
        "serviceUnavailableException": BedrockErrorInfo(
            error_type="service_unavailable",
            user_message="The AI service is temporarily unavailable. This is usually a temporary issue that resolves quickly.",
            technical_details="Bedrock service unavailable",
            retry_recommended=True,
            retry_delay_seconds=30,
            action_recommendations=[
                "Wait a moment and try your request again",
                "Check if the issue persists after a few minutes",
                "Contact support if the problem continues",
            ],
            is_transient=True,
        ),
        "ServiceUnavailableException": BedrockErrorInfo(
            error_type="service_unavailable",
            user_message="The AI service is temporarily unavailable. This is usually a temporary issue that resolves quickly.",
            technical_details="Bedrock service unavailable",
            retry_recommended=True,
            retry_delay_seconds=30,
            action_recommendations=[
                "Wait a moment and try your request again",
                "Check if the issue persists after a few minutes",
                "Contact support if the problem continues",
            ],
            is_transient=True,
        ),
        "ThrottlingException": BedrockErrorInfo(
            error_type="rate_limit",
            user_message="Too many requests are being processed right now. Please wait a moment before trying again.",
            technical_details="API rate limit exceeded",
            retry_recommended=True,
            retry_delay_seconds=60,
            action_recommendations=[
                "Wait 1-2 minutes before retrying",
                "Reduce the frequency of your requests",
                "Try again during off-peak hours",
            ],
            is_transient=True,
        ),
        "ModelThrottledException": BedrockErrorInfo(
            error_type="model_throttling",
            user_message="The AI model is currently handling too many requests. Please wait a moment before trying again.",
            technical_details="Model throttling limit reached",
            retry_recommended=True,
            retry_delay_seconds=45,
            action_recommendations=[
                "Wait 1-2 minutes before retrying",
                "Try your request again in a few moments",
                "Consider breaking large requests into smaller parts",
            ],
            is_transient=True,
        ),
        "ValidationException": BedrockErrorInfo(
            error_type="validation_error",
            user_message="There was an issue with your request. Please check your input and try again.",
            technical_details="Request validation failed",
            retry_recommended=False,
            action_recommendations=[
                "Check that your message is not too long",
                "Ensure your request doesn't contain inappropriate content",
                "Try rephrasing your question",
            ],
            is_transient=False,
        ),
        "AccessDeniedException": BedrockErrorInfo(
            error_type="access_denied",
            user_message="Access to the AI service is currently restricted. Please contact your administrator.",
            technical_details="Insufficient permissions for Bedrock access",
            retry_recommended=False,
            action_recommendations=[
                "Contact your system administrator",
                "Verify your account has proper permissions",
                "Check if your organization's AI usage policy allows this request",
            ],
            is_transient=False,
        ),
        "ModelNotReadyException": BedrockErrorInfo(
            error_type="model_unavailable",
            user_message="The requested AI model is not currently available. Please try again later.",
            technical_details="Model not ready or unavailable",
            retry_recommended=True,
            retry_delay_seconds=120,
            action_recommendations=[
                "Wait a few minutes and try again",
                "Contact support if the issue persists",
                "Check if there are any service announcements",
            ],
            is_transient=True,
        ),
        "RequestTimeout": BedrockErrorInfo(
            error_type="timeout",
            user_message="Your request took too long to process. Please try again with a shorter or simpler request.",
            technical_details="Request timeout",
            retry_recommended=True,
            retry_delay_seconds=30,
            action_recommendations=[
                "Try breaking your request into smaller parts",
                "Simplify your question or request",
                "Wait a moment and try again",
            ],
            is_transient=True,
        ),
        "RequestTimeoutException": BedrockErrorInfo(
            error_type="timeout",
            user_message="Your request took too long to process. Please try again with a shorter or simpler request.",
            technical_details="Request timeout",
            retry_recommended=True,
            retry_delay_seconds=30,
            action_recommendations=[
                "Try breaking your request into smaller parts",
                "Simplify your question or request",
                "Wait a moment and try again",
            ],
            is_transient=True,
        ),
        "ServiceQuotaExceededException": BedrockErrorInfo(
            error_type="quota_exceeded",
            user_message="Your usage quota has been exceeded. Please wait or contact your administrator to increase limits.",
            technical_details="Service quota exceeded",
            retry_recommended=True,
            retry_delay_seconds=3600,  # 1 hour
            action_recommendations=[
                "Wait for your quota to reset (usually hourly or daily)",
                "Contact your administrator to increase limits",
                "Reduce the frequency of your requests",
            ],
            is_transient=True,
        ),
        "TooManyRequestsException": BedrockErrorInfo(
            error_type="too_many_requests",
            user_message="Too many requests have been made recently. Please wait before trying again.",
            technical_details="Too many requests",
            retry_recommended=True,
            retry_delay_seconds=300,  # 5 minutes
            action_recommendations=[
                "Wait 5-10 minutes before retrying",
                "Reduce the frequency of your requests",
                "Try again during off-peak hours",
            ],
            is_transient=True,
        ),
    }

    @classmethod
    def _find_mapping(cls, error_code: Optional[str]) -> Optional[BedrockErrorInfo]:
        """
        Look up the template entry for an error code.

        Args:
            error_code: Code returned by ``extract_error_code`` (may be None)

        Returns:
            The matching ERROR_MAPPINGS entry, or None when the code is empty
            or unknown
        """
        if not error_code:
            return None

        exact = cls.ERROR_MAPPINGS.get(error_code)
        if exact is not None:
            return exact

        # The same error can be reported with a lower-case first letter
        # (e.g. "serviceUnavailableException"), so fall back to a
        # case-insensitive comparison instead of requiring an alias key for
        # every code.
        folded = error_code.lower()
        for known_code, info in cls.ERROR_MAPPINGS.items():
            if known_code.lower() == folded:
                return info

        return None

    @classmethod
    def extract_error_code(cls, exception: Exception) -> Optional[str]:
        """
        Extract error code from various exception types.

        Strategies are tried in order: the ``Error.Code`` field of a botocore
        ClientError response, the first parenthesised word in a ClientError
        message, the exception class name, and finally a case-insensitive
        search of the message for any known code.

        Args:
            exception: The exception to analyze

        Returns:
            The error code if found, None otherwise. Codes taken from a
            ClientError are returned as reported and are not guaranteed to be
            a key of ERROR_MAPPINGS.
        """
        # Handle botocore ClientError (most common)
        if isinstance(exception, botocore.exceptions.ClientError):
            # Guard against a missing/None response or "Error" member so a
            # malformed exception cannot raise from inside the error handler.
            response = getattr(exception, "response", None) or {}
            error_code = (response.get("Error") or {}).get("Code")
            if error_code:
                return error_code

            # For EventStreamError (subclass of ClientError), extract from message
            # Format: "An error occurred (errorCode) when calling..."
            match = cls._ERROR_CODE_PATTERN.search(str(exception))
            if match:
                return match.group(1)

        # Handle other exception types by name
        exception_name = type(exception).__name__
        if exception_name in cls.ERROR_MAPPINGS:
            return exception_name

        # Check if exception message contains known error patterns
        exception_str = str(exception).lower()
        for error_code in cls.ERROR_MAPPINGS:
            if error_code.lower() in exception_str:
                return error_code

        return None

    @classmethod
    def get_error_info(
        cls, exception: Exception, retry_attempts: int = 0
    ) -> BedrockErrorInfo:
        """
        Get structured error information for a Bedrock exception.

        Args:
            exception: The exception to analyze
            retry_attempts: Number of retry attempts made; values of zero or
                less are treated as "no retries yet"

        Returns:
            A new BedrockErrorInfo with user-friendly message and
            recommendations. The returned object (including its
            recommendations list) is independent of ERROR_MAPPINGS, so
            callers may modify it freely.
        """
        error_code = cls.extract_error_code(exception)

        # Get base error info from mappings
        error_info = cls._find_mapping(error_code)
        if error_info is None:
            # Default error info for unknown errors
            error_info = BedrockErrorInfo(
                error_type="unknown_error",
                user_message="An unexpected error occurred while processing your request. Please try again.",
                technical_details=f"Unknown error: {str(exception)}",
                retry_recommended=True,
                retry_delay_seconds=30,
                action_recommendations=[
                    "Wait a moment and try again",
                    "Check your internet connection",
                    "Contact support if the problem persists",
                ],
                is_transient=True,
            )

        if retry_attempts > 0:
            technical_details = (
                f"{error_info.technical_details} (after {retry_attempts} retries)"
            )
        else:
            technical_details = error_info.technical_details

        # Copy the list so mutating the result cannot alter the shared
        # class-level template.
        recommendations = error_info.action_recommendations
        if recommendations is not None:
            recommendations = list(recommendations)

        # Enhance error info with retry context
        return BedrockErrorInfo(
            error_type=error_info.error_type,
            user_message=cls._enhance_message_with_retry_context(
                error_info.user_message, retry_attempts
            ),
            technical_details=technical_details,
            # Don't recommend retry once the attempt limit has been reached
            retry_recommended=error_info.retry_recommended
            and retry_attempts < cls._MAX_RETRY_ATTEMPTS,
            retry_delay_seconds=error_info.retry_delay_seconds,
            action_recommendations=recommendations,
            is_transient=error_info.is_transient,
        )

    @classmethod
    def _enhance_message_with_retry_context(
        cls, base_message: str, retry_attempts: int
    ) -> str:
        """
        Enhance error message with retry context.

        Args:
            base_message: The base error message
            retry_attempts: Number of retry attempts made

        Returns:
            The base message unchanged when no retries were made (zero or a
            negative count), otherwise the base message followed by a
            sentence describing how many attempts were made
        """
        # Guard clause also covers negative counts, which would otherwise
        # produce text such as "We tried -1 times".
        if retry_attempts <= 0:
            return base_message
        if retry_attempts == 1:
            return f"{base_message} We tried once more but the issue persists."
        if retry_attempts <= cls._MAX_RETRY_ATTEMPTS:
            return f"{base_message} We tried {retry_attempts} times but the issue persists."
        return f"{base_message} After multiple attempts, the service appears to be experiencing ongoing issues."

    @classmethod
    def format_error_for_frontend(
        cls, exception: Exception, retry_attempts: int = 0
    ) -> Dict[str, object]:
        """
        Format error information for frontend consumption.

        Args:
            exception: The exception to format
            retry_attempts: Number of retry attempts made

        Returns:
            Dictionary with camelCase keys: errorType, message,
            technicalDetails, retryRecommended, retryDelaySeconds,
            actionRecommendations (always a list, possibly empty),
            isTransient and retryAttempts (the value passed in)
        """
        error_info = cls.get_error_info(exception, retry_attempts)

        return {
            "errorType": error_info.error_type,
            "message": error_info.user_message,
            "technicalDetails": error_info.technical_details,
            "retryRecommended": error_info.retry_recommended,
            "retryDelaySeconds": error_info.retry_delay_seconds,
            "actionRecommendations": error_info.action_recommendations or [],
            "isTransient": error_info.is_transient,
            "retryAttempts": retry_attempts,
        }

    @classmethod
    def is_retryable_error(cls, exception: Exception) -> bool:
        """
        Check if an error is retryable based on its type.

        The check is made as if no retries had happened yet, so the result
        depends only on the kind of error.

        Args:
            exception: The exception to check

        Returns:
            True if the error is both retry-recommended and transient,
            False otherwise
        """
        error_info = cls.get_error_info(exception)
        return error_info.retry_recommended and error_info.is_transient

0 commit comments

Comments
 (0)