diff --git a/src/llama_stack_client/_client.py b/src/llama_stack_client/_client.py
index ea0705b6..763dbe24 100644
--- a/src/llama_stack_client/_client.py
+++ b/src/llama_stack_client/_client.py
@@ -105,7 +105,7 @@ def __init__(
base_url: str | httpx.URL | None = None,
timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
- default_headers: dict[str, str] | None = None,
+ default_headers: Mapping[str, str] | None = None,
default_query: Mapping[str, object] | None = None,
# Configure a custom httpx client.
# We provide a `DefaultHttpxClient` class that you can pass to retain the default values we use for `limits`, `timeout` & `follow_redirects`.
@@ -161,9 +161,7 @@ def __init__(
self.routes = routes.RoutesResource(self)
self.safety = safety.SafetyResource(self)
self.shields = shields.ShieldsResource(self)
- self.synthetic_data_generation = (
- synthetic_data_generation.SyntheticDataGenerationResource(self)
- )
+ self.synthetic_data_generation = synthetic_data_generation.SyntheticDataGenerationResource(self)
self.telemetry = telemetry.TelemetryResource(self)
self.datasetio = datasetio.DatasetioResource(self)
self.scoring = scoring.ScoringResource(self)
@@ -193,8 +191,8 @@ def copy(
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
http_client: httpx.Client | None = None,
max_retries: int | NotGiven = NOT_GIVEN,
- default_headers: dict[str, str] | None = None,
- set_default_headers: dict[str, str] | None = None,
+ default_headers: Mapping[str, str] | None = None,
+ set_default_headers: Mapping[str, str] | None = None,
default_query: Mapping[str, object] | None = None,
set_default_query: Mapping[str, object] | None = None,
_extra_kwargs: Mapping[str, Any] = {},
@@ -203,14 +201,10 @@ def copy(
Create a new client instance re-using the same options given to the current client with optional overriding.
"""
if default_headers is not None and set_default_headers is not None:
- raise ValueError(
- "The `default_headers` and `set_default_headers` arguments are mutually exclusive"
- )
+ raise ValueError("The `default_headers` and `set_default_headers` arguments are mutually exclusive")
if default_query is not None and set_default_query is not None:
- raise ValueError(
- "The `default_query` and `set_default_query` arguments are mutually exclusive"
- )
+ raise ValueError("The `default_query` and `set_default_query` arguments are mutually exclusive")
headers = self._custom_headers
if default_headers is not None:
@@ -251,14 +245,10 @@ def _make_status_error(
return _exceptions.BadRequestError(err_msg, response=response, body=body)
if response.status_code == 401:
- return _exceptions.AuthenticationError(
- err_msg, response=response, body=body
- )
+ return _exceptions.AuthenticationError(err_msg, response=response, body=body)
if response.status_code == 403:
- return _exceptions.PermissionDeniedError(
- err_msg, response=response, body=body
- )
+ return _exceptions.PermissionDeniedError(err_msg, response=response, body=body)
if response.status_code == 404:
return _exceptions.NotFoundError(err_msg, response=response, body=body)
@@ -267,17 +257,13 @@ def _make_status_error(
return _exceptions.ConflictError(err_msg, response=response, body=body)
if response.status_code == 422:
- return _exceptions.UnprocessableEntityError(
- err_msg, response=response, body=body
- )
+ return _exceptions.UnprocessableEntityError(err_msg, response=response, body=body)
if response.status_code == 429:
return _exceptions.RateLimitError(err_msg, response=response, body=body)
if response.status_code >= 500:
- return _exceptions.InternalServerError(
- err_msg, response=response, body=body
- )
+ return _exceptions.InternalServerError(err_msg, response=response, body=body)
return APIStatusError(err_msg, response=response, body=body)
@@ -299,9 +285,7 @@ class AsyncLlamaStackClient(AsyncAPIClient):
routes: routes.AsyncRoutesResource
safety: safety.AsyncSafetyResource
shields: shields.AsyncShieldsResource
- synthetic_data_generation: (
- synthetic_data_generation.AsyncSyntheticDataGenerationResource
- )
+ synthetic_data_generation: synthetic_data_generation.AsyncSyntheticDataGenerationResource
telemetry: telemetry.AsyncTelemetryResource
datasetio: datasetio.AsyncDatasetioResource
scoring: scoring.AsyncScoringResource
@@ -318,7 +302,7 @@ def __init__(
base_url: str | httpx.URL | None = None,
timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
- default_headers: dict[str, str] | None = None,
+ default_headers: Mapping[str, str] | None = None,
default_query: Mapping[str, object] | None = None,
# Configure a custom httpx client.
# We provide a `DefaultAsyncHttpxClient` class that you can pass to retain the default values we use for `limits`, `timeout` & `follow_redirects`.
@@ -374,9 +358,7 @@ def __init__(
self.routes = routes.AsyncRoutesResource(self)
self.safety = safety.AsyncSafetyResource(self)
self.shields = shields.AsyncShieldsResource(self)
- self.synthetic_data_generation = (
- synthetic_data_generation.AsyncSyntheticDataGenerationResource(self)
- )
+ self.synthetic_data_generation = synthetic_data_generation.AsyncSyntheticDataGenerationResource(self)
self.telemetry = telemetry.AsyncTelemetryResource(self)
self.datasetio = datasetio.AsyncDatasetioResource(self)
self.scoring = scoring.AsyncScoringResource(self)
@@ -406,8 +388,8 @@ def copy(
timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
http_client: httpx.AsyncClient | None = None,
max_retries: int | NotGiven = NOT_GIVEN,
- default_headers: dict[str, str] | None = None,
- set_default_headers: dict[str, str] | None = None,
+ default_headers: Mapping[str, str] | None = None,
+ set_default_headers: Mapping[str, str] | None = None,
default_query: Mapping[str, object] | None = None,
set_default_query: Mapping[str, object] | None = None,
_extra_kwargs: Mapping[str, Any] = {},
@@ -416,14 +398,10 @@ def copy(
Create a new client instance re-using the same options given to the current client with optional overriding.
"""
if default_headers is not None and set_default_headers is not None:
- raise ValueError(
- "The `default_headers` and `set_default_headers` arguments are mutually exclusive"
- )
+ raise ValueError("The `default_headers` and `set_default_headers` arguments are mutually exclusive")
if default_query is not None and set_default_query is not None:
- raise ValueError(
- "The `default_query` and `set_default_query` arguments are mutually exclusive"
- )
+ raise ValueError("The `default_query` and `set_default_query` arguments are mutually exclusive")
headers = self._custom_headers
if default_headers is not None:
@@ -464,14 +442,10 @@ def _make_status_error(
return _exceptions.BadRequestError(err_msg, response=response, body=body)
if response.status_code == 401:
- return _exceptions.AuthenticationError(
- err_msg, response=response, body=body
- )
+ return _exceptions.AuthenticationError(err_msg, response=response, body=body)
if response.status_code == 403:
- return _exceptions.PermissionDeniedError(
- err_msg, response=response, body=body
- )
+ return _exceptions.PermissionDeniedError(err_msg, response=response, body=body)
if response.status_code == 404:
return _exceptions.NotFoundError(err_msg, response=response, body=body)
@@ -480,232 +454,138 @@ def _make_status_error(
return _exceptions.ConflictError(err_msg, response=response, body=body)
if response.status_code == 422:
- return _exceptions.UnprocessableEntityError(
- err_msg, response=response, body=body
- )
+ return _exceptions.UnprocessableEntityError(err_msg, response=response, body=body)
if response.status_code == 429:
return _exceptions.RateLimitError(err_msg, response=response, body=body)
if response.status_code >= 500:
- return _exceptions.InternalServerError(
- err_msg, response=response, body=body
- )
+ return _exceptions.InternalServerError(err_msg, response=response, body=body)
return APIStatusError(err_msg, response=response, body=body)
class LlamaStackClientWithRawResponse:
def __init__(self, client: LlamaStackClient) -> None:
- self.toolgroups = toolgroups.ToolgroupsResourceWithRawResponse(
- client.toolgroups
- )
+ self.toolgroups = toolgroups.ToolgroupsResourceWithRawResponse(client.toolgroups)
self.tools = tools.ToolsResourceWithRawResponse(client.tools)
- self.tool_runtime = tool_runtime.ToolRuntimeResourceWithRawResponse(
- client.tool_runtime
- )
+ self.tool_runtime = tool_runtime.ToolRuntimeResourceWithRawResponse(client.tool_runtime)
self.agents = agents.AgentsResourceWithRawResponse(client.agents)
- self.batch_inference = batch_inference.BatchInferenceResourceWithRawResponse(
- client.batch_inference
- )
+ self.batch_inference = batch_inference.BatchInferenceResourceWithRawResponse(client.batch_inference)
self.datasets = datasets.DatasetsResourceWithRawResponse(client.datasets)
self.eval = eval.EvalResourceWithRawResponse(client.eval)
self.inspect = inspect.InspectResourceWithRawResponse(client.inspect)
self.inference = inference.InferenceResourceWithRawResponse(client.inference)
self.memory = memory.MemoryResourceWithRawResponse(client.memory)
- self.memory_banks = memory_banks.MemoryBanksResourceWithRawResponse(
- client.memory_banks
- )
+ self.memory_banks = memory_banks.MemoryBanksResourceWithRawResponse(client.memory_banks)
self.models = models.ModelsResourceWithRawResponse(client.models)
- self.post_training = post_training.PostTrainingResourceWithRawResponse(
- client.post_training
- )
+ self.post_training = post_training.PostTrainingResourceWithRawResponse(client.post_training)
self.providers = providers.ProvidersResourceWithRawResponse(client.providers)
self.routes = routes.RoutesResourceWithRawResponse(client.routes)
self.safety = safety.SafetyResourceWithRawResponse(client.safety)
self.shields = shields.ShieldsResourceWithRawResponse(client.shields)
- self.synthetic_data_generation = (
- synthetic_data_generation.SyntheticDataGenerationResourceWithRawResponse(
- client.synthetic_data_generation
- )
+ self.synthetic_data_generation = synthetic_data_generation.SyntheticDataGenerationResourceWithRawResponse(
+ client.synthetic_data_generation
)
self.telemetry = telemetry.TelemetryResourceWithRawResponse(client.telemetry)
self.datasetio = datasetio.DatasetioResourceWithRawResponse(client.datasetio)
self.scoring = scoring.ScoringResourceWithRawResponse(client.scoring)
- self.scoring_functions = (
- scoring_functions.ScoringFunctionsResourceWithRawResponse(
- client.scoring_functions
- )
- )
+ self.scoring_functions = scoring_functions.ScoringFunctionsResourceWithRawResponse(client.scoring_functions)
self.eval_tasks = eval_tasks.EvalTasksResourceWithRawResponse(client.eval_tasks)
class AsyncLlamaStackClientWithRawResponse:
def __init__(self, client: AsyncLlamaStackClient) -> None:
- self.toolgroups = toolgroups.AsyncToolgroupsResourceWithRawResponse(
- client.toolgroups
- )
+ self.toolgroups = toolgroups.AsyncToolgroupsResourceWithRawResponse(client.toolgroups)
self.tools = tools.AsyncToolsResourceWithRawResponse(client.tools)
- self.tool_runtime = tool_runtime.AsyncToolRuntimeResourceWithRawResponse(
- client.tool_runtime
- )
+ self.tool_runtime = tool_runtime.AsyncToolRuntimeResourceWithRawResponse(client.tool_runtime)
self.agents = agents.AsyncAgentsResourceWithRawResponse(client.agents)
- self.batch_inference = (
- batch_inference.AsyncBatchInferenceResourceWithRawResponse(
- client.batch_inference
- )
- )
+ self.batch_inference = batch_inference.AsyncBatchInferenceResourceWithRawResponse(client.batch_inference)
self.datasets = datasets.AsyncDatasetsResourceWithRawResponse(client.datasets)
self.eval = eval.AsyncEvalResourceWithRawResponse(client.eval)
self.inspect = inspect.AsyncInspectResourceWithRawResponse(client.inspect)
- self.inference = inference.AsyncInferenceResourceWithRawResponse(
- client.inference
- )
+ self.inference = inference.AsyncInferenceResourceWithRawResponse(client.inference)
self.memory = memory.AsyncMemoryResourceWithRawResponse(client.memory)
- self.memory_banks = memory_banks.AsyncMemoryBanksResourceWithRawResponse(
- client.memory_banks
- )
+ self.memory_banks = memory_banks.AsyncMemoryBanksResourceWithRawResponse(client.memory_banks)
self.models = models.AsyncModelsResourceWithRawResponse(client.models)
- self.post_training = post_training.AsyncPostTrainingResourceWithRawResponse(
- client.post_training
- )
- self.providers = providers.AsyncProvidersResourceWithRawResponse(
- client.providers
- )
+ self.post_training = post_training.AsyncPostTrainingResourceWithRawResponse(client.post_training)
+ self.providers = providers.AsyncProvidersResourceWithRawResponse(client.providers)
self.routes = routes.AsyncRoutesResourceWithRawResponse(client.routes)
self.safety = safety.AsyncSafetyResourceWithRawResponse(client.safety)
self.shields = shields.AsyncShieldsResourceWithRawResponse(client.shields)
self.synthetic_data_generation = synthetic_data_generation.AsyncSyntheticDataGenerationResourceWithRawResponse(
client.synthetic_data_generation
)
- self.telemetry = telemetry.AsyncTelemetryResourceWithRawResponse(
- client.telemetry
- )
- self.datasetio = datasetio.AsyncDatasetioResourceWithRawResponse(
- client.datasetio
- )
+ self.telemetry = telemetry.AsyncTelemetryResourceWithRawResponse(client.telemetry)
+ self.datasetio = datasetio.AsyncDatasetioResourceWithRawResponse(client.datasetio)
self.scoring = scoring.AsyncScoringResourceWithRawResponse(client.scoring)
- self.scoring_functions = (
- scoring_functions.AsyncScoringFunctionsResourceWithRawResponse(
- client.scoring_functions
- )
- )
- self.eval_tasks = eval_tasks.AsyncEvalTasksResourceWithRawResponse(
- client.eval_tasks
+ self.scoring_functions = scoring_functions.AsyncScoringFunctionsResourceWithRawResponse(
+ client.scoring_functions
)
+ self.eval_tasks = eval_tasks.AsyncEvalTasksResourceWithRawResponse(client.eval_tasks)
class LlamaStackClientWithStreamedResponse:
def __init__(self, client: LlamaStackClient) -> None:
- self.toolgroups = toolgroups.ToolgroupsResourceWithStreamingResponse(
- client.toolgroups
- )
+ self.toolgroups = toolgroups.ToolgroupsResourceWithStreamingResponse(client.toolgroups)
self.tools = tools.ToolsResourceWithStreamingResponse(client.tools)
- self.tool_runtime = tool_runtime.ToolRuntimeResourceWithStreamingResponse(
- client.tool_runtime
- )
+ self.tool_runtime = tool_runtime.ToolRuntimeResourceWithStreamingResponse(client.tool_runtime)
self.agents = agents.AgentsResourceWithStreamingResponse(client.agents)
- self.batch_inference = (
- batch_inference.BatchInferenceResourceWithStreamingResponse(
- client.batch_inference
- )
- )
+ self.batch_inference = batch_inference.BatchInferenceResourceWithStreamingResponse(client.batch_inference)
self.datasets = datasets.DatasetsResourceWithStreamingResponse(client.datasets)
self.eval = eval.EvalResourceWithStreamingResponse(client.eval)
self.inspect = inspect.InspectResourceWithStreamingResponse(client.inspect)
- self.inference = inference.InferenceResourceWithStreamingResponse(
- client.inference
- )
+ self.inference = inference.InferenceResourceWithStreamingResponse(client.inference)
self.memory = memory.MemoryResourceWithStreamingResponse(client.memory)
- self.memory_banks = memory_banks.MemoryBanksResourceWithStreamingResponse(
- client.memory_banks
- )
+ self.memory_banks = memory_banks.MemoryBanksResourceWithStreamingResponse(client.memory_banks)
self.models = models.ModelsResourceWithStreamingResponse(client.models)
- self.post_training = post_training.PostTrainingResourceWithStreamingResponse(
- client.post_training
- )
- self.providers = providers.ProvidersResourceWithStreamingResponse(
- client.providers
- )
+ self.post_training = post_training.PostTrainingResourceWithStreamingResponse(client.post_training)
+ self.providers = providers.ProvidersResourceWithStreamingResponse(client.providers)
self.routes = routes.RoutesResourceWithStreamingResponse(client.routes)
self.safety = safety.SafetyResourceWithStreamingResponse(client.safety)
self.shields = shields.ShieldsResourceWithStreamingResponse(client.shields)
self.synthetic_data_generation = synthetic_data_generation.SyntheticDataGenerationResourceWithStreamingResponse(
client.synthetic_data_generation
)
- self.telemetry = telemetry.TelemetryResourceWithStreamingResponse(
- client.telemetry
- )
- self.datasetio = datasetio.DatasetioResourceWithStreamingResponse(
- client.datasetio
- )
+ self.telemetry = telemetry.TelemetryResourceWithStreamingResponse(client.telemetry)
+ self.datasetio = datasetio.DatasetioResourceWithStreamingResponse(client.datasetio)
self.scoring = scoring.ScoringResourceWithStreamingResponse(client.scoring)
- self.scoring_functions = (
- scoring_functions.ScoringFunctionsResourceWithStreamingResponse(
- client.scoring_functions
- )
- )
- self.eval_tasks = eval_tasks.EvalTasksResourceWithStreamingResponse(
- client.eval_tasks
+ self.scoring_functions = scoring_functions.ScoringFunctionsResourceWithStreamingResponse(
+ client.scoring_functions
)
+ self.eval_tasks = eval_tasks.EvalTasksResourceWithStreamingResponse(client.eval_tasks)
class AsyncLlamaStackClientWithStreamedResponse:
def __init__(self, client: AsyncLlamaStackClient) -> None:
- self.toolgroups = toolgroups.AsyncToolgroupsResourceWithStreamingResponse(
- client.toolgroups
- )
+ self.toolgroups = toolgroups.AsyncToolgroupsResourceWithStreamingResponse(client.toolgroups)
self.tools = tools.AsyncToolsResourceWithStreamingResponse(client.tools)
- self.tool_runtime = tool_runtime.AsyncToolRuntimeResourceWithStreamingResponse(
- client.tool_runtime
- )
+ self.tool_runtime = tool_runtime.AsyncToolRuntimeResourceWithStreamingResponse(client.tool_runtime)
self.agents = agents.AsyncAgentsResourceWithStreamingResponse(client.agents)
- self.batch_inference = (
- batch_inference.AsyncBatchInferenceResourceWithStreamingResponse(
- client.batch_inference
- )
- )
- self.datasets = datasets.AsyncDatasetsResourceWithStreamingResponse(
- client.datasets
- )
+ self.batch_inference = batch_inference.AsyncBatchInferenceResourceWithStreamingResponse(client.batch_inference)
+ self.datasets = datasets.AsyncDatasetsResourceWithStreamingResponse(client.datasets)
self.eval = eval.AsyncEvalResourceWithStreamingResponse(client.eval)
self.inspect = inspect.AsyncInspectResourceWithStreamingResponse(client.inspect)
- self.inference = inference.AsyncInferenceResourceWithStreamingResponse(
- client.inference
- )
+ self.inference = inference.AsyncInferenceResourceWithStreamingResponse(client.inference)
self.memory = memory.AsyncMemoryResourceWithStreamingResponse(client.memory)
- self.memory_banks = memory_banks.AsyncMemoryBanksResourceWithStreamingResponse(
- client.memory_banks
- )
+ self.memory_banks = memory_banks.AsyncMemoryBanksResourceWithStreamingResponse(client.memory_banks)
self.models = models.AsyncModelsResourceWithStreamingResponse(client.models)
- self.post_training = (
- post_training.AsyncPostTrainingResourceWithStreamingResponse(
- client.post_training
- )
- )
- self.providers = providers.AsyncProvidersResourceWithStreamingResponse(
- client.providers
- )
+ self.post_training = post_training.AsyncPostTrainingResourceWithStreamingResponse(client.post_training)
+ self.providers = providers.AsyncProvidersResourceWithStreamingResponse(client.providers)
self.routes = routes.AsyncRoutesResourceWithStreamingResponse(client.routes)
self.safety = safety.AsyncSafetyResourceWithStreamingResponse(client.safety)
self.shields = shields.AsyncShieldsResourceWithStreamingResponse(client.shields)
- self.synthetic_data_generation = synthetic_data_generation.AsyncSyntheticDataGenerationResourceWithStreamingResponse(
- client.synthetic_data_generation
- )
- self.telemetry = telemetry.AsyncTelemetryResourceWithStreamingResponse(
- client.telemetry
- )
- self.datasetio = datasetio.AsyncDatasetioResourceWithStreamingResponse(
- client.datasetio
- )
- self.scoring = scoring.AsyncScoringResourceWithStreamingResponse(client.scoring)
- self.scoring_functions = (
- scoring_functions.AsyncScoringFunctionsResourceWithStreamingResponse(
- client.scoring_functions
+ self.synthetic_data_generation = (
+ synthetic_data_generation.AsyncSyntheticDataGenerationResourceWithStreamingResponse(
+ client.synthetic_data_generation
)
)
- self.eval_tasks = eval_tasks.AsyncEvalTasksResourceWithStreamingResponse(
- client.eval_tasks
+ self.telemetry = telemetry.AsyncTelemetryResourceWithStreamingResponse(client.telemetry)
+ self.datasetio = datasetio.AsyncDatasetioResourceWithStreamingResponse(client.datasetio)
+ self.scoring = scoring.AsyncScoringResourceWithStreamingResponse(client.scoring)
+ self.scoring_functions = scoring_functions.AsyncScoringFunctionsResourceWithStreamingResponse(
+ client.scoring_functions
)
+ self.eval_tasks = eval_tasks.AsyncEvalTasksResourceWithStreamingResponse(client.eval_tasks)
Client = LlamaStackClient
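Note on the `dict` -> `Mapping` widening above: callers can now pass any read-only mapping as `default_headers`. A minimal sketch, assuming a locally running stack at a placeholder URL:

    from types import MappingProxyType

    from llama_stack_client import LlamaStackClient

    # An immutable Mapping now satisfies the annotation; previously only a
    # plain dict type-checked.
    headers = MappingProxyType({"X-Request-Source": "example"})
    client = LlamaStackClient(base_url="http://localhost:5000", default_headers=headers)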
diff --git a/src/llama_stack_client/_models.py b/src/llama_stack_client/_models.py
index d56ea1d9..9a918aab 100644
--- a/src/llama_stack_client/_models.py
+++ b/src/llama_stack_client/_models.py
@@ -179,14 +179,14 @@ def __str__(self) -> str:
@classmethod
@override
def construct( # pyright: ignore[reportIncompatibleMethodOverride]
- cls: Type[ModelT],
+ __cls: Type[ModelT],
_fields_set: set[str] | None = None,
**values: object,
) -> ModelT:
- m = cls.__new__(cls)
+ m = __cls.__new__(__cls)
fields_values: dict[str, object] = {}
- config = get_model_config(cls)
+ config = get_model_config(__cls)
populate_by_name = (
config.allow_population_by_field_name
if isinstance(config, _ConfigProtocol)
@@ -196,7 +196,7 @@ def construct( # pyright: ignore[reportIncompatibleMethodOverride]
if _fields_set is None:
_fields_set = set()
- model_fields = get_model_fields(cls)
+ model_fields = get_model_fields(__cls)
for name, field in model_fields.items():
key = field.alias
if key is None or (key not in values and populate_by_name):
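The `cls` -> `__cls` rename above is what makes a field literally named `cls` constructible: the keyword no longer collides with the classmethod's first parameter, so it falls through to `**values`. A sketch mirroring the new test at the bottom of this diff:

    from llama_stack_client._models import BaseModel, construct_type

    class Model(BaseModel):
        cls: str  # collided with the old `cls` parameter and raised TypeError

    m = construct_type(value={"cls": "foo"}, type_=Model)
    assert m.cls == "foo"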
diff --git a/src/llama_stack_client/types/__init__.py b/src/llama_stack_client/types/__init__.py
index be552ab7..c141c4db 100644
--- a/src/llama_stack_client/types/__init__.py
+++ b/src/llama_stack_client/types/__init__.py
@@ -13,6 +13,7 @@
ReturnType as ReturnType,
AgentConfig as AgentConfig,
UserMessage as UserMessage,
+ ContentDelta as ContentDelta,
ScoringResult as ScoringResult,
SystemMessage as SystemMessage,
SamplingParams as SamplingParams,
diff --git a/src/llama_stack_client/types/agents/turn_create_response.py b/src/llama_stack_client/types/agents/turn_create_response.py
index da651c3d..055c1fe6 100644
--- a/src/llama_stack_client/types/agents/turn_create_response.py
+++ b/src/llama_stack_client/types/agents/turn_create_response.py
@@ -6,9 +6,9 @@
from .turn import Turn
from ..._models import BaseModel
from ..inference_step import InferenceStep
-from ..shared.tool_call import ToolCall
from ..shield_call_step import ShieldCallStep
from ..tool_execution_step import ToolExecutionStep
+from ..shared.content_delta import ContentDelta
from ..memory_retrieval_step import MemoryRetrievalStep
__all__ = [
@@ -18,8 +18,6 @@
"AgentTurnResponseStreamChunkEventPayload",
"AgentTurnResponseStreamChunkEventPayloadAgentTurnResponseStepStartPayload",
"AgentTurnResponseStreamChunkEventPayloadAgentTurnResponseStepProgressPayload",
- "AgentTurnResponseStreamChunkEventPayloadAgentTurnResponseStepProgressPayloadToolCallDelta",
- "AgentTurnResponseStreamChunkEventPayloadAgentTurnResponseStepProgressPayloadToolCallDeltaContent",
"AgentTurnResponseStreamChunkEventPayloadAgentTurnResponseStepCompletePayload",
"AgentTurnResponseStreamChunkEventPayloadAgentTurnResponseStepCompletePayloadStepDetails",
"AgentTurnResponseStreamChunkEventPayloadAgentTurnResponseTurnStartPayload",
@@ -37,30 +35,15 @@ class AgentTurnResponseStreamChunkEventPayloadAgentTurnResponseStepStartPayload(
metadata: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None
-AgentTurnResponseStreamChunkEventPayloadAgentTurnResponseStepProgressPayloadToolCallDeltaContent: TypeAlias = Union[
- str, ToolCall
-]
-
-
-class AgentTurnResponseStreamChunkEventPayloadAgentTurnResponseStepProgressPayloadToolCallDelta(BaseModel):
- content: AgentTurnResponseStreamChunkEventPayloadAgentTurnResponseStepProgressPayloadToolCallDeltaContent
-
- parse_status: Literal["started", "in_progress", "failure", "success"]
-
-
class AgentTurnResponseStreamChunkEventPayloadAgentTurnResponseStepProgressPayload(BaseModel):
+ delta: ContentDelta
+
event_type: Literal["step_progress"]
step_id: str
step_type: Literal["inference", "tool_execution", "shield_call", "memory_retrieval"]
- text_delta: Optional[str] = None
-
- tool_call_delta: Optional[
- AgentTurnResponseStreamChunkEventPayloadAgentTurnResponseStepProgressPayloadToolCallDelta
- ] = None
-
AgentTurnResponseStreamChunkEventPayloadAgentTurnResponseStepCompletePayloadStepDetails: TypeAlias = Union[
InferenceStep, ToolExecutionStep, ShieldCallStep, MemoryRetrievalStep
diff --git a/src/llama_stack_client/types/inference_chat_completion_response.py b/src/llama_stack_client/types/inference_chat_completion_response.py
index 658fa52d..26844a45 100644
--- a/src/llama_stack_client/types/inference_chat_completion_response.py
+++ b/src/llama_stack_client/types/inference_chat_completion_response.py
@@ -6,6 +6,7 @@
from .._models import BaseModel
from .token_log_probs import TokenLogProbs
from .shared.tool_call import ToolCall
+from .shared.content_delta import ContentDelta
from .shared.interleaved_content import InterleavedContent
__all__ = [
@@ -14,9 +15,6 @@
"ChatCompletionResponseCompletionMessage",
"ChatCompletionResponseStreamChunk",
"ChatCompletionResponseStreamChunkEvent",
- "ChatCompletionResponseStreamChunkEventDelta",
- "ChatCompletionResponseStreamChunkEventDeltaToolCallDelta",
- "ChatCompletionResponseStreamChunkEventDeltaToolCallDeltaContent",
]
@@ -36,22 +34,8 @@ class ChatCompletionResponse(BaseModel):
logprobs: Optional[List[TokenLogProbs]] = None
-ChatCompletionResponseStreamChunkEventDeltaToolCallDeltaContent: TypeAlias = Union[str, ToolCall]
-
-
-class ChatCompletionResponseStreamChunkEventDeltaToolCallDelta(BaseModel):
- content: ChatCompletionResponseStreamChunkEventDeltaToolCallDeltaContent
-
- parse_status: Literal["started", "in_progress", "failure", "success"]
-
-
-ChatCompletionResponseStreamChunkEventDelta: TypeAlias = Union[
- str, ChatCompletionResponseStreamChunkEventDeltaToolCallDelta
-]
-
-
class ChatCompletionResponseStreamChunkEvent(BaseModel):
- delta: ChatCompletionResponseStreamChunkEventDelta
+ delta: ContentDelta
event_type: Literal["start", "complete", "progress"]
diff --git a/src/llama_stack_client/types/shared/__init__.py b/src/llama_stack_client/types/shared/__init__.py
index aeafd687..fee78dd0 100644
--- a/src/llama_stack_client/types/shared/__init__.py
+++ b/src/llama_stack_client/types/shared/__init__.py
@@ -6,6 +6,7 @@
from .return_type import ReturnType as ReturnType
from .agent_config import AgentConfig as AgentConfig
from .user_message import UserMessage as UserMessage
+from .content_delta import ContentDelta as ContentDelta
from .scoring_result import ScoringResult as ScoringResult
from .system_message import SystemMessage as SystemMessage
from .sampling_params import SamplingParams as SamplingParams
diff --git a/src/llama_stack_client/types/shared/content_delta.py b/src/llama_stack_client/types/shared/content_delta.py
new file mode 100644
index 00000000..f4f03a68
--- /dev/null
+++ b/src/llama_stack_client/types/shared/content_delta.py
@@ -0,0 +1,35 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union
+from typing_extensions import Literal, TypeAlias
+
+from ..._models import BaseModel
+from .tool_call import ToolCall
+
+__all__ = ["ContentDelta", "UnionMember0", "UnionMember1", "ToolCallDelta", "ToolCallDeltaContent"]
+
+
+class UnionMember0(BaseModel):
+ text: str
+
+ type: Literal["text"]
+
+
+class UnionMember1(BaseModel):
+ data: str
+
+ type: Literal["image"]
+
+
+ToolCallDeltaContent: TypeAlias = Union[str, ToolCall]
+
+
+class ToolCallDelta(BaseModel):
+ content: ToolCallDeltaContent
+
+ parse_status: Literal["started", "in_progress", "failed", "succeeded"]
+
+ type: Literal["tool_call"]
+
+
+ContentDelta: TypeAlias = Union[UnionMember0, UnionMember1, ToolCallDelta]
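The new `ContentDelta` union replaces the ad-hoc `text_delta`/`tool_call_delta` pair, so stream consumers can switch on the `type` tag instead of probing optional fields. A sketch of a handler (the helper name is illustrative):

    from llama_stack_client.types.shared import ContentDelta

    def render_delta(delta: ContentDelta) -> str:
        if delta.type == "text":
            return delta.text
        if delta.type == "image":
            return f"<image delta: {len(delta.data)} base64 chars>"
        # type == "tool_call": content is a raw string or a parsed ToolCall
        if isinstance(delta.content, str):
            return delta.content
        return f"[tool call: {delta.content.tool_name}]"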
diff --git a/src/llama_stack_client/types/shared/tool_response_message.py b/src/llama_stack_client/types/shared/tool_response_message.py
index b3abdb8d..30efa449 100644
--- a/src/llama_stack_client/types/shared/tool_response_message.py
+++ b/src/llama_stack_client/types/shared/tool_response_message.py
@@ -14,6 +14,6 @@ class ToolResponseMessage(BaseModel):
content: InterleavedContent
- role: Literal["ipython"]
+ role: Literal["tool"]
tool_name: Union[Literal["brave_search", "wolfram_alpha", "photogen", "code_interpreter"], str]
diff --git a/src/llama_stack_client/types/shared_params/tool_response_message.py b/src/llama_stack_client/types/shared_params/tool_response_message.py
index 6b6aaea6..625c9ba2 100644
--- a/src/llama_stack_client/types/shared_params/tool_response_message.py
+++ b/src/llama_stack_client/types/shared_params/tool_response_message.py
@@ -15,6 +15,6 @@ class ToolResponseMessage(TypedDict, total=False):
content: Required[InterleavedContent]
- role: Required[Literal["ipython"]]
+ role: Required[Literal["tool"]]
tool_name: Required[Union[Literal["brave_search", "wolfram_alpha", "photogen", "code_interpreter"], str]]
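The `role` literal change is breaking for hand-built tool responses: `"ipython"` no longer type-checks. A sketch under the new literal (the call id is a made-up value):

    from llama_stack_client.types.shared_params import ToolResponseMessage

    msg: ToolResponseMessage = {
        "call_id": "call-123",  # hypothetical id echoed from the tool call
        "content": "temperature: 72F",
        "role": "tool",  # was "ipython" before this patch
        "tool_name": "brave_search",
    }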
diff --git a/src/llama_stack_client/types/tool.py b/src/llama_stack_client/types/tool.py
index dcf6b47e..c1c0cce7 100644
--- a/src/llama_stack_client/types/tool.py
+++ b/src/llama_stack_client/types/tool.py
@@ -38,16 +38,3 @@ class Tool(BaseModel):
type: Literal["tool"]
metadata: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None
-
- tool_prompt_format: Optional[Literal["json", "function_tag", "python_list"]] = None
- """
- `json` -- Refers to the json format for calling tools. The json format takes the
- form like { "type": "function", "function" : { "name": "function_name",
- "description": "function_description", "parameters": {...} } }
-
- `function_tag` -- This is an example of how you could define your own user
- defined format for making tool calls. The function_tag format looks like this,
- (parameters)
-
- The detailed prompts for each of these formats are added to llama cli
- """
diff --git a/src/llama_stack_client/types/tool_def.py b/src/llama_stack_client/types/tool_def.py
index c6114ab9..d96c5c5d 100644
--- a/src/llama_stack_client/types/tool_def.py
+++ b/src/llama_stack_client/types/tool_def.py
@@ -1,7 +1,6 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from typing import Dict, List, Union, Optional
-from typing_extensions import Literal
from .._models import BaseModel
@@ -28,16 +27,3 @@ class ToolDef(BaseModel):
metadata: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None
parameters: Optional[List[Parameter]] = None
-
- tool_prompt_format: Optional[Literal["json", "function_tag", "python_list"]] = None
- """
- `json` -- Refers to the json format for calling tools. The json format takes the
- form like { "type": "function", "function" : { "name": "function_name",
- "description": "function_description", "parameters": {...} } }
-
- `function_tag` -- This is an example of how you could define your own user
- defined format for making tool calls. The function_tag format looks like this,
- (parameters)
-
- The detailed prompts for each of these formats are added to llama cli
- """
diff --git a/src/llama_stack_client/types/tool_def_param.py b/src/llama_stack_client/types/tool_def_param.py
index 10950cf2..42d27fbd 100644
--- a/src/llama_stack_client/types/tool_def_param.py
+++ b/src/llama_stack_client/types/tool_def_param.py
@@ -3,7 +3,7 @@
from __future__ import annotations
from typing import Dict, Union, Iterable
-from typing_extensions import Literal, Required, TypedDict
+from typing_extensions import Required, TypedDict
__all__ = ["ToolDefParam", "Parameter"]
@@ -28,16 +28,3 @@ class ToolDefParam(TypedDict, total=False):
metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]]
parameters: Iterable[Parameter]
-
- tool_prompt_format: Literal["json", "function_tag", "python_list"]
- """
- `json` -- Refers to the json format for calling tools. The json format takes the
- form like { "type": "function", "function" : { "name": "function_name",
- "description": "function_description", "parameters": {...} } }
-
- `function_tag` -- This is an example of how you could define your own user
- defined format for making tool calls. The function_tag format looks like this,
- (parameters)
-
- The detailed prompts for each of these formats are added to llama cli
- """
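With `tool_prompt_format` gone from `ToolDefParam` (and from `Tool`/`ToolDef` above), tool definitions shrink to name, description, metadata, and parameters; passing the old key is now a type error. A hypothetical definition under the new shape:

    from llama_stack_client.types import ToolDefParam

    tool: ToolDefParam = {
        "name": "get_weather",  # hypothetical tool
        "description": "Fetch current weather for a city",
        "parameters": [
            {
                "name": "city",
                "parameter_type": "string",
                "description": "City name",
                "required": True,
            }
        ],
        # "tool_prompt_format": "json",  # removed by this patch
    }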
diff --git a/tests/api_resources/test_agents.py b/tests/api_resources/test_agents.py
index b46b02d2..61ed3109 100644
--- a/tests/api_resources/test_agents.py
+++ b/tests/api_resources/test_agents.py
@@ -51,7 +51,6 @@ def test_method_create_with_all_params(self, client: LlamaStackClient) -> None:
"default": True,
}
],
- "tool_prompt_format": "json",
}
],
"input_shields": ["string"],
@@ -185,7 +184,6 @@ async def test_method_create_with_all_params(self, async_client: AsyncLlamaStack
"default": True,
}
],
- "tool_prompt_format": "json",
}
],
"input_shields": ["string"],
diff --git a/tests/test_models.py b/tests/test_models.py
index 9cb5de7b..ee96638a 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -844,3 +844,12 @@ class Model(BaseModel):
assert m.alias == "foo"
assert isinstance(m.union, str)
assert m.union == "bar"
+
+
+def test_field_named_cls() -> None:
+ class Model(BaseModel):
+ cls: str
+
+ m = construct_type(value={"cls": "foo"}, type_=Model)
+ assert isinstance(m, Model)
+ assert isinstance(m.cls, str)