diff --git a/src/llama_stack_client/resources/agents/agents.py b/src/llama_stack_client/resources/agents/agents.py
index c270e1a2..860fb4fa 100644
--- a/src/llama_stack_client/resources/agents/agents.py
+++ b/src/llama_stack_client/resources/agents/agents.py
@@ -32,7 +32,6 @@
from ..._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven
from ..._utils import (
maybe_transform,
- strip_not_given,
async_maybe_transform,
)
from ..._compat import cached_property
@@ -86,8 +85,6 @@ def create(
self,
*,
agent_config: AgentConfig,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -105,15 +102,6 @@ def create(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._post(
"/v1/agents",
body=maybe_transform({"agent_config": agent_config}, agent_create_params.AgentCreateParams),
@@ -127,8 +115,6 @@ def delete(
self,
agent_id: str,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -149,15 +135,6 @@ def delete(
if not agent_id:
raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._delete(
f"/v1/agents/{agent_id}",
options=make_request_options(
@@ -203,8 +180,6 @@ async def create(
self,
*,
agent_config: AgentConfig,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -222,15 +197,6 @@ async def create(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._post(
"/v1/agents",
body=await async_maybe_transform({"agent_config": agent_config}, agent_create_params.AgentCreateParams),
@@ -244,8 +210,6 @@ async def delete(
self,
agent_id: str,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -266,15 +230,6 @@ async def delete(
if not agent_id:
raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._delete(
f"/v1/agents/{agent_id}",
options=make_request_options(
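
Note: the removed `x_llama_stack_client_version` / `x_llama_stack_provider_data` keyword arguments were only shorthand for two request headers, merged via the deleted `strip_not_given` blocks. Callers that relied on them can pass the same headers through the `extra_headers` parameter that every method keeps. A minimal sketch, assuming a locally running server; the header names come from the deleted code, and the endpoint, agent config, and payload values are placeholders:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5001")  # placeholder endpoint

# Before: client.agents.create(agent_config=cfg, x_llama_stack_provider_data="...")
# After: send the header directly via extra_headers.
agent = client.agents.create(
    agent_config={  # placeholder config
        "model": "llama3.1-8b",
        "instructions": "You are a helpful assistant.",
    },
    extra_headers={
        "X-LlamaStack-Provider-Data": '{"api_key": "my-provider-key"}',  # assumed JSON payload
        "X-LlamaStack-Client-Version": "0.1.0",
    },
)
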
diff --git a/src/llama_stack_client/resources/agents/session.py b/src/llama_stack_client/resources/agents/session.py
index 8aff2477..9ccb420d 100644
--- a/src/llama_stack_client/resources/agents/session.py
+++ b/src/llama_stack_client/resources/agents/session.py
@@ -9,7 +9,6 @@
from ..._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven
from ..._utils import (
maybe_transform,
- strip_not_given,
async_maybe_transform,
)
from ..._compat import cached_property
@@ -53,8 +52,6 @@ def create(
agent_id: str,
*,
session_name: str,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -74,15 +71,6 @@ def create(
"""
if not agent_id:
raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._post(
f"/v1/agents/{agent_id}/session",
body=maybe_transform({"session_name": session_name}, session_create_params.SessionCreateParams),
@@ -98,8 +86,6 @@ def retrieve(
*,
agent_id: str,
turn_ids: List[str] | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -121,15 +107,6 @@ def retrieve(
raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
if not session_id:
raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
f"/v1/agents/{agent_id}/session/{session_id}",
options=make_request_options(
@@ -147,8 +124,6 @@ def delete(
session_id: str,
*,
agent_id: str,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -171,15 +146,6 @@ def delete(
if not session_id:
raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._delete(
f"/v1/agents/{agent_id}/session/{session_id}",
options=make_request_options(
@@ -214,8 +180,6 @@ async def create(
agent_id: str,
*,
session_name: str,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -235,15 +199,6 @@ async def create(
"""
if not agent_id:
raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._post(
f"/v1/agents/{agent_id}/session",
body=await async_maybe_transform({"session_name": session_name}, session_create_params.SessionCreateParams),
@@ -259,8 +214,6 @@ async def retrieve(
*,
agent_id: str,
turn_ids: List[str] | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -282,15 +235,6 @@ async def retrieve(
raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}")
if not session_id:
raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
f"/v1/agents/{agent_id}/session/{session_id}",
options=make_request_options(
@@ -310,8 +254,6 @@ async def delete(
session_id: str,
*,
agent_id: str,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -334,15 +276,6 @@ async def delete(
if not session_id:
raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._delete(
f"/v1/agents/{agent_id}/session/{session_id}",
options=make_request_options(
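
For callers that attached provider data on every request, a client-wide default avoids threading `extra_headers` through each call site. A sketch assuming the generated client's `default_headers` constructor argument (standard in this SDK family) and the `client.agents.session` accessor implied by this module:

from llama_stack_client import LlamaStackClient

# Set the header once; agents, session, turn, and step calls all inherit it,
# and a per-call extra_headers entry still overrides it.
client = LlamaStackClient(
    base_url="http://localhost:5001",  # placeholder endpoint
    default_headers={"X-LlamaStack-Provider-Data": '{"api_key": "my-provider-key"}'},
)

session = client.agents.session.create(
    agent_id="agent-123",  # placeholder id
    session_name="demo-session",
)
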
diff --git a/src/llama_stack_client/resources/agents/steps.py b/src/llama_stack_client/resources/agents/steps.py
index 53f8f3b0..1b4ec362 100644
--- a/src/llama_stack_client/resources/agents/steps.py
+++ b/src/llama_stack_client/resources/agents/steps.py
@@ -5,7 +5,6 @@
import httpx
from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ..._utils import strip_not_given
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import (
@@ -47,8 +46,6 @@ def retrieve(
agent_id: str,
session_id: str,
turn_id: str,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -74,15 +71,6 @@ def retrieve(
raise ValueError(f"Expected a non-empty value for `turn_id` but received {turn_id!r}")
if not step_id:
raise ValueError(f"Expected a non-empty value for `step_id` but received {step_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
f"/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
options=make_request_options(
@@ -119,8 +107,6 @@ async def retrieve(
agent_id: str,
session_id: str,
turn_id: str,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -146,15 +132,6 @@ async def retrieve(
raise ValueError(f"Expected a non-empty value for `turn_id` but received {turn_id!r}")
if not step_id:
raise ValueError(f"Expected a non-empty value for `step_id` but received {step_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
f"/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
options=make_request_options(
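
`steps.retrieve` is the deepest of the agent accessors: it addresses a single step through the full agent/session/turn hierarchy, and, as the kept validation shows, rejects empty path parameters client-side before any request is made. A sketch with placeholder ids:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5001")  # placeholder endpoint

step_response = client.agents.steps.retrieve(
    step_id="step-001",
    agent_id="agent-123",
    session_id="session-456",
    turn_id="turn-789",
)
# An empty id such as step_id="" raises ValueError locally rather than
# producing a malformed /v1/agents/.../step/ URL.
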
diff --git a/src/llama_stack_client/resources/agents/turn.py b/src/llama_stack_client/resources/agents/turn.py
index b76b65f1..8f2e9989 100644
--- a/src/llama_stack_client/resources/agents/turn.py
+++ b/src/llama_stack_client/resources/agents/turn.py
@@ -11,7 +11,6 @@
from ..._utils import (
required_args,
maybe_transform,
- strip_not_given,
async_maybe_transform,
)
from ..._compat import cached_property
@@ -61,8 +60,6 @@ def create(
documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN,
stream: Literal[False] | NotGiven = NOT_GIVEN,
toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -92,8 +89,6 @@ def create(
stream: Literal[True],
documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN,
toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -123,8 +118,6 @@ def create(
stream: bool,
documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN,
toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -154,8 +147,6 @@ def create(
documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN,
stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN,
toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -168,15 +159,6 @@ def create(
if not session_id:
raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return cast(
TurnCreateResponse,
self._post(
@@ -207,8 +189,6 @@ def retrieve(
*,
agent_id: str,
session_id: str,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -232,15 +212,6 @@ def retrieve(
raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
if not turn_id:
raise ValueError(f"Expected a non-empty value for `turn_id` but received {turn_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
f"/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
options=make_request_options(
@@ -280,8 +251,6 @@ async def create(
documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN,
stream: Literal[False] | NotGiven = NOT_GIVEN,
toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -311,8 +280,6 @@ async def create(
stream: Literal[True],
documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN,
toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -342,8 +309,6 @@ async def create(
stream: bool,
documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN,
toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -373,8 +338,6 @@ async def create(
documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN,
stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN,
toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -387,15 +350,6 @@ async def create(
if not session_id:
raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return cast(
TurnCreateResponse,
await self._post(
@@ -426,8 +380,6 @@ async def retrieve(
*,
agent_id: str,
session_id: str,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -451,15 +403,6 @@ async def retrieve(
raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}")
if not turn_id:
raise ValueError(f"Expected a non-empty value for `turn_id` but received {turn_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
f"/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
options=make_request_options(
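
The `create` overloads above exist to give `stream` a precise type: `stream=False` (the default) returns a completed `TurnCreateResponse`, while `stream=True` selects the `Literal[True]` overload and returns a server-sent-event stream, hence the kept `Accept: text/event-stream` header. A sketch of the streaming path; the message shape is an assumption and the ids are placeholders:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5001")  # placeholder endpoint

stream = client.agents.turn.create(
    session_id="session-456",
    agent_id="agent-123",
    messages=[{"role": "user", "content": "What is the capital of France?"}],  # assumed shape
    stream=True,
)
for event in stream:  # each item is one turn-stream event
    print(event)
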
diff --git a/src/llama_stack_client/resources/batch_inference.py b/src/llama_stack_client/resources/batch_inference.py
index ea76b295..39ddc1ee 100644
--- a/src/llama_stack_client/resources/batch_inference.py
+++ b/src/llama_stack_client/resources/batch_inference.py
@@ -11,7 +11,6 @@
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
from .._utils import (
maybe_transform,
- strip_not_given,
async_maybe_transform,
)
from .._compat import cached_property
@@ -25,6 +24,7 @@
from .._base_client import make_request_options
from ..types.shared_params.message import Message
from ..types.shared.batch_completion import BatchCompletion
+from ..types.shared_params.response_format import ResponseFormat
from ..types.shared_params.sampling_params import SamplingParams
from ..types.shared_params.interleaved_content import InterleavedContent
from ..types.batch_inference_chat_completion_response import BatchInferenceChatCompletionResponse
@@ -58,12 +58,11 @@ def chat_completion(
messages_batch: Iterable[Iterable[Message]],
model: str,
logprobs: batch_inference_chat_completion_params.Logprobs | NotGiven = NOT_GIVEN,
+ response_format: ResponseFormat | NotGiven = NOT_GIVEN,
sampling_params: SamplingParams | NotGiven = NOT_GIVEN,
tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN,
tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN,
tools: Iterable[batch_inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -73,16 +72,6 @@ def chat_completion(
) -> BatchInferenceChatCompletionResponse:
"""
Args:
- tool_prompt_format: `json` -- Refers to the json format for calling tools. The json format takes the
- form like { "type": "function", "function" : { "name": "function_name",
- "description": "function_description", "parameters": {...} } }
-
- `function_tag` -- This is an example of how you could define your own user
- defined format for making tool calls. The function_tag format looks like this,
- <function=function_name>(parameters)</function>
-
- The detailed prompts for each of these formats are added to llama cli
-
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -91,15 +80,6 @@ def chat_completion(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._post(
"/v1/batch-inference/chat-completion",
body=maybe_transform(
@@ -107,6 +87,7 @@ def chat_completion(
"messages_batch": messages_batch,
"model": model,
"logprobs": logprobs,
+ "response_format": response_format,
"sampling_params": sampling_params,
"tool_choice": tool_choice,
"tool_prompt_format": tool_prompt_format,
@@ -126,9 +107,8 @@ def completion(
content_batch: List[InterleavedContent],
model: str,
logprobs: batch_inference_completion_params.Logprobs | NotGiven = NOT_GIVEN,
+ response_format: ResponseFormat | NotGiven = NOT_GIVEN,
sampling_params: SamplingParams | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -146,15 +126,6 @@ def completion(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._post(
"/v1/batch-inference/completion",
body=maybe_transform(
@@ -162,6 +133,7 @@ def completion(
"content_batch": content_batch,
"model": model,
"logprobs": logprobs,
+ "response_format": response_format,
"sampling_params": sampling_params,
},
batch_inference_completion_params.BatchInferenceCompletionParams,
@@ -199,12 +171,11 @@ async def chat_completion(
messages_batch: Iterable[Iterable[Message]],
model: str,
logprobs: batch_inference_chat_completion_params.Logprobs | NotGiven = NOT_GIVEN,
+ response_format: ResponseFormat | NotGiven = NOT_GIVEN,
sampling_params: SamplingParams | NotGiven = NOT_GIVEN,
tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN,
tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN,
tools: Iterable[batch_inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -214,16 +185,6 @@ async def chat_completion(
) -> BatchInferenceChatCompletionResponse:
"""
Args:
- tool_prompt_format: `json` -- Refers to the json format for calling tools. The json format takes the
- form like { "type": "function", "function" : { "name": "function_name",
- "description": "function_description", "parameters": {...} } }
-
- `function_tag` -- This is an example of how you could define your own user
- defined format for making tool calls. The function_tag format looks like this,
- <function=function_name>(parameters)</function>
-
- The detailed prompts for each of these formats are added to llama cli
-
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -232,15 +193,6 @@ async def chat_completion(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._post(
"/v1/batch-inference/chat-completion",
body=await async_maybe_transform(
@@ -248,6 +200,7 @@ async def chat_completion(
"messages_batch": messages_batch,
"model": model,
"logprobs": logprobs,
+ "response_format": response_format,
"sampling_params": sampling_params,
"tool_choice": tool_choice,
"tool_prompt_format": tool_prompt_format,
@@ -267,9 +220,8 @@ async def completion(
content_batch: List[InterleavedContent],
model: str,
logprobs: batch_inference_completion_params.Logprobs | NotGiven = NOT_GIVEN,
+ response_format: ResponseFormat | NotGiven = NOT_GIVEN,
sampling_params: SamplingParams | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -287,15 +239,6 @@ async def completion(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._post(
"/v1/batch-inference/completion",
body=await async_maybe_transform(
@@ -303,6 +246,7 @@ async def completion(
"content_batch": content_batch,
"model": model,
"logprobs": logprobs,
+ "response_format": response_format,
"sampling_params": sampling_params,
},
batch_inference_completion_params.BatchInferenceCompletionParams,
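
This file's change is not purely subtractive: both batch endpoints gain a `response_format` parameter (note the new `ResponseFormat` import from `shared_params`) that is forwarded in the request body alongside `logprobs` and `sampling_params`. A sketch of constrained decoding with it; the JSON-schema payload shape is an assumption modeled on the shared `ResponseFormat` type, and the model id is a placeholder:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5001")  # placeholder endpoint

response = client.batch_inference.chat_completion(
    model="llama3.1-8b",
    messages_batch=[
        [{"role": "user", "content": "Extract the city: 'I live in Paris.'"}],
    ],
    response_format={  # assumed json_schema-style variant
        "type": "json_schema",
        "json_schema": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
)
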
diff --git a/src/llama_stack_client/resources/datasetio.py b/src/llama_stack_client/resources/datasetio.py
index 1441dff0..c69ee34a 100644
--- a/src/llama_stack_client/resources/datasetio.py
+++ b/src/llama_stack_client/resources/datasetio.py
@@ -10,7 +10,6 @@
from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven
from .._utils import (
maybe_transform,
- strip_not_given,
async_maybe_transform,
)
from .._compat import cached_property
@@ -52,8 +51,6 @@ def append_rows(
*,
dataset_id: str,
rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]],
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -72,15 +69,6 @@ def append_rows(
timeout: Override the client-level default timeout for this request, in seconds
"""
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._post(
"/v1/datasetio/rows",
body=maybe_transform(
@@ -103,8 +91,6 @@ def get_rows_paginated(
rows_in_page: int,
filter_condition: str | NotGiven = NOT_GIVEN,
page_token: str | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -122,15 +108,6 @@ def get_rows_paginated(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
"/v1/datasetio/rows",
options=make_request_options(
@@ -177,8 +154,6 @@ async def append_rows(
*,
dataset_id: str,
rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]],
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -197,15 +172,6 @@ async def append_rows(
timeout: Override the client-level default timeout for this request, in seconds
"""
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._post(
"/v1/datasetio/rows",
body=await async_maybe_transform(
@@ -228,8 +194,6 @@ async def get_rows_paginated(
rows_in_page: int,
filter_condition: str | NotGiven = NOT_GIVEN,
page_token: str | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -247,15 +211,6 @@ async def get_rows_paginated(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
"/v1/datasetio/rows",
options=make_request_options(
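
`get_rows_paginated` pages with `rows_in_page` plus an optional `page_token` (and an optional `filter_condition`), so a full scan feeds each response's token back into the next call. A sketch assuming the response carries `rows` and a `next_page_token` field, neither of which is shown in this hunk:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5001")  # placeholder endpoint

all_rows = []
page_token = None
while True:
    kwargs = {} if page_token is None else {"page_token": page_token}
    page = client.datasetio.get_rows_paginated(
        dataset_id="my-dataset",  # placeholder id
        rows_in_page=100,
        **kwargs,
    )
    all_rows.extend(page.rows)          # assumed response fields
    page_token = page.next_page_token
    if not page_token:
        break
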
diff --git a/src/llama_stack_client/resources/datasets.py b/src/llama_stack_client/resources/datasets.py
index 7e9d9ebb..59457a45 100644
--- a/src/llama_stack_client/resources/datasets.py
+++ b/src/llama_stack_client/resources/datasets.py
@@ -10,7 +10,6 @@
from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven
from .._utils import (
maybe_transform,
- strip_not_given,
async_maybe_transform,
)
from .._compat import cached_property
@@ -55,8 +54,6 @@ def retrieve(
self,
dataset_id: str,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -76,15 +73,6 @@ def retrieve(
"""
if not dataset_id:
raise ValueError(f"Expected a non-empty value for `dataset_id` but received {dataset_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
f"/v1/datasets/{dataset_id}",
options=make_request_options(
@@ -96,8 +84,6 @@ def retrieve(
def list(
self,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -105,25 +91,6 @@ def list(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> DatasetListResponse:
- """
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
"/v1/datasets",
options=make_request_options(
@@ -145,8 +112,6 @@ def register(
metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN,
provider_dataset_id: str | NotGiven = NOT_GIVEN,
provider_id: str | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -165,15 +130,6 @@ def register(
timeout: Override the client-level default timeout for this request, in seconds
"""
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._post(
"/v1/datasets",
body=maybe_transform(
@@ -197,8 +153,6 @@ def unregister(
self,
dataset_id: str,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -219,15 +173,6 @@ def unregister(
if not dataset_id:
raise ValueError(f"Expected a non-empty value for `dataset_id` but received {dataset_id!r}")
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._delete(
f"/v1/datasets/{dataset_id}",
options=make_request_options(
@@ -261,8 +206,6 @@ async def retrieve(
self,
dataset_id: str,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -282,15 +225,6 @@ async def retrieve(
"""
if not dataset_id:
raise ValueError(f"Expected a non-empty value for `dataset_id` but received {dataset_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
f"/v1/datasets/{dataset_id}",
options=make_request_options(
@@ -302,8 +236,6 @@ async def retrieve(
async def list(
self,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -311,25 +243,6 @@ async def list(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> DatasetListResponse:
- """
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
"/v1/datasets",
options=make_request_options(
@@ -351,8 +264,6 @@ async def register(
metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN,
provider_dataset_id: str | NotGiven = NOT_GIVEN,
provider_id: str | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -371,15 +282,6 @@ async def register(
timeout: Override the client-level default timeout for this request, in seconds
"""
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._post(
"/v1/datasets",
body=await async_maybe_transform(
@@ -403,8 +305,6 @@ async def unregister(
self,
dataset_id: str,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -425,15 +325,6 @@ async def unregister(
if not dataset_id:
raise ValueError(f"Expected a non-empty value for `dataset_id` but received {dataset_id!r}")
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._delete(
f"/v1/datasets/{dataset_id}",
options=make_request_options(
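
The register/unregister pair kept above manages the dataset lifecycle; note that `list()` loses its docstring entirely in this diff. A registration sketch: only `metadata`, `provider_dataset_id`, and `provider_id` are visible in these hunks, so the `dataset_schema` and `url` arguments below are assumptions about the remaining required fields:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5001")  # placeholder endpoint

client.datasets.register(
    dataset_id="my-dataset",
    dataset_schema={  # assumed column-type mapping
        "input_query": {"type": "string"},
        "expected_answer": {"type": "string"},
    },
    url={"uri": "https://example.com/rows.jsonl"},  # assumed field
    metadata={"owner": "eval-team"},
)
client.datasets.unregister("my-dataset")
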
diff --git a/src/llama_stack_client/resources/eval/eval.py b/src/llama_stack_client/resources/eval/eval.py
index a3ebbdfe..7795064a 100644
--- a/src/llama_stack_client/resources/eval/eval.py
+++ b/src/llama_stack_client/resources/eval/eval.py
@@ -18,7 +18,6 @@
from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
from ..._utils import (
maybe_transform,
- strip_not_given,
async_maybe_transform,
)
from ..._compat import cached_property
@@ -68,8 +67,6 @@ def evaluate_rows(
input_rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]],
scoring_functions: List[str],
task_config: EvalTaskConfigParam,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -89,15 +86,6 @@ def evaluate_rows(
"""
if not task_id:
raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._post(
f"/v1/eval/tasks/{task_id}/evaluations",
body=maybe_transform(
@@ -119,8 +107,6 @@ def run_eval(
task_id: str,
*,
task_config: EvalTaskConfigParam,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -140,15 +126,6 @@ def run_eval(
"""
if not task_id:
raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._post(
f"/v1/eval/tasks/{task_id}/jobs",
body=maybe_transform({"task_config": task_config}, eval_run_eval_params.EvalRunEvalParams),
@@ -190,8 +167,6 @@ async def evaluate_rows(
input_rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]],
scoring_functions: List[str],
task_config: EvalTaskConfigParam,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -211,15 +186,6 @@ async def evaluate_rows(
"""
if not task_id:
raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._post(
f"/v1/eval/tasks/{task_id}/evaluations",
body=await async_maybe_transform(
@@ -241,8 +207,6 @@ async def run_eval(
task_id: str,
*,
task_config: EvalTaskConfigParam,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -262,15 +226,6 @@ async def run_eval(
"""
if not task_id:
raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._post(
f"/v1/eval/tasks/{task_id}/jobs",
body=await async_maybe_transform({"task_config": task_config}, eval_run_eval_params.EvalRunEvalParams),
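
`evaluate_rows` scores caller-supplied rows synchronously, while `run_eval` enqueues a job whose lifecycle is handled by the jobs resource in the next file (see the polling sketch there). A sketch of the inline path; the row keys, scoring-function id, and `task_config` payload are all assumptions about the benchmark-style config:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5001")  # placeholder endpoint

result = client.eval.evaluate_rows(
    task_id="my-eval-task",  # placeholder id
    input_rows=[
        {"input_query": "2 + 2", "expected_answer": "4"},  # assumed row schema
    ],
    scoring_functions=["basic::equality"],  # assumed scoring function id
    task_config={  # assumed benchmark-style EvalTaskConfig
        "type": "benchmark",
        "eval_candidate": {
            "type": "model",
            "model": "llama3.1-8b",
            "sampling_params": {"strategy": "greedy"},
        },
    },
)
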
diff --git a/src/llama_stack_client/resources/eval/jobs.py b/src/llama_stack_client/resources/eval/jobs.py
index f8bf6cd0..ba3e0777 100644
--- a/src/llama_stack_client/resources/eval/jobs.py
+++ b/src/llama_stack_client/resources/eval/jobs.py
@@ -7,7 +7,6 @@
import httpx
from ..._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven
-from ..._utils import strip_not_given
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import (
@@ -48,8 +47,6 @@ def retrieve(
job_id: str,
*,
task_id: str,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -71,15 +68,6 @@ def retrieve(
raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
if not job_id:
raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
f"/v1/eval/tasks/{task_id}/jobs/{job_id}/result",
options=make_request_options(
@@ -93,8 +81,6 @@ def cancel(
job_id: str,
*,
task_id: str,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -117,15 +103,6 @@ def cancel(
if not job_id:
raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._delete(
f"/v1/eval/tasks/{task_id}/jobs/{job_id}",
options=make_request_options(
@@ -139,8 +116,6 @@ def status(
job_id: str,
*,
task_id: str,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -162,15 +137,6 @@ def status(
raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
if not job_id:
raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
f"/v1/eval/tasks/{task_id}/jobs/{job_id}",
options=make_request_options(
@@ -205,8 +171,6 @@ async def retrieve(
job_id: str,
*,
task_id: str,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -228,15 +192,6 @@ async def retrieve(
raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
if not job_id:
raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
f"/v1/eval/tasks/{task_id}/jobs/{job_id}/result",
options=make_request_options(
@@ -250,8 +205,6 @@ async def cancel(
job_id: str,
*,
task_id: str,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -274,15 +227,6 @@ async def cancel(
if not job_id:
raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._delete(
f"/v1/eval/tasks/{task_id}/jobs/{job_id}",
options=make_request_options(
@@ -296,8 +240,6 @@ async def status(
job_id: str,
*,
task_id: str,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -319,15 +261,6 @@ async def status(
raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
if not job_id:
raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
f"/v1/eval/tasks/{task_id}/jobs/{job_id}",
options=make_request_options(
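
The three job endpoints kept above complete the async eval flow: `status` polls GET `/v1/eval/tasks/{task_id}/jobs/{job_id}`, `retrieve` fetches the result from `.../result`, and `cancel` issues the DELETE. A polling sketch; the `job_id` attribute on the run handle and the status values are assumptions:

import time

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5001")  # placeholder endpoint

task_config = {  # assumed benchmark-style config, as in the evaluate_rows sketch
    "type": "benchmark",
    "eval_candidate": {
        "type": "model",
        "model": "llama3.1-8b",
        "sampling_params": {"strategy": "greedy"},
    },
}
job = client.eval.run_eval(task_id="my-eval-task", task_config=task_config)

while True:
    status = client.eval.jobs.status(job.job_id, task_id="my-eval-task")  # assumed .job_id
    if status not in ("scheduled", "in_progress"):  # assumed status values
        break
    time.sleep(2)

result = client.eval.jobs.retrieve(job.job_id, task_id="my-eval-task")
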
diff --git a/src/llama_stack_client/resources/eval_tasks.py b/src/llama_stack_client/resources/eval_tasks.py
index f6d4b960..82a07839 100644
--- a/src/llama_stack_client/resources/eval_tasks.py
+++ b/src/llama_stack_client/resources/eval_tasks.py
@@ -10,7 +10,6 @@
from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven
from .._utils import (
maybe_transform,
- strip_not_given,
async_maybe_transform,
)
from .._compat import cached_property
@@ -53,8 +52,6 @@ def retrieve(
self,
eval_task_id: str,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -74,15 +71,6 @@ def retrieve(
"""
if not eval_task_id:
raise ValueError(f"Expected a non-empty value for `eval_task_id` but received {eval_task_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
f"/v1/eval-tasks/{eval_task_id}",
options=make_request_options(
@@ -94,8 +82,6 @@ def retrieve(
def list(
self,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -103,25 +89,6 @@ def list(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> EvalTaskListResponse:
- """
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
"/v1/eval-tasks",
options=make_request_options(
@@ -143,8 +110,6 @@ def register(
metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN,
provider_eval_task_id: str | NotGiven = NOT_GIVEN,
provider_id: str | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -163,15 +128,6 @@ def register(
timeout: Override the client-level default timeout for this request, in seconds
"""
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._post(
"/v1/eval-tasks",
body=maybe_transform(
@@ -216,8 +172,6 @@ async def retrieve(
self,
eval_task_id: str,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -237,15 +191,6 @@ async def retrieve(
"""
if not eval_task_id:
raise ValueError(f"Expected a non-empty value for `eval_task_id` but received {eval_task_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
f"/v1/eval-tasks/{eval_task_id}",
options=make_request_options(
@@ -257,8 +202,6 @@ async def retrieve(
async def list(
self,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -266,25 +209,6 @@ async def list(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> EvalTaskListResponse:
- """
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
"/v1/eval-tasks",
options=make_request_options(
@@ -306,8 +230,6 @@ async def register(
metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN,
provider_eval_task_id: str | NotGiven = NOT_GIVEN,
provider_id: str | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -326,15 +248,6 @@ async def register(
timeout: Override the client-level default timeout for this request, in seconds
"""
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._post(
"/v1/eval-tasks",
body=await async_maybe_transform(
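
Callers that previously set provider data through the removed `x_llama_stack_provider_data=` keyword can pass the same header through the surviving `extra_headers` parameter, or set it once on the client. A minimal sketch, assuming the client constructor accepts `default_headers` as Stainless-generated clients typically do; the URL, task id, and header value below are placeholders:

    from llama_stack_client import LlamaStackClient

    # Set the header once for every request made through this client
    # (assumes the constructor accepts `default_headers`).
    client = LlamaStackClient(
        base_url="http://localhost:5000",  # placeholder endpoint
        default_headers={"X-LlamaStack-Provider-Data": '{"api_key": "..."}'},
    )

    # Or pass it on a single call via `extra_headers`, which every
    # generated method still accepts.
    task = client.eval_tasks.retrieve(
        "my-eval-task",  # placeholder eval task id
        extra_headers={"X-LlamaStack-Provider-Data": '{"api_key": "..."}'},
    )
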
diff --git a/src/llama_stack_client/resources/inference.py b/src/llama_stack_client/resources/inference.py
index 7f2b0966..ce88b3b8 100644
--- a/src/llama_stack_client/resources/inference.py
+++ b/src/llama_stack_client/resources/inference.py
@@ -16,7 +16,6 @@
from .._utils import (
required_args,
maybe_transform,
- strip_not_given,
async_maybe_transform,
)
from .._compat import cached_property
@@ -73,8 +72,6 @@ def chat_completion(
tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN,
tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN,
tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -83,16 +80,38 @@ def chat_completion(
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> InferenceChatCompletionResponse:
"""
+ Generate a chat completion for the given messages using the specified model.
+
Args:
- tool_prompt_format: `json` -- Refers to the json format for calling tools. The json format takes the
- form like { "type": "function", "function" : { "name": "function_name",
- "description": "function_description", "parameters": {...} } }
+ messages: List of messages in the conversation
+
+ model_id: The identifier of the model to use. The model must be registered with Llama
+ Stack and available via the /models endpoint.
+
+ logprobs: (Optional) If specified, log probabilities for each token position will be
+ returned.
+
+ response_format: (Optional) Grammar specification for guided (structured) decoding. There are two
+ options: - `ResponseFormat.json_schema`: The grammar is a JSON schema. Most
+ providers support this format. - `ResponseFormat.grammar`: The grammar is a BNF
+ grammar. This format is more flexible, but not all providers support it.
+
+ sampling_params: Parameters to control the sampling strategy
+
+ stream: (Optional) If True, generate an SSE event stream of the response. Defaults to
+ False.
+
+ tool_choice: (Optional) Whether tool use is required or automatic. Defaults to
+ ToolChoice.auto.
- `function_tag` -- This is an example of how you could define your own user
- defined format for making tool calls. The function_tag format looks like this,
-          <function=function_name>(parameters)</function>
+ tool_prompt_format: (Optional) Instructs the model how to format tool calls. By default, Llama Stack
+ will attempt to use a format that is best adapted to the model. -
+ `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. -
+ `ToolPromptFormat.function_tag`: The tool calls are enclosed in a
+          <function=function_name> tag. - `ToolPromptFormat.python_list`: The tool calls
+ are output as Python syntax -- a list of function calls.
- The detailed prompts for each of these formats are added to llama cli
+ tools: (Optional) List of tool definitions available to the model
extra_headers: Send extra headers
@@ -117,8 +136,6 @@ def chat_completion(
tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN,
tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN,
tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -127,16 +144,38 @@ def chat_completion(
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Stream[InferenceChatCompletionResponse]:
"""
+ Generate a chat completion for the given messages using the specified model.
+
Args:
- tool_prompt_format: `json` -- Refers to the json format for calling tools. The json format takes the
- form like { "type": "function", "function" : { "name": "function_name",
- "description": "function_description", "parameters": {...} } }
+ messages: List of messages in the conversation
+
+ model_id: The identifier of the model to use. The model must be registered with Llama
+ Stack and available via the /models endpoint.
+
+ stream: (Optional) If True, generate an SSE event stream of the response. Defaults to
+ False.
+
+ logprobs: (Optional) If specified, log probabilities for each token position will be
+ returned.
+
+ response_format: (Optional) Grammar specification for guided (structured) decoding. There are two
+ options: - `ResponseFormat.json_schema`: The grammar is a JSON schema. Most
+ providers support this format. - `ResponseFormat.grammar`: The grammar is a BNF
+ grammar. This format is more flexible, but not all providers support it.
+
+ sampling_params: Parameters to control the sampling strategy
- `function_tag` -- This is an example of how you could define your own user
- defined format for making tool calls. The function_tag format looks like this,
-          <function=function_name>(parameters)</function>
+ tool_choice: (Optional) Whether tool use is required or automatic. Defaults to
+ ToolChoice.auto.
- The detailed prompts for each of these formats are added to llama cli
+ tool_prompt_format: (Optional) Instructs the model how to format tool calls. By default, Llama Stack
+ will attempt to use a format that is best adapted to the model. -
+ `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. -
+ `ToolPromptFormat.function_tag`: The tool calls are enclosed in a
+          <function=function_name> tag. - `ToolPromptFormat.python_list`: The tool calls
+ are output as Python syntax -- a list of function calls.
+
+ tools: (Optional) List of tool definitions available to the model
extra_headers: Send extra headers
@@ -161,8 +200,6 @@ def chat_completion(
tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN,
tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN,
tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -171,16 +208,38 @@ def chat_completion(
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> InferenceChatCompletionResponse | Stream[InferenceChatCompletionResponse]:
"""
+ Generate a chat completion for the given messages using the specified model.
+
Args:
- tool_prompt_format: `json` -- Refers to the json format for calling tools. The json format takes the
- form like { "type": "function", "function" : { "name": "function_name",
- "description": "function_description", "parameters": {...} } }
+ messages: List of messages in the conversation
+
+ model_id: The identifier of the model to use. The model must be registered with Llama
+ Stack and available via the /models endpoint.
+
+ stream: (Optional) If True, generate an SSE event stream of the response. Defaults to
+ False.
+
+ logprobs: (Optional) If specified, log probabilities for each token position will be
+ returned.
+
+ response_format: (Optional) Grammar specification for guided (structured) decoding. There are two
+ options: - `ResponseFormat.json_schema`: The grammar is a JSON schema. Most
+ providers support this format. - `ResponseFormat.grammar`: The grammar is a BNF
+ grammar. This format is more flexible, but not all providers support it.
+
+ sampling_params: Parameters to control the sampling strategy
- `function_tag` -- This is an example of how you could define your own user
- defined format for making tool calls. The function_tag format looks like this,
-          <function=function_name>(parameters)</function>
+ tool_choice: (Optional) Whether tool use is required or automatic. Defaults to
+ ToolChoice.auto.
- The detailed prompts for each of these formats are added to llama cli
+ tool_prompt_format: (Optional) Instructs the model how to format tool calls. By default, Llama Stack
+ will attempt to use a format that is best adapted to the model. -
+ `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. -
+ `ToolPromptFormat.function_tag`: The tool calls are enclosed in a
+          <function=function_name> tag. - `ToolPromptFormat.python_list`: The tool calls
+ are output as Python syntax -- a list of function calls.
+
+ tools: (Optional) List of tool definitions available to the model
extra_headers: Send extra headers
@@ -205,8 +264,6 @@ def chat_completion(
tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN,
tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN,
tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -215,15 +272,6 @@ def chat_completion(
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> InferenceChatCompletionResponse | Stream[InferenceChatCompletionResponse]:
extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return cast(
InferenceChatCompletionResponse,
self._post(
@@ -263,8 +311,6 @@ def completion(
response_format: ResponseFormat | NotGiven = NOT_GIVEN,
sampling_params: SamplingParams | NotGiven = NOT_GIVEN,
stream: Literal[False] | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -273,7 +319,24 @@ def completion(
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> InferenceCompletionResponse:
"""
+ Generate a completion for the given content using the specified model.
+
Args:
+ content: The content to generate a completion for
+
+ model_id: The identifier of the model to use. The model must be registered with Llama
+ Stack and available via the /models endpoint.
+
+ logprobs: (Optional) If specified, log probabilities for each token position will be
+ returned.
+
+ response_format: (Optional) Grammar specification for guided (structured) decoding
+
+ sampling_params: (Optional) Parameters to control the sampling strategy
+
+ stream: (Optional) If True, generate an SSE event stream of the response. Defaults to
+ False.
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -294,8 +357,6 @@ def completion(
logprobs: inference_completion_params.Logprobs | NotGiven = NOT_GIVEN,
response_format: ResponseFormat | NotGiven = NOT_GIVEN,
sampling_params: SamplingParams | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -304,7 +365,24 @@ def completion(
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Stream[InferenceCompletionResponse]:
"""
+ Generate a completion for the given content using the specified model.
+
Args:
+ content: The content to generate a completion for
+
+ model_id: The identifier of the model to use. The model must be registered with Llama
+ Stack and available via the /models endpoint.
+
+ stream: (Optional) If True, generate an SSE event stream of the response. Defaults to
+ False.
+
+ logprobs: (Optional) If specified, log probabilities for each token position will be
+ returned.
+
+ response_format: (Optional) Grammar specification for guided (structured) decoding
+
+ sampling_params: (Optional) Parameters to control the sampling strategy
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -325,8 +403,6 @@ def completion(
logprobs: inference_completion_params.Logprobs | NotGiven = NOT_GIVEN,
response_format: ResponseFormat | NotGiven = NOT_GIVEN,
sampling_params: SamplingParams | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -335,7 +411,24 @@ def completion(
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> InferenceCompletionResponse | Stream[InferenceCompletionResponse]:
"""
+ Generate a completion for the given content using the specified model.
+
Args:
+ content: The content to generate a completion for
+
+ model_id: The identifier of the model to use. The model must be registered with Llama
+ Stack and available via the /models endpoint.
+
+ stream: (Optional) If True, generate an SSE event stream of the response. Defaults to
+ False.
+
+ logprobs: (Optional) If specified, log probabilities for each token position will be
+ returned.
+
+ response_format: (Optional) Grammar specification for guided (structured) decoding
+
+ sampling_params: (Optional) Parameters to control the sampling strategy
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -356,8 +449,6 @@ def completion(
response_format: ResponseFormat | NotGiven = NOT_GIVEN,
sampling_params: SamplingParams | NotGiven = NOT_GIVEN,
stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -366,15 +457,6 @@ def completion(
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> InferenceCompletionResponse | Stream[InferenceCompletionResponse]:
extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return cast(
InferenceCompletionResponse,
self._post(
@@ -406,8 +488,6 @@ def embeddings(
*,
contents: List[InterleavedContent],
model_id: str,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -416,7 +496,16 @@ def embeddings(
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> EmbeddingsResponse:
"""
+ Generate embeddings for content pieces using the specified model.
+
Args:
+ contents: List of contents to generate embeddings for. Note that content can be
+ multimodal. The behavior depends on the model and provider. Some models may only
+ support text.
+
+ model_id: The identifier of the model to use. The model must be an embedding model
+ registered with Llama Stack and available via the /models endpoint.
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -425,15 +514,6 @@ def embeddings(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._post(
"/v1/inference/embeddings",
body=maybe_transform(
@@ -483,8 +563,6 @@ async def chat_completion(
tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN,
tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN,
tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -493,16 +571,38 @@ async def chat_completion(
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> InferenceChatCompletionResponse:
"""
+ Generate a chat completion for the given messages using the specified model.
+
Args:
- tool_prompt_format: `json` -- Refers to the json format for calling tools. The json format takes the
- form like { "type": "function", "function" : { "name": "function_name",
- "description": "function_description", "parameters": {...} } }
+ messages: List of messages in the conversation
+
+ model_id: The identifier of the model to use. The model must be registered with Llama
+ Stack and available via the /models endpoint.
+
+ logprobs: (Optional) If specified, log probabilities for each token position will be
+ returned.
+
+ response_format: (Optional) Grammar specification for guided (structured) decoding. There are two
+ options: - `ResponseFormat.json_schema`: The grammar is a JSON schema. Most
+ providers support this format. - `ResponseFormat.grammar`: The grammar is a BNF
+ grammar. This format is more flexible, but not all providers support it.
+
+ sampling_params: Parameters to control the sampling strategy
+
+ stream: (Optional) If True, generate an SSE event stream of the response. Defaults to
+ False.
+
+ tool_choice: (Optional) Whether tool use is required or automatic. Defaults to
+ ToolChoice.auto.
- `function_tag` -- This is an example of how you could define your own user
- defined format for making tool calls. The function_tag format looks like this,
-          <function=function_name>(parameters)</function>
+ tool_prompt_format: (Optional) Instructs the model how to format tool calls. By default, Llama Stack
+ will attempt to use a format that is best adapted to the model. -
+ `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. -
+ `ToolPromptFormat.function_tag`: The tool calls are enclosed in a
+          <function=function_name> tag. - `ToolPromptFormat.python_list`: The tool calls
+ are output as Python syntax -- a list of function calls.
- The detailed prompts for each of these formats are added to llama cli
+ tools: (Optional) List of tool definitions available to the model
extra_headers: Send extra headers
@@ -527,8 +627,6 @@ async def chat_completion(
tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN,
tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN,
tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -537,16 +635,38 @@ async def chat_completion(
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> AsyncStream[InferenceChatCompletionResponse]:
"""
+ Generate a chat completion for the given messages using the specified model.
+
Args:
- tool_prompt_format: `json` -- Refers to the json format for calling tools. The json format takes the
- form like { "type": "function", "function" : { "name": "function_name",
- "description": "function_description", "parameters": {...} } }
+ messages: List of messages in the conversation
+
+ model_id: The identifier of the model to use. The model must be registered with Llama
+ Stack and available via the /models endpoint.
+
+ stream: (Optional) If True, generate an SSE event stream of the response. Defaults to
+ False.
+
+ logprobs: (Optional) If specified, log probabilities for each token position will be
+ returned.
+
+ response_format: (Optional) Grammar specification for guided (structured) decoding. There are two
+ options: - `ResponseFormat.json_schema`: The grammar is a JSON schema. Most
+ providers support this format. - `ResponseFormat.grammar`: The grammar is a BNF
+ grammar. This format is more flexible, but not all providers support it.
+
+ sampling_params: Parameters to control the sampling strategy
- `function_tag` -- This is an example of how you could define your own user
- defined format for making tool calls. The function_tag format looks like this,
-          <function=function_name>(parameters)</function>
+ tool_choice: (Optional) Whether tool use is required or automatic. Defaults to
+ ToolChoice.auto.
- The detailed prompts for each of these formats are added to llama cli
+ tool_prompt_format: (Optional) Instructs the model how to format tool calls. By default, Llama Stack
+ will attempt to use a format that is best adapted to the model. -
+ `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. -
+ `ToolPromptFormat.function_tag`: The tool calls are enclosed in a
+          <function=function_name> tag. - `ToolPromptFormat.python_list`: The tool calls
+ are output as Python syntax -- a list of function calls.
+
+ tools: (Optional) List of tool definitions available to the model
extra_headers: Send extra headers
@@ -571,8 +691,6 @@ async def chat_completion(
tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN,
tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN,
tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -581,16 +699,38 @@ async def chat_completion(
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> InferenceChatCompletionResponse | AsyncStream[InferenceChatCompletionResponse]:
"""
+ Generate a chat completion for the given messages using the specified model.
+
Args:
- tool_prompt_format: `json` -- Refers to the json format for calling tools. The json format takes the
- form like { "type": "function", "function" : { "name": "function_name",
- "description": "function_description", "parameters": {...} } }
+ messages: List of messages in the conversation
+
+ model_id: The identifier of the model to use. The model must be registered with Llama
+ Stack and available via the /models endpoint.
+
+ stream: (Optional) If True, generate an SSE event stream of the response. Defaults to
+ False.
+
+ logprobs: (Optional) If specified, log probabilities for each token position will be
+ returned.
+
+ response_format: (Optional) Grammar specification for guided (structured) decoding. There are two
+ options: - `ResponseFormat.json_schema`: The grammar is a JSON schema. Most
+ providers support this format. - `ResponseFormat.grammar`: The grammar is a BNF
+ grammar. This format is more flexible, but not all providers support it.
+
+ sampling_params: Parameters to control the sampling strategy
- `function_tag` -- This is an example of how you could define your own user
- defined format for making tool calls. The function_tag format looks like this,
-          <function=function_name>(parameters)</function>
+ tool_choice: (Optional) Whether tool use is required or automatic. Defaults to
+ ToolChoice.auto.
- The detailed prompts for each of these formats are added to llama cli
+ tool_prompt_format: (Optional) Instructs the model how to format tool calls. By default, Llama Stack
+ will attempt to use a format that is best adapted to the model. -
+ `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. -
+ `ToolPromptFormat.function_tag`: The tool calls are enclosed in a
+          <function=function_name> tag. - `ToolPromptFormat.python_list`: The tool calls
+ are output as Python syntax -- a list of function calls.
+
+ tools: (Optional) List of tool definitions available to the model
extra_headers: Send extra headers
@@ -615,8 +755,6 @@ async def chat_completion(
tool_choice: Literal["auto", "required"] | NotGiven = NOT_GIVEN,
tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN,
tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -625,15 +763,6 @@ async def chat_completion(
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> InferenceChatCompletionResponse | AsyncStream[InferenceChatCompletionResponse]:
extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return cast(
InferenceChatCompletionResponse,
await self._post(
@@ -673,8 +802,6 @@ async def completion(
response_format: ResponseFormat | NotGiven = NOT_GIVEN,
sampling_params: SamplingParams | NotGiven = NOT_GIVEN,
stream: Literal[False] | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -683,7 +810,24 @@ async def completion(
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> InferenceCompletionResponse:
"""
+ Generate a completion for the given content using the specified model.
+
Args:
+ content: The content to generate a completion for
+
+ model_id: The identifier of the model to use. The model must be registered with Llama
+ Stack and available via the /models endpoint.
+
+ logprobs: (Optional) If specified, log probabilities for each token position will be
+ returned.
+
+ response_format: (Optional) Grammar specification for guided (structured) decoding
+
+ sampling_params: (Optional) Parameters to control the sampling strategy
+
+ stream: (Optional) If True, generate an SSE event stream of the response. Defaults to
+ False.
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -704,8 +848,6 @@ async def completion(
logprobs: inference_completion_params.Logprobs | NotGiven = NOT_GIVEN,
response_format: ResponseFormat | NotGiven = NOT_GIVEN,
sampling_params: SamplingParams | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -714,7 +856,24 @@ async def completion(
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> AsyncStream[InferenceCompletionResponse]:
"""
+ Generate a completion for the given content using the specified model.
+
Args:
+ content: The content to generate a completion for
+
+ model_id: The identifier of the model to use. The model must be registered with Llama
+ Stack and available via the /models endpoint.
+
+ stream: (Optional) If True, generate an SSE event stream of the response. Defaults to
+ False.
+
+ logprobs: (Optional) If specified, log probabilities for each token position will be
+ returned.
+
+ response_format: (Optional) Grammar specification for guided (structured) decoding
+
+ sampling_params: (Optional) Parameters to control the sampling strategy
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -735,8 +894,6 @@ async def completion(
logprobs: inference_completion_params.Logprobs | NotGiven = NOT_GIVEN,
response_format: ResponseFormat | NotGiven = NOT_GIVEN,
sampling_params: SamplingParams | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -745,7 +902,24 @@ async def completion(
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> InferenceCompletionResponse | AsyncStream[InferenceCompletionResponse]:
"""
+ Generate a completion for the given content using the specified model.
+
Args:
+ content: The content to generate a completion for
+
+ model_id: The identifier of the model to use. The model must be registered with Llama
+ Stack and available via the /models endpoint.
+
+ stream: (Optional) If True, generate an SSE event stream of the response. Defaults to
+ False.
+
+ logprobs: (Optional) If specified, log probabilities for each token position will be
+ returned.
+
+ response_format: (Optional) Grammar specification for guided (structured) decoding
+
+ sampling_params: (Optional) Parameters to control the sampling strategy
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -766,8 +940,6 @@ async def completion(
response_format: ResponseFormat | NotGiven = NOT_GIVEN,
sampling_params: SamplingParams | NotGiven = NOT_GIVEN,
stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -776,15 +948,6 @@ async def completion(
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> InferenceCompletionResponse | AsyncStream[InferenceCompletionResponse]:
extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return cast(
InferenceCompletionResponse,
await self._post(
@@ -816,8 +979,6 @@ async def embeddings(
*,
contents: List[InterleavedContent],
model_id: str,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -826,7 +987,16 @@ async def embeddings(
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> EmbeddingsResponse:
"""
+ Generate embeddings for content pieces using the specified model.
+
Args:
+ contents: List of contents to generate embeddings for. Note that content can be
+ multimodal. The behavior depends on the model and provider. Some models may only
+ support text.
+
+ model_id: The identifier of the model to use. The model must be an embedding model
+ registered with Llama Stack and available via the /models endpoint.
+
extra_headers: Send extra headers
extra_query: Add additional query parameters to the request
@@ -835,15 +1005,6 @@ async def embeddings(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._post(
"/v1/inference/embeddings",
body=await async_maybe_transform(
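
The rewritten docstrings above spell out the chat-completion parameters; a short usage sketch under those descriptions, where the endpoint URL and model id are placeholders and the model must already be registered via the /models endpoint:

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(base_url="http://localhost:5000")  # placeholder endpoint

    # Non-streaming: returns a single InferenceChatCompletionResponse.
    response = client.inference.chat_completion(
        model_id="meta-llama/Llama-3.1-8B-Instruct",  # placeholder model id
        messages=[{"role": "user", "content": "Say hello."}],
    )

    # Streaming: stream=True switches the return type to an SSE event
    # stream (Stream[InferenceChatCompletionResponse]).
    for chunk in client.inference.chat_completion(
        model_id="meta-llama/Llama-3.1-8B-Instruct",
        messages=[{"role": "user", "content": "Say hello."}],
        stream=True,
    ):
        pass  # each chunk is one server-sent event
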
diff --git a/src/llama_stack_client/resources/inspect.py b/src/llama_stack_client/resources/inspect.py
index 2dfb9134..86fe1e43 100644
--- a/src/llama_stack_client/resources/inspect.py
+++ b/src/llama_stack_client/resources/inspect.py
@@ -5,7 +5,6 @@
import httpx
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from .._utils import strip_not_given
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import (
@@ -44,8 +43,6 @@ def with_streaming_response(self) -> InspectResourceWithStreamingResponse:
def health(
self,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -53,25 +50,6 @@ def health(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> HealthInfo:
- """
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
"/v1/health",
options=make_request_options(
@@ -83,8 +61,6 @@ def health(
def version(
self,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -92,25 +68,6 @@ def version(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> VersionInfo:
- """
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
"/v1/version",
options=make_request_options(
@@ -143,8 +100,6 @@ def with_streaming_response(self) -> AsyncInspectResourceWithStreamingResponse:
async def health(
self,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -152,25 +107,6 @@ async def health(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> HealthInfo:
- """
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
"/v1/health",
options=make_request_options(
@@ -182,8 +118,6 @@ async def health(
async def version(
self,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -191,25 +125,6 @@ async def version(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> VersionInfo:
- """
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
"/v1/version",
options=make_request_options(
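
With only the standard request options left in their signatures, the inspect endpoints take no required arguments at all; a minimal sketch (endpoint URL is a placeholder):

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(base_url="http://localhost:5000")  # placeholder endpoint

    health = client.inspect.health()    # GET /v1/health  -> HealthInfo
    version = client.inspect.version()  # GET /v1/version -> VersionInfo
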
diff --git a/src/llama_stack_client/resources/models.py b/src/llama_stack_client/resources/models.py
index 2d5297f2..584c2001 100644
--- a/src/llama_stack_client/resources/models.py
+++ b/src/llama_stack_client/resources/models.py
@@ -11,7 +11,6 @@
from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven
from .._utils import (
maybe_transform,
- strip_not_given,
async_maybe_transform,
)
from .._compat import cached_property
@@ -54,8 +53,6 @@ def retrieve(
self,
model_id: str,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -75,15 +72,6 @@ def retrieve(
"""
if not model_id:
raise ValueError(f"Expected a non-empty value for `model_id` but received {model_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
f"/v1/models/{model_id}",
options=make_request_options(
@@ -95,8 +83,6 @@ def retrieve(
def list(
self,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -104,25 +90,6 @@ def list(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> ModelListResponse:
- """
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
"/v1/models",
options=make_request_options(
@@ -143,8 +110,6 @@ def register(
model_type: Literal["llm", "embedding"] | NotGiven = NOT_GIVEN,
provider_id: str | NotGiven = NOT_GIVEN,
provider_model_id: str | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -162,15 +127,6 @@ def register(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._post(
"/v1/models",
body=maybe_transform(
@@ -193,8 +149,6 @@ def unregister(
self,
model_id: str,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -215,15 +169,6 @@ def unregister(
if not model_id:
raise ValueError(f"Expected a non-empty value for `model_id` but received {model_id!r}")
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._delete(
f"/v1/models/{model_id}",
options=make_request_options(
@@ -257,8 +202,6 @@ async def retrieve(
self,
model_id: str,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -278,15 +221,6 @@ async def retrieve(
"""
if not model_id:
raise ValueError(f"Expected a non-empty value for `model_id` but received {model_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
f"/v1/models/{model_id}",
options=make_request_options(
@@ -298,8 +232,6 @@ async def retrieve(
async def list(
self,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -307,25 +239,6 @@ async def list(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> ModelListResponse:
- """
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
"/v1/models",
options=make_request_options(
@@ -346,8 +259,6 @@ async def register(
model_type: Literal["llm", "embedding"] | NotGiven = NOT_GIVEN,
provider_id: str | NotGiven = NOT_GIVEN,
provider_model_id: str | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -365,15 +276,6 @@ async def register(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._post(
"/v1/models",
body=await async_maybe_transform(
@@ -396,8 +298,6 @@ async def unregister(
self,
model_id: str,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -418,15 +318,6 @@ async def unregister(
if not model_id:
raise ValueError(f"Expected a non-empty value for `model_id` but received {model_id!r}")
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._delete(
f"/v1/models/{model_id}",
options=make_request_options(
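
The models resource follows the same pattern: registration and lookup are driven entirely by body and path parameters. A sketch with placeholder identifiers, assuming `model_id` is the one required field of register (the optional fields match the signature shown above):

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(base_url="http://localhost:5000")  # placeholder endpoint

    # Register, then look up and list; model_type/provider_id are optional.
    client.models.register(
        model_id="my-embedding-model",  # placeholder identifier
        model_type="embedding",
    )
    model = client.models.retrieve("my-embedding-model")
    all_models = client.models.list()
    client.models.unregister("my-embedding-model")
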
diff --git a/src/llama_stack_client/resources/post_training/job.py b/src/llama_stack_client/resources/post_training/job.py
index d36de350..e6b67120 100644
--- a/src/llama_stack_client/resources/post_training/job.py
+++ b/src/llama_stack_client/resources/post_training/job.py
@@ -9,7 +9,6 @@
from ..._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven
from ..._utils import (
maybe_transform,
- strip_not_given,
async_maybe_transform,
)
from ..._compat import cached_property
@@ -53,8 +52,6 @@ def with_streaming_response(self) -> JobResourceWithStreamingResponse:
def list(
self,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -62,25 +59,6 @@ def list(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> JobListResponse:
- """
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
"/v1/post-training/jobs",
options=make_request_options(
@@ -97,8 +75,6 @@ def artifacts(
self,
*,
job_uuid: str,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -116,15 +92,6 @@ def artifacts(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
"/v1/post-training/job/artifacts",
options=make_request_options(
@@ -141,8 +108,6 @@ def cancel(
self,
*,
job_uuid: str,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -161,15 +126,6 @@ def cancel(
timeout: Override the client-level default timeout for this request, in seconds
"""
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._post(
"/v1/post-training/job/cancel",
body=maybe_transform({"job_uuid": job_uuid}, job_cancel_params.JobCancelParams),
@@ -183,8 +139,6 @@ def status(
self,
*,
job_uuid: str,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -202,15 +156,6 @@ def status(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
"/v1/post-training/job/status",
options=make_request_options(
@@ -247,8 +192,6 @@ def with_streaming_response(self) -> AsyncJobResourceWithStreamingResponse:
async def list(
self,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -256,25 +199,6 @@ async def list(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> JobListResponse:
- """
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
"/v1/post-training/jobs",
options=make_request_options(
@@ -291,8 +215,6 @@ async def artifacts(
self,
*,
job_uuid: str,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -310,15 +232,6 @@ async def artifacts(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
"/v1/post-training/job/artifacts",
options=make_request_options(
@@ -335,8 +248,6 @@ async def cancel(
self,
*,
job_uuid: str,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -355,15 +266,6 @@ async def cancel(
timeout: Override the client-level default timeout for this request, in seconds
"""
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._post(
"/v1/post-training/job/cancel",
body=await async_maybe_transform({"job_uuid": job_uuid}, job_cancel_params.JobCancelParams),
@@ -377,8 +279,6 @@ async def status(
self,
*,
job_uuid: str,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -396,15 +296,6 @@ async def status(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
"/v1/post-training/job/status",
options=make_request_options(
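
Note: every hunk in this patch removes the same two generated kwargs and the header-merging block beneath them. For readers skimming the diff, this is roughly what each deleted block did — a minimal re-sketch under assumed names, not the SDK's actual implementation:

    from typing import Any, Dict


    class _NotGiven:
        """Stand-in for the SDK's NOT_GIVEN sentinel."""


    NOT_GIVEN = _NotGiven()


    def strip_not_given(headers: Dict[str, Any]) -> Dict[str, Any]:
        # Drop sentinel-valued entries so only explicitly supplied
        # per-method header kwargs were merged into extra_headers.
        return {k: v for k, v in headers.items() if not isinstance(v, _NotGiven)}


    # A header left at its default never reached the request:
    assert strip_not_given({"X-LlamaStack-Client-Version": NOT_GIVEN}) == {}
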
diff --git a/src/llama_stack_client/resources/post_training/post_training.py b/src/llama_stack_client/resources/post_training/post_training.py
index 888733ea..a93a1ebb 100644
--- a/src/llama_stack_client/resources/post_training/post_training.py
+++ b/src/llama_stack_client/resources/post_training/post_training.py
@@ -21,7 +21,6 @@
from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
from ..._utils import (
maybe_transform,
- strip_not_given,
async_maybe_transform,
)
from ..._compat import cached_property
@@ -72,8 +71,6 @@ def preference_optimize(
job_uuid: str,
logger_config: Dict[str, Union[bool, float, str, Iterable[object], object, None]],
training_config: post_training_preference_optimize_params.TrainingConfig,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -91,15 +88,6 @@ def preference_optimize(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._post(
"/v1/post-training/preference-optimize",
body=maybe_transform(
@@ -129,8 +117,6 @@ def supervised_fine_tune(
training_config: post_training_supervised_fine_tune_params.TrainingConfig,
algorithm_config: AlgorithmConfigParam | NotGiven = NOT_GIVEN,
checkpoint_dir: str | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -148,15 +134,6 @@ def supervised_fine_tune(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._post(
"/v1/post-training/supervised-fine-tune",
body=maybe_transform(
@@ -211,8 +188,6 @@ async def preference_optimize(
job_uuid: str,
logger_config: Dict[str, Union[bool, float, str, Iterable[object], object, None]],
training_config: post_training_preference_optimize_params.TrainingConfig,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -230,15 +205,6 @@ async def preference_optimize(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._post(
"/v1/post-training/preference-optimize",
body=await async_maybe_transform(
@@ -268,8 +234,6 @@ async def supervised_fine_tune(
training_config: post_training_supervised_fine_tune_params.TrainingConfig,
algorithm_config: AlgorithmConfigParam | NotGiven = NOT_GIVEN,
checkpoint_dir: str | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -287,15 +251,6 @@ async def supervised_fine_tune(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._post(
"/v1/post-training/supervised-fine-tune",
body=await async_maybe_transform(
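
Note: callers that previously passed x_llama_stack_provider_data or x_llama_stack_client_version migrate to the generic extra_headers parameter, which every method keeps. A hedged sketch (base URL, job id, and header payload are illustrative):

    import json

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(base_url="http://localhost:8321")

    # The dedicated kwarg is gone; pass the header directly instead.
    status = client.post_training.job.status(
        job_uuid="job-123",  # illustrative job id
        extra_headers={
            "X-LlamaStack-Provider-Data": json.dumps({"api_key": "sk-demo"}),
        },
    )
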
diff --git a/src/llama_stack_client/resources/providers.py b/src/llama_stack_client/resources/providers.py
index 190002a0..1bddc809 100644
--- a/src/llama_stack_client/resources/providers.py
+++ b/src/llama_stack_client/resources/providers.py
@@ -7,7 +7,6 @@
import httpx
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from .._utils import strip_not_given
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import (
@@ -46,8 +45,6 @@ def with_streaming_response(self) -> ProvidersResourceWithStreamingResponse:
def list(
self,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -55,25 +52,6 @@ def list(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> ProviderListResponse:
- """
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
"/v1/inspect/providers",
options=make_request_options(
@@ -110,8 +88,6 @@ def with_streaming_response(self) -> AsyncProvidersResourceWithStreamingResponse
async def list(
self,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -119,25 +95,6 @@ async def list(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> ProviderListResponse:
- """
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
"/v1/inspect/providers",
options=make_request_options(
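
Note: to send these headers on every request rather than per call, the client-level default_headers option still applies, and per-method extra_headers takes precedence where both are set. A sketch with illustrative values:

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(
        base_url="http://localhost:8321",
        default_headers={
            "X-LlamaStack-Client-Version": "0.1.0",
            "X-LlamaStack-Provider-Data": '{"api_key": "sk-demo"}',
        },
    )

    providers = client.providers.list()  # headers now ride on every request
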
diff --git a/src/llama_stack_client/resources/routes.py b/src/llama_stack_client/resources/routes.py
index 2f10b219..a0aefdfa 100644
--- a/src/llama_stack_client/resources/routes.py
+++ b/src/llama_stack_client/resources/routes.py
@@ -7,7 +7,6 @@
import httpx
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from .._utils import strip_not_given
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import (
@@ -46,8 +45,6 @@ def with_streaming_response(self) -> RoutesResourceWithStreamingResponse:
def list(
self,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -55,25 +52,6 @@ def list(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> RouteListResponse:
- """
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
"/v1/inspect/routes",
options=make_request_options(
@@ -110,8 +88,6 @@ def with_streaming_response(self) -> AsyncRoutesResourceWithStreamingResponse:
async def list(
self,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -119,25 +95,6 @@ async def list(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> RouteListResponse:
- """
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
"/v1/inspect/routes",
options=make_request_options(
diff --git a/src/llama_stack_client/resources/safety.py b/src/llama_stack_client/resources/safety.py
index bc6333e4..7382c81c 100644
--- a/src/llama_stack_client/resources/safety.py
+++ b/src/llama_stack_client/resources/safety.py
@@ -10,7 +10,6 @@
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
from .._utils import (
maybe_transform,
- strip_not_given,
async_maybe_transform,
)
from .._compat import cached_property
@@ -54,8 +53,6 @@ def run_shield(
messages: Iterable[Message],
params: Dict[str, Union[bool, float, str, Iterable[object], object, None]],
shield_id: str,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -73,15 +70,6 @@ def run_shield(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._post(
"/v1/safety/run-shield",
body=maybe_transform(
@@ -125,8 +113,6 @@ async def run_shield(
messages: Iterable[Message],
params: Dict[str, Union[bool, float, str, Iterable[object], object, None]],
shield_id: str,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -144,15 +130,6 @@ async def run_shield(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._post(
"/v1/safety/run-shield",
body=await async_maybe_transform(
diff --git a/src/llama_stack_client/resources/scoring.py b/src/llama_stack_client/resources/scoring.py
index 80b2e79b..be2a60ff 100644
--- a/src/llama_stack_client/resources/scoring.py
+++ b/src/llama_stack_client/resources/scoring.py
@@ -10,7 +10,6 @@
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
from .._utils import (
maybe_transform,
- strip_not_given,
async_maybe_transform,
)
from .._compat import cached_property
@@ -54,8 +53,6 @@ def score(
*,
input_rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]],
scoring_functions: Dict[str, Optional[ScoringFnParamsParam]],
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -73,15 +70,6 @@ def score(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._post(
"/v1/scoring/score",
body=maybe_transform(
@@ -103,8 +91,6 @@ def score_batch(
dataset_id: str,
save_results_dataset: bool,
scoring_functions: Dict[str, Optional[ScoringFnParamsParam]],
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -122,15 +108,6 @@ def score_batch(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._post(
"/v1/scoring/score-batch",
body=maybe_transform(
@@ -173,8 +150,6 @@ async def score(
*,
input_rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]],
scoring_functions: Dict[str, Optional[ScoringFnParamsParam]],
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -192,15 +167,6 @@ async def score(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._post(
"/v1/scoring/score",
body=await async_maybe_transform(
@@ -222,8 +188,6 @@ async def score_batch(
dataset_id: str,
save_results_dataset: bool,
scoring_functions: Dict[str, Optional[ScoringFnParamsParam]],
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -241,15 +205,6 @@ async def score_batch(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._post(
"/v1/scoring/score-batch",
body=await async_maybe_transform(
diff --git a/src/llama_stack_client/resources/scoring_functions.py b/src/llama_stack_client/resources/scoring_functions.py
index 858a8e8b..1bc535ef 100644
--- a/src/llama_stack_client/resources/scoring_functions.py
+++ b/src/llama_stack_client/resources/scoring_functions.py
@@ -10,7 +10,6 @@
from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven
from .._utils import (
maybe_transform,
- strip_not_given,
async_maybe_transform,
)
from .._compat import cached_property
@@ -55,8 +54,6 @@ def retrieve(
self,
scoring_fn_id: str,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -76,15 +73,6 @@ def retrieve(
"""
if not scoring_fn_id:
raise ValueError(f"Expected a non-empty value for `scoring_fn_id` but received {scoring_fn_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
f"/v1/scoring-functions/{scoring_fn_id}",
options=make_request_options(
@@ -96,8 +84,6 @@ def retrieve(
def list(
self,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -105,25 +91,6 @@ def list(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> ScoringFunctionListResponse:
- """
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
"/v1/scoring-functions",
options=make_request_options(
@@ -145,8 +112,6 @@ def register(
params: ScoringFnParamsParam | NotGiven = NOT_GIVEN,
provider_id: str | NotGiven = NOT_GIVEN,
provider_scoring_fn_id: str | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -165,15 +130,6 @@ def register(
timeout: Override the client-level default timeout for this request, in seconds
"""
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._post(
"/v1/scoring-functions",
body=maybe_transform(
@@ -218,8 +174,6 @@ async def retrieve(
self,
scoring_fn_id: str,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -239,15 +193,6 @@ async def retrieve(
"""
if not scoring_fn_id:
raise ValueError(f"Expected a non-empty value for `scoring_fn_id` but received {scoring_fn_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
f"/v1/scoring-functions/{scoring_fn_id}",
options=make_request_options(
@@ -259,8 +204,6 @@ async def retrieve(
async def list(
self,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -268,25 +211,6 @@ async def list(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> ScoringFunctionListResponse:
- """
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
"/v1/scoring-functions",
options=make_request_options(
@@ -308,8 +232,6 @@ async def register(
params: ScoringFnParamsParam | NotGiven = NOT_GIVEN,
provider_id: str | NotGiven = NOT_GIVEN,
provider_scoring_fn_id: str | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -328,15 +250,6 @@ async def register(
timeout: Override the client-level default timeout for this request, in seconds
"""
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._post(
"/v1/scoring-functions",
body=await async_maybe_transform(
diff --git a/src/llama_stack_client/resources/shields.py b/src/llama_stack_client/resources/shields.py
index 3a1dab86..4205f972 100644
--- a/src/llama_stack_client/resources/shields.py
+++ b/src/llama_stack_client/resources/shields.py
@@ -10,7 +10,6 @@
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
from .._utils import (
maybe_transform,
- strip_not_given,
async_maybe_transform,
)
from .._compat import cached_property
@@ -53,8 +52,6 @@ def retrieve(
self,
identifier: str,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -74,15 +71,6 @@ def retrieve(
"""
if not identifier:
raise ValueError(f"Expected a non-empty value for `identifier` but received {identifier!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
f"/v1/shields/{identifier}",
options=make_request_options(
@@ -94,8 +82,6 @@ def retrieve(
def list(
self,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -103,25 +89,6 @@ def list(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> ShieldListResponse:
- """
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
"/v1/shields",
options=make_request_options(
@@ -141,8 +108,6 @@ def register(
params: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN,
provider_id: str | NotGiven = NOT_GIVEN,
provider_shield_id: str | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -160,15 +125,6 @@ def register(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._post(
"/v1/shields",
body=maybe_transform(
@@ -211,8 +167,6 @@ async def retrieve(
self,
identifier: str,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -232,15 +186,6 @@ async def retrieve(
"""
if not identifier:
raise ValueError(f"Expected a non-empty value for `identifier` but received {identifier!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
f"/v1/shields/{identifier}",
options=make_request_options(
@@ -252,8 +197,6 @@ async def retrieve(
async def list(
self,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -261,25 +204,6 @@ async def list(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> ShieldListResponse:
- """
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
"/v1/shields",
options=make_request_options(
@@ -299,8 +223,6 @@ async def register(
params: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN,
provider_id: str | NotGiven = NOT_GIVEN,
provider_shield_id: str | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -318,15 +240,6 @@ async def register(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._post(
"/v1/shields",
body=await async_maybe_transform(
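
Note: recent llama-stack-client releases also expose a provider_data constructor argument that JSON-encodes a dict into the X-LlamaStack-Provider-Data header; treat its availability as an assumption to verify against your installed version:

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(
        base_url="http://localhost:8321",
        # Assumption: provider_data exists in your version and is serialized
        # into the X-LlamaStack-Provider-Data header on each request.
        provider_data={"api_key": "sk-demo"},
    )

    shields = client.shields.list()
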
diff --git a/src/llama_stack_client/resources/synthetic_data_generation.py b/src/llama_stack_client/resources/synthetic_data_generation.py
index 1795cc49..91d6ee72 100644
--- a/src/llama_stack_client/resources/synthetic_data_generation.py
+++ b/src/llama_stack_client/resources/synthetic_data_generation.py
@@ -11,7 +11,6 @@
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
from .._utils import (
maybe_transform,
- strip_not_given,
async_maybe_transform,
)
from .._compat import cached_property
@@ -55,8 +54,6 @@ def generate(
dialogs: Iterable[Message],
filtering_function: Literal["none", "random", "top_k", "top_p", "top_k_top_p", "sigmoid"],
model: str | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -74,15 +71,6 @@ def generate(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._post(
"/v1/synthetic-data-generation/generate",
body=maybe_transform(
@@ -126,8 +114,6 @@ async def generate(
dialogs: Iterable[Message],
filtering_function: Literal["none", "random", "top_k", "top_p", "top_k_top_p", "sigmoid"],
model: str | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -145,15 +131,6 @@ async def generate(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._post(
"/v1/synthetic-data-generation/generate",
body=await async_maybe_transform(
diff --git a/src/llama_stack_client/resources/telemetry.py b/src/llama_stack_client/resources/telemetry.py
index 5b1707b9..391ae6d0 100644
--- a/src/llama_stack_client/resources/telemetry.py
+++ b/src/llama_stack_client/resources/telemetry.py
@@ -16,7 +16,6 @@
from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven
from .._utils import (
maybe_transform,
- strip_not_given,
async_maybe_transform,
)
from .._compat import cached_property
@@ -65,8 +64,6 @@ def get_span(
span_id: str,
*,
trace_id: str,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -88,15 +85,6 @@ def get_span(
raise ValueError(f"Expected a non-empty value for `trace_id` but received {trace_id!r}")
if not span_id:
raise ValueError(f"Expected a non-empty value for `span_id` but received {span_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
f"/v1/telemetry/traces/{trace_id}/spans/{span_id}",
options=make_request_options(
@@ -111,8 +99,6 @@ def get_span_tree(
*,
attributes_to_return: List[str] | NotGiven = NOT_GIVEN,
max_depth: int | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -132,15 +118,6 @@ def get_span_tree(
"""
if not span_id:
raise ValueError(f"Expected a non-empty value for `span_id` but received {span_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
f"/v1/telemetry/spans/{span_id}/tree",
options=make_request_options(
@@ -164,8 +141,6 @@ def get_trace(
self,
trace_id: str,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -185,15 +160,6 @@ def get_trace(
"""
if not trace_id:
raise ValueError(f"Expected a non-empty value for `trace_id` but received {trace_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
f"/v1/telemetry/traces/{trace_id}",
options=make_request_options(
@@ -207,8 +173,6 @@ def log_event(
*,
event: EventParam,
ttl_seconds: int,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -227,15 +191,6 @@ def log_event(
timeout: Override the client-level default timeout for this request, in seconds
"""
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._post(
"/v1/telemetry/events",
body=maybe_transform(
@@ -257,8 +212,6 @@ def query_spans(
attribute_filters: Iterable[QueryConditionParam],
attributes_to_return: List[str],
max_depth: int | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -276,15 +229,6 @@ def query_spans(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
"/v1/telemetry/spans",
options=make_request_options(
@@ -312,8 +256,6 @@ def query_traces(
limit: int | NotGiven = NOT_GIVEN,
offset: int | NotGiven = NOT_GIVEN,
order_by: List[str] | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -331,15 +273,6 @@ def query_traces(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
"/v1/telemetry/traces",
options=make_request_options(
@@ -368,8 +301,6 @@ def save_spans_to_dataset(
attributes_to_save: List[str],
dataset_id: str,
max_depth: int | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -388,15 +319,6 @@ def save_spans_to_dataset(
timeout: Override the client-level default timeout for this request, in seconds
"""
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._post(
"/v1/telemetry/spans/export",
body=maybe_transform(
@@ -440,8 +362,6 @@ async def get_span(
span_id: str,
*,
trace_id: str,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -463,15 +383,6 @@ async def get_span(
raise ValueError(f"Expected a non-empty value for `trace_id` but received {trace_id!r}")
if not span_id:
raise ValueError(f"Expected a non-empty value for `span_id` but received {span_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
f"/v1/telemetry/traces/{trace_id}/spans/{span_id}",
options=make_request_options(
@@ -486,8 +397,6 @@ async def get_span_tree(
*,
attributes_to_return: List[str] | NotGiven = NOT_GIVEN,
max_depth: int | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -507,15 +416,6 @@ async def get_span_tree(
"""
if not span_id:
raise ValueError(f"Expected a non-empty value for `span_id` but received {span_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
f"/v1/telemetry/spans/{span_id}/tree",
options=make_request_options(
@@ -539,8 +439,6 @@ async def get_trace(
self,
trace_id: str,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -560,15 +458,6 @@ async def get_trace(
"""
if not trace_id:
raise ValueError(f"Expected a non-empty value for `trace_id` but received {trace_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
f"/v1/telemetry/traces/{trace_id}",
options=make_request_options(
@@ -582,8 +471,6 @@ async def log_event(
*,
event: EventParam,
ttl_seconds: int,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -602,15 +489,6 @@ async def log_event(
timeout: Override the client-level default timeout for this request, in seconds
"""
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._post(
"/v1/telemetry/events",
body=await async_maybe_transform(
@@ -632,8 +510,6 @@ async def query_spans(
attribute_filters: Iterable[QueryConditionParam],
attributes_to_return: List[str],
max_depth: int | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -651,15 +527,6 @@ async def query_spans(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
"/v1/telemetry/spans",
options=make_request_options(
@@ -687,8 +554,6 @@ async def query_traces(
limit: int | NotGiven = NOT_GIVEN,
offset: int | NotGiven = NOT_GIVEN,
order_by: List[str] | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -706,15 +571,6 @@ async def query_traces(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
"/v1/telemetry/traces",
options=make_request_options(
@@ -743,8 +599,6 @@ async def save_spans_to_dataset(
attributes_to_save: List[str],
dataset_id: str,
max_depth: int | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -763,15 +617,6 @@ async def save_spans_to_dataset(
timeout: Override the client-level default timeout for this request, in seconds
"""
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._post(
"/v1/telemetry/spans/export",
body=await async_maybe_transform(
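
Note: the async resources change identically, and the same extra_headers escape hatch applies there. A minimal sketch (trace id and header value are illustrative):

    import asyncio

    from llama_stack_client import AsyncLlamaStackClient


    async def main() -> None:
        client = AsyncLlamaStackClient(base_url="http://localhost:8321")
        trace = await client.telemetry.get_trace(
            "trace-abc123",  # illustrative trace id
            extra_headers={"X-LlamaStack-Provider-Data": '{"tag": "demo"}'},
        )
        print(trace)


    asyncio.run(main())
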
diff --git a/src/llama_stack_client/resources/tool_runtime/rag_tool.py b/src/llama_stack_client/resources/tool_runtime/rag_tool.py
index 49cf1f67..da0ce761 100644
--- a/src/llama_stack_client/resources/tool_runtime/rag_tool.py
+++ b/src/llama_stack_client/resources/tool_runtime/rag_tool.py
@@ -9,7 +9,6 @@
from ..._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven
from ..._utils import (
maybe_transform,
- strip_not_given,
async_maybe_transform,
)
from ..._compat import cached_property
@@ -56,8 +55,6 @@ def insert(
chunk_size_in_tokens: int,
documents: Iterable[Document],
vector_db_id: str,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -78,15 +75,6 @@ def insert(
timeout: Override the client-level default timeout for this request, in seconds
"""
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._post(
"/v1/tool-runtime/rag-tool/insert",
body=maybe_transform(
@@ -109,8 +97,6 @@ def query(
content: InterleavedContent,
vector_db_ids: List[str],
query_config: QueryConfig | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -130,15 +116,6 @@ def query(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._post(
"/v1/tool-runtime/rag-tool/query",
body=maybe_transform(
@@ -182,8 +159,6 @@ async def insert(
chunk_size_in_tokens: int,
documents: Iterable[Document],
vector_db_id: str,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -204,15 +179,6 @@ async def insert(
timeout: Override the client-level default timeout for this request, in seconds
"""
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._post(
"/v1/tool-runtime/rag-tool/insert",
body=await async_maybe_transform(
@@ -235,8 +201,6 @@ async def query(
content: InterleavedContent,
vector_db_ids: List[str],
query_config: QueryConfig | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -256,15 +220,6 @@ async def query(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._post(
"/v1/tool-runtime/rag-tool/query",
body=await async_maybe_transform(
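
Note: per the comment these methods retain, extra_headers values take precedence over values defined on the client, so a single call can override a client-wide default. A sketch using the RAG tool (ids and payloads are illustrative; a plain string is a valid InterleavedContent):

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(
        base_url="http://localhost:8321",
        default_headers={"X-LlamaStack-Provider-Data": '{"scope": "default"}'},
    )

    result = client.tool_runtime.rag_tool.query(
        content="What does run_shield check?",
        vector_db_ids=["docs-db"],  # illustrative vector db id
        extra_headers={"X-LlamaStack-Provider-Data": '{"scope": "override"}'},
    )
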
diff --git a/src/llama_stack_client/resources/tool_runtime/tool_runtime.py b/src/llama_stack_client/resources/tool_runtime/tool_runtime.py
index cf2a7a45..f3b92a74 100644
--- a/src/llama_stack_client/resources/tool_runtime/tool_runtime.py
+++ b/src/llama_stack_client/resources/tool_runtime/tool_runtime.py
@@ -10,7 +10,6 @@
from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
from ..._utils import (
maybe_transform,
- strip_not_given,
async_maybe_transform,
)
from .rag_tool import (
@@ -67,8 +66,6 @@ def invoke_tool(
*,
kwargs: Dict[str, Union[bool, float, str, Iterable[object], object, None]],
tool_name: str,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -88,15 +85,6 @@ def invoke_tool(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._post(
"/v1/tool-runtime/invoke",
body=maybe_transform(
@@ -117,8 +105,6 @@ def list_tools(
*,
mcp_endpoint: URL | NotGiven = NOT_GIVEN,
tool_group_id: str | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -137,15 +123,6 @@ def list_tools(
timeout: Override the client-level default timeout for this request, in seconds
"""
extra_headers = {"Accept": "application/jsonl", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
"/v1/tool-runtime/list-tools",
options=make_request_options(
@@ -195,8 +172,6 @@ async def invoke_tool(
*,
kwargs: Dict[str, Union[bool, float, str, Iterable[object], object, None]],
tool_name: str,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -216,15 +191,6 @@ async def invoke_tool(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._post(
"/v1/tool-runtime/invoke",
body=await async_maybe_transform(
@@ -245,8 +211,6 @@ async def list_tools(
*,
mcp_endpoint: URL | NotGiven = NOT_GIVEN,
tool_group_id: str | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -265,15 +229,6 @@ async def list_tools(
timeout: Override the client-level default timeout for this request, in seconds
"""
extra_headers = {"Accept": "application/jsonl", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
"/v1/tool-runtime/list-tools",
options=make_request_options(
diff --git a/src/llama_stack_client/resources/toolgroups.py b/src/llama_stack_client/resources/toolgroups.py
index b318963a..234be628 100644
--- a/src/llama_stack_client/resources/toolgroups.py
+++ b/src/llama_stack_client/resources/toolgroups.py
@@ -10,7 +10,6 @@
from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven
from .._utils import (
maybe_transform,
- strip_not_given,
async_maybe_transform,
)
from .._compat import cached_property
@@ -53,8 +52,6 @@ def with_streaming_response(self) -> ToolgroupsResourceWithStreamingResponse:
def list(
self,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -62,27 +59,7 @@ def list(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> ToolgroupListResponse:
- """
- List tool groups with optional provider
-
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
+ """List tool groups with optional provider"""
return self._get(
"/v1/toolgroups",
options=make_request_options(
@@ -99,8 +76,6 @@ def get(
self,
toolgroup_id: str,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -120,15 +95,6 @@ def get(
"""
if not toolgroup_id:
raise ValueError(f"Expected a non-empty value for `toolgroup_id` but received {toolgroup_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
f"/v1/toolgroups/{toolgroup_id}",
options=make_request_options(
@@ -144,8 +110,6 @@ def register(
toolgroup_id: str,
args: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN,
mcp_endpoint: URL | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -166,15 +130,6 @@ def register(
timeout: Override the client-level default timeout for this request, in seconds
"""
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._post(
"/v1/toolgroups",
body=maybe_transform(
@@ -196,8 +151,6 @@ def unregister(
self,
toolgroup_id: str,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -220,15 +173,6 @@ def unregister(
if not toolgroup_id:
raise ValueError(f"Expected a non-empty value for `toolgroup_id` but received {toolgroup_id!r}")
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._delete(
f"/v1/toolgroups/{toolgroup_id}",
options=make_request_options(
@@ -261,8 +205,6 @@ def with_streaming_response(self) -> AsyncToolgroupsResourceWithStreamingRespons
async def list(
self,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -270,27 +212,7 @@ async def list(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> ToolgroupListResponse:
- """
- List tool groups with optional provider
-
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
+ """List tool groups with optional provider"""
return await self._get(
"/v1/toolgroups",
options=make_request_options(
@@ -307,8 +229,6 @@ async def get(
self,
toolgroup_id: str,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -328,15 +248,6 @@ async def get(
"""
if not toolgroup_id:
raise ValueError(f"Expected a non-empty value for `toolgroup_id` but received {toolgroup_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
f"/v1/toolgroups/{toolgroup_id}",
options=make_request_options(
@@ -352,8 +263,6 @@ async def register(
toolgroup_id: str,
args: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN,
mcp_endpoint: URL | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -374,15 +283,6 @@ async def register(
timeout: Override the client-level default timeout for this request, in seconds
"""
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._post(
"/v1/toolgroups",
body=await async_maybe_transform(
@@ -404,8 +304,6 @@ async def unregister(
self,
toolgroup_id: str,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -428,15 +326,6 @@ async def unregister(
if not toolgroup_id:
raise ValueError(f"Expected a non-empty value for `toolgroup_id` but received {toolgroup_id!r}")
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._delete(
f"/v1/toolgroups/{toolgroup_id}",
options=make_request_options(
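For the toolgroups resource the endpoints are unchanged; only the header plumbing moves. A usage sketch for the sync client; the toolgroup and provider ids are placeholders, and `provider_id` plus the `identifier` attribute are assumed from the broader API rather than shown in this diff:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

client.toolgroups.register(
    toolgroup_id="builtin::websearch",  # placeholder id
    provider_id="tavily-search",        # assumed required by the register endpoint
)
for group in client.toolgroups.list():
    print(group.identifier)             # assumed ToolGroup field
client.toolgroups.get(toolgroup_id="builtin::websearch")
client.toolgroups.unregister(toolgroup_id="builtin::websearch")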
diff --git a/src/llama_stack_client/resources/tools.py b/src/llama_stack_client/resources/tools.py
index 23176ba8..206389f3 100644
--- a/src/llama_stack_client/resources/tools.py
+++ b/src/llama_stack_client/resources/tools.py
@@ -10,7 +10,6 @@
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
from .._utils import (
maybe_transform,
- strip_not_given,
async_maybe_transform,
)
from .._compat import cached_property
@@ -53,8 +52,6 @@ def list(
self,
*,
toolgroup_id: str | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -74,15 +71,6 @@ def list(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
"/v1/tools",
options=make_request_options(
@@ -100,8 +88,6 @@ def get(
self,
tool_name: str,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -121,15 +107,6 @@ def get(
"""
if not tool_name:
raise ValueError(f"Expected a non-empty value for `tool_name` but received {tool_name!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
f"/v1/tools/{tool_name}",
options=make_request_options(
@@ -163,8 +140,6 @@ async def list(
self,
*,
toolgroup_id: str | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -184,15 +159,6 @@ async def list(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
"/v1/tools",
options=make_request_options(
@@ -210,8 +176,6 @@ async def get(
self,
tool_name: str,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -231,15 +195,6 @@ async def get(
"""
if not tool_name:
raise ValueError(f"Expected a non-empty value for `tool_name` but received {tool_name!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
f"/v1/tools/{tool_name}",
options=make_request_options(
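Tools are read-only: `list` optionally filters by group and `get` fetches one definition by name. A sketch under the same placeholder ids; the `identifier` and `description` fields are assumed from the Tool model, which this diff does not show:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

for tool in client.tools.list(toolgroup_id="builtin::websearch"):  # filter is optional
    print(tool.identifier, tool.description)  # assumed Tool fields

tool = client.tools.get(tool_name="web_search")  # placeholder name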
diff --git a/src/llama_stack_client/resources/vector_dbs.py b/src/llama_stack_client/resources/vector_dbs.py
index 175edda8..63f9086f 100644
--- a/src/llama_stack_client/resources/vector_dbs.py
+++ b/src/llama_stack_client/resources/vector_dbs.py
@@ -10,7 +10,6 @@
from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven
from .._utils import (
maybe_transform,
- strip_not_given,
async_maybe_transform,
)
from .._compat import cached_property
@@ -54,8 +53,6 @@ def retrieve(
self,
vector_db_id: str,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -75,15 +72,6 @@ def retrieve(
"""
if not vector_db_id:
raise ValueError(f"Expected a non-empty value for `vector_db_id` but received {vector_db_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
f"/v1/vector-dbs/{vector_db_id}",
options=make_request_options(
@@ -95,8 +83,6 @@ def retrieve(
def list(
self,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -104,25 +90,6 @@ def list(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> VectorDBListResponse:
- """
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._get(
"/v1/vector-dbs",
options=make_request_options(
@@ -143,8 +110,6 @@ def register(
embedding_dimension: int | NotGiven = NOT_GIVEN,
provider_id: str | NotGiven = NOT_GIVEN,
provider_vector_db_id: str | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -162,15 +127,6 @@ def register(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._post(
"/v1/vector-dbs",
body=maybe_transform(
@@ -193,8 +149,6 @@ def unregister(
self,
vector_db_id: str,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -215,15 +169,6 @@ def unregister(
if not vector_db_id:
raise ValueError(f"Expected a non-empty value for `vector_db_id` but received {vector_db_id!r}")
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._delete(
f"/v1/vector-dbs/{vector_db_id}",
options=make_request_options(
@@ -257,8 +202,6 @@ async def retrieve(
self,
vector_db_id: str,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -278,15 +221,6 @@ async def retrieve(
"""
if not vector_db_id:
raise ValueError(f"Expected a non-empty value for `vector_db_id` but received {vector_db_id!r}")
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
f"/v1/vector-dbs/{vector_db_id}",
options=make_request_options(
@@ -298,8 +232,6 @@ async def retrieve(
async def list(
self,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -307,25 +239,6 @@ async def list(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> VectorDBListResponse:
- """
- Args:
- extra_headers: Send extra headers
-
- extra_query: Add additional query parameters to the request
-
- extra_body: Add additional JSON properties to the request
-
- timeout: Override the client-level default timeout for this request, in seconds
- """
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._get(
"/v1/vector-dbs",
options=make_request_options(
@@ -346,8 +259,6 @@ async def register(
embedding_dimension: int | NotGiven = NOT_GIVEN,
provider_id: str | NotGiven = NOT_GIVEN,
provider_vector_db_id: str | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -365,15 +276,6 @@ async def register(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._post(
"/v1/vector-dbs",
body=await async_maybe_transform(
@@ -396,8 +298,6 @@ async def unregister(
self,
vector_db_id: str,
*,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -418,15 +318,6 @@ async def unregister(
if not vector_db_id:
raise ValueError(f"Expected a non-empty value for `vector_db_id` but received {vector_db_id!r}")
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._delete(
f"/v1/vector-dbs/{vector_db_id}",
options=make_request_options(
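A vector-DB lifecycle sketch. `vector_db_id` and `embedding_model` are assumed to be the required register fields (only the optional ones appear in this hunk), and all ids and the model name are placeholders:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

client.vector_dbs.register(
    vector_db_id="my-docs",              # placeholder id
    embedding_model="all-MiniLM-L6-v2",  # assumed required; must be a registered embedding model
    embedding_dimension=384,             # optional; must match the model
)
print([db.identifier for db in client.vector_dbs.list()])  # assumed field
db = client.vector_dbs.retrieve(vector_db_id="my-docs")
client.vector_dbs.unregister(vector_db_id="my-docs")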
diff --git a/src/llama_stack_client/resources/vector_io.py b/src/llama_stack_client/resources/vector_io.py
index bc96c5cb..e71f8f1d 100644
--- a/src/llama_stack_client/resources/vector_io.py
+++ b/src/llama_stack_client/resources/vector_io.py
@@ -10,7 +10,6 @@
from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven
from .._utils import (
maybe_transform,
- strip_not_given,
async_maybe_transform,
)
from .._compat import cached_property
@@ -54,8 +53,6 @@ def insert(
chunks: Iterable[vector_io_insert_params.Chunk],
vector_db_id: str,
ttl_seconds: int | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -74,15 +71,6 @@ def insert(
timeout: Override the client-level default timeout for this request, in seconds
"""
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._post(
"/v1/vector-io/insert",
body=maybe_transform(
@@ -105,8 +93,6 @@ def query(
query: InterleavedContent,
vector_db_id: str,
params: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -124,15 +110,6 @@ def query(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return self._post(
"/v1/vector-io/query",
body=maybe_transform(
@@ -176,8 +153,6 @@ async def insert(
chunks: Iterable[vector_io_insert_params.Chunk],
vector_db_id: str,
ttl_seconds: int | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -196,15 +171,6 @@ async def insert(
timeout: Override the client-level default timeout for this request, in seconds
"""
extra_headers = {"Accept": "*/*", **(extra_headers or {})}
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._post(
"/v1/vector-io/insert",
body=await async_maybe_transform(
@@ -227,8 +193,6 @@ async def query(
query: InterleavedContent,
vector_db_id: str,
params: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN,
- x_llama_stack_client_version: str | NotGiven = NOT_GIVEN,
- x_llama_stack_provider_data: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
@@ -246,15 +210,6 @@ async def query(
timeout: Override the client-level default timeout for this request, in seconds
"""
- extra_headers = {
- **strip_not_given(
- {
- "X-LlamaStack-Client-Version": x_llama_stack_client_version,
- "X-LlamaStack-Provider-Data": x_llama_stack_provider_data,
- }
- ),
- **(extra_headers or {}),
- }
return await self._post(
"/v1/vector-io/query",
body=await async_maybe_transform(
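An insert-and-query round trip for vector_io. Chunks are plain dicts matching `vector_io_insert_params.Chunk`; the `content`/`metadata` keys, the `document_id` metadata convention, and the `chunks`/`scores` response fields are assumptions not shown in this hunk:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

client.vector_io.insert(
    vector_db_id="my-docs",  # placeholder id
    chunks=[
        {
            "content": "Llama Stack unifies inference, safety, and memory APIs.",
            "metadata": {"document_id": "doc-1"},  # assumed metadata convention
        }
    ],
    ttl_seconds=3600,  # optional expiry
)

result = client.vector_io.query(
    vector_db_id="my-docs",
    query="What does Llama Stack unify?",
    params={"max_chunks": 3},  # optional, interpreted by the provider
)
print(result.chunks, result.scores)  # assumed response fields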
diff --git a/src/llama_stack_client/types/agent_create_params.py b/src/llama_stack_client/types/agent_create_params.py
index 5eb9dbe2..cd857d81 100644
--- a/src/llama_stack_client/types/agent_create_params.py
+++ b/src/llama_stack_client/types/agent_create_params.py
@@ -2,9 +2,8 @@
from __future__ import annotations
-from typing_extensions import Required, Annotated, TypedDict
+from typing_extensions import Required, TypedDict
-from .._utils import PropertyInfo
from .shared_params.agent_config import AgentConfig
__all__ = ["AgentCreateParams"]
@@ -12,7 +11,3 @@
class AgentCreateParams(TypedDict, total=False):
agent_config: Required[AgentConfig]
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
diff --git a/src/llama_stack_client/types/agents/session_create_params.py b/src/llama_stack_client/types/agents/session_create_params.py
index dc8762bc..048c48f7 100644
--- a/src/llama_stack_client/types/agents/session_create_params.py
+++ b/src/llama_stack_client/types/agents/session_create_params.py
@@ -2,16 +2,10 @@
from __future__ import annotations
-from typing_extensions import Required, Annotated, TypedDict
-
-from ..._utils import PropertyInfo
+from typing_extensions import Required, TypedDict
__all__ = ["SessionCreateParams"]
class SessionCreateParams(TypedDict, total=False):
session_name: Required[str]
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
diff --git a/src/llama_stack_client/types/agents/session_retrieve_params.py b/src/llama_stack_client/types/agents/session_retrieve_params.py
index df9cecf9..be9aab53 100644
--- a/src/llama_stack_client/types/agents/session_retrieve_params.py
+++ b/src/llama_stack_client/types/agents/session_retrieve_params.py
@@ -3,9 +3,7 @@
from __future__ import annotations
from typing import List
-from typing_extensions import Required, Annotated, TypedDict
-
-from ..._utils import PropertyInfo
+from typing_extensions import Required, TypedDict
__all__ = ["SessionRetrieveParams"]
@@ -14,7 +12,3 @@ class SessionRetrieveParams(TypedDict, total=False):
agent_id: Required[str]
turn_ids: List[str]
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
diff --git a/src/llama_stack_client/types/agents/turn_create_params.py b/src/llama_stack_client/types/agents/turn_create_params.py
index 325876c4..f8586b6d 100644
--- a/src/llama_stack_client/types/agents/turn_create_params.py
+++ b/src/llama_stack_client/types/agents/turn_create_params.py
@@ -3,9 +3,8 @@
from __future__ import annotations
from typing import Dict, List, Union, Iterable
-from typing_extensions import Literal, Required, Annotated, TypeAlias, TypedDict
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
-from ..._utils import PropertyInfo
from ..shared_params.url import URL
from ..shared_params.user_message import UserMessage
from ..shared_params.tool_response_message import ToolResponseMessage
@@ -35,10 +34,6 @@ class TurnCreateParamsBase(TypedDict, total=False):
toolgroups: List[Toolgroup]
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
-
Message: TypeAlias = Union[UserMessage, ToolResponseMessage]
diff --git a/src/llama_stack_client/types/batch_inference_chat_completion_params.py b/src/llama_stack_client/types/batch_inference_chat_completion_params.py
index d249abc6..b0b5ce51 100644
--- a/src/llama_stack_client/types/batch_inference_chat_completion_params.py
+++ b/src/llama_stack_client/types/batch_inference_chat_completion_params.py
@@ -3,10 +3,10 @@
from __future__ import annotations
from typing import Dict, Union, Iterable
-from typing_extensions import Literal, Required, Annotated, TypedDict
+from typing_extensions import Literal, Required, TypedDict
-from .._utils import PropertyInfo
from .shared_params.message import Message
+from .shared_params.response_format import ResponseFormat
from .shared_params.sampling_params import SamplingParams
from .shared_params.tool_param_definition import ToolParamDefinition
@@ -20,32 +20,20 @@ class BatchInferenceChatCompletionParams(TypedDict, total=False):
logprobs: Logprobs
+ response_format: ResponseFormat
+
sampling_params: SamplingParams
tool_choice: Literal["auto", "required"]
tool_prompt_format: Literal["json", "function_tag", "python_list"]
- """
- `json` -- Refers to the json format for calling tools. The json format takes the
- form like { "type": "function", "function" : { "name": "function_name",
- "description": "function_description", "parameters": {...} } }
-
- `function_tag` -- This is an example of how you could define your own user
- defined format for making tool calls. The function_tag format looks like this,
-    `<function=function_name>`(parameters)
-
- The detailed prompts for each of these formats are added to llama cli
- """
tools: Iterable[Tool]
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
-
class Logprobs(TypedDict, total=False):
top_k: int
+ """How many tokens (for each position) to return log probabilities for."""
class Tool(TypedDict, total=False):
diff --git a/src/llama_stack_client/types/batch_inference_chat_completion_response.py b/src/llama_stack_client/types/batch_inference_chat_completion_response.py
index 661c00df..b7472b59 100644
--- a/src/llama_stack_client/types/batch_inference_chat_completion_response.py
+++ b/src/llama_stack_client/types/batch_inference_chat_completion_response.py
@@ -1,12 +1,21 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from typing import List
+from typing import List, Optional
from .._models import BaseModel
+from .token_log_probs import TokenLogProbs
from .shared.completion_message import CompletionMessage
-__all__ = ["BatchInferenceChatCompletionResponse"]
+__all__ = ["BatchInferenceChatCompletionResponse", "Batch"]
+
+
+class Batch(BaseModel):
+ completion_message: CompletionMessage
+ """The complete response message"""
+
+ logprobs: Optional[List[TokenLogProbs]] = None
+ """Optional log probabilities for generated tokens"""
class BatchInferenceChatCompletionResponse(BaseModel):
- completion_message_batch: List[CompletionMessage]
+ batch: List[Batch]
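This reshapes the batch chat-completion response: items are now `Batch` objects carrying the message plus optional logprobs, instead of bare `CompletionMessage`s. A migration sketch for consumers (constructing the request itself is out of scope for this hunk):

from llama_stack_client.types import BatchInferenceChatCompletionResponse

def print_batch(response: BatchInferenceChatCompletionResponse) -> None:
    # Before this change: response.completion_message_batch was List[CompletionMessage].
    for item in response.batch:
        print(item.completion_message.content)
        if item.logprobs:  # optional per-token log probabilities
            print(item.logprobs)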
diff --git a/src/llama_stack_client/types/batch_inference_completion_params.py b/src/llama_stack_client/types/batch_inference_completion_params.py
index 1ce93255..0a33c5b3 100644
--- a/src/llama_stack_client/types/batch_inference_completion_params.py
+++ b/src/llama_stack_client/types/batch_inference_completion_params.py
@@ -3,9 +3,9 @@
from __future__ import annotations
from typing import List
-from typing_extensions import Required, Annotated, TypedDict
+from typing_extensions import Required, TypedDict
-from .._utils import PropertyInfo
+from .shared_params.response_format import ResponseFormat
from .shared_params.sampling_params import SamplingParams
from .shared_params.interleaved_content import InterleavedContent
@@ -19,12 +19,11 @@ class BatchInferenceCompletionParams(TypedDict, total=False):
logprobs: Logprobs
- sampling_params: SamplingParams
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
+ response_format: ResponseFormat
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
+ sampling_params: SamplingParams
class Logprobs(TypedDict, total=False):
top_k: int
+ """How many tokens (for each position) to return log probabilities for."""
diff --git a/src/llama_stack_client/types/completion_response.py b/src/llama_stack_client/types/completion_response.py
index c8d97e79..f21afc02 100644
--- a/src/llama_stack_client/types/completion_response.py
+++ b/src/llama_stack_client/types/completion_response.py
@@ -11,7 +11,10 @@
class CompletionResponse(BaseModel):
content: str
+ """The generated completion text"""
stop_reason: Literal["end_of_turn", "end_of_message", "out_of_tokens"]
+ """Reason why generation stopped"""
logprobs: Optional[List[TokenLogProbs]] = None
+ """Optional log probabilities for generated tokens"""
diff --git a/src/llama_stack_client/types/dataset_register_params.py b/src/llama_stack_client/types/dataset_register_params.py
index 54513c36..853485a6 100644
--- a/src/llama_stack_client/types/dataset_register_params.py
+++ b/src/llama_stack_client/types/dataset_register_params.py
@@ -3,9 +3,8 @@
from __future__ import annotations
from typing import Dict, Union, Iterable
-from typing_extensions import Required, Annotated, TypedDict
+from typing_extensions import Required, TypedDict
-from .._utils import PropertyInfo
from .shared_params.url import URL
from .shared_params.param_type import ParamType
@@ -24,7 +23,3 @@ class DatasetRegisterParams(TypedDict, total=False):
provider_dataset_id: str
provider_id: str
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
diff --git a/src/llama_stack_client/types/datasetio_append_rows_params.py b/src/llama_stack_client/types/datasetio_append_rows_params.py
index ef9ea42d..2378454c 100644
--- a/src/llama_stack_client/types/datasetio_append_rows_params.py
+++ b/src/llama_stack_client/types/datasetio_append_rows_params.py
@@ -3,9 +3,7 @@
from __future__ import annotations
from typing import Dict, Union, Iterable
-from typing_extensions import Required, Annotated, TypedDict
-
-from .._utils import PropertyInfo
+from typing_extensions import Required, TypedDict
__all__ = ["DatasetioAppendRowsParams"]
@@ -14,7 +12,3 @@ class DatasetioAppendRowsParams(TypedDict, total=False):
dataset_id: Required[str]
rows: Required[Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]]
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
diff --git a/src/llama_stack_client/types/datasetio_get_rows_paginated_params.py b/src/llama_stack_client/types/datasetio_get_rows_paginated_params.py
index a22af6e5..8082691e 100644
--- a/src/llama_stack_client/types/datasetio_get_rows_paginated_params.py
+++ b/src/llama_stack_client/types/datasetio_get_rows_paginated_params.py
@@ -2,9 +2,7 @@
from __future__ import annotations
-from typing_extensions import Required, Annotated, TypedDict
-
-from .._utils import PropertyInfo
+from typing_extensions import Required, TypedDict
__all__ = ["DatasetioGetRowsPaginatedParams"]
@@ -17,7 +15,3 @@ class DatasetioGetRowsPaginatedParams(TypedDict, total=False):
filter_condition: str
page_token: str
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
diff --git a/src/llama_stack_client/types/embeddings_response.py b/src/llama_stack_client/types/embeddings_response.py
index 8c6d28e9..f36c6b97 100644
--- a/src/llama_stack_client/types/embeddings_response.py
+++ b/src/llama_stack_client/types/embeddings_response.py
@@ -9,3 +9,8 @@
class EmbeddingsResponse(BaseModel):
embeddings: List[List[float]]
+ """List of embedding vectors, one per input content.
+
+ Each embedding is a list of floats. The dimensionality of the embedding is
+ model-specific; you can check model metadata using /models/{model_id}
+ """
diff --git a/src/llama_stack_client/types/eval_evaluate_rows_params.py b/src/llama_stack_client/types/eval_evaluate_rows_params.py
index c3ed4bae..065764b5 100644
--- a/src/llama_stack_client/types/eval_evaluate_rows_params.py
+++ b/src/llama_stack_client/types/eval_evaluate_rows_params.py
@@ -3,9 +3,8 @@
from __future__ import annotations
from typing import Dict, List, Union, Iterable
-from typing_extensions import Required, Annotated, TypedDict
+from typing_extensions import Required, TypedDict
-from .._utils import PropertyInfo
from .eval_task_config_param import EvalTaskConfigParam
__all__ = ["EvalEvaluateRowsParams"]
@@ -17,7 +16,3 @@ class EvalEvaluateRowsParams(TypedDict, total=False):
scoring_functions: Required[List[str]]
task_config: Required[EvalTaskConfigParam]
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
diff --git a/src/llama_stack_client/types/eval_run_eval_params.py b/src/llama_stack_client/types/eval_run_eval_params.py
index cd37e293..9ee91af8 100644
--- a/src/llama_stack_client/types/eval_run_eval_params.py
+++ b/src/llama_stack_client/types/eval_run_eval_params.py
@@ -2,9 +2,8 @@
from __future__ import annotations
-from typing_extensions import Required, Annotated, TypedDict
+from typing_extensions import Required, TypedDict
-from .._utils import PropertyInfo
from .eval_task_config_param import EvalTaskConfigParam
__all__ = ["EvalRunEvalParams"]
@@ -12,7 +11,3 @@
class EvalRunEvalParams(TypedDict, total=False):
task_config: Required[EvalTaskConfigParam]
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
diff --git a/src/llama_stack_client/types/eval_task_register_params.py b/src/llama_stack_client/types/eval_task_register_params.py
index c7533480..417bc2cd 100644
--- a/src/llama_stack_client/types/eval_task_register_params.py
+++ b/src/llama_stack_client/types/eval_task_register_params.py
@@ -3,9 +3,7 @@
from __future__ import annotations
from typing import Dict, List, Union, Iterable
-from typing_extensions import Required, Annotated, TypedDict
-
-from .._utils import PropertyInfo
+from typing_extensions import Required, TypedDict
__all__ = ["EvalTaskRegisterParams"]
@@ -22,7 +20,3 @@ class EvalTaskRegisterParams(TypedDict, total=False):
provider_eval_task_id: str
provider_id: str
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
diff --git a/src/llama_stack_client/types/inference_chat_completion_params.py b/src/llama_stack_client/types/inference_chat_completion_params.py
index b2c66fc9..6382696f 100644
--- a/src/llama_stack_client/types/inference_chat_completion_params.py
+++ b/src/llama_stack_client/types/inference_chat_completion_params.py
@@ -3,9 +3,8 @@
from __future__ import annotations
from typing import Dict, Union, Iterable
-from typing_extensions import Literal, Required, Annotated, TypedDict
+from typing_extensions import Literal, Required, TypedDict
-from .._utils import PropertyInfo
from .shared_params.message import Message
from .shared_params.response_format import ResponseFormat
from .shared_params.sampling_params import SamplingParams
@@ -22,39 +21,56 @@
class InferenceChatCompletionParamsBase(TypedDict, total=False):
messages: Required[Iterable[Message]]
+ """List of messages in the conversation"""
model_id: Required[str]
+ """The identifier of the model to use.
+
+ The model must be registered with Llama Stack and available via the /models
+ endpoint.
+ """
logprobs: Logprobs
+ """
+ (Optional) If specified, log probabilities for each token position will be
+ returned.
+ """
response_format: ResponseFormat
+ """(Optional) Grammar specification for guided (structured) decoding.
+
+ There are two options: - `ResponseFormat.json_schema`: The grammar is a JSON
+ schema. Most providers support this format. - `ResponseFormat.grammar`: The
+ grammar is a BNF grammar. This format is more flexible, but not all providers
+ support it.
+ """
sampling_params: SamplingParams
+ """Parameters to control the sampling strategy"""
tool_choice: Literal["auto", "required"]
+ """(Optional) Whether tool use is required or automatic.
- tool_prompt_format: Literal["json", "function_tag", "python_list"]
+ Defaults to ToolChoice.auto.
"""
- `json` -- Refers to the json format for calling tools. The json format takes the
- form like { "type": "function", "function" : { "name": "function_name",
- "description": "function_description", "parameters": {...} } }
- `function_tag` -- This is an example of how you could define your own user
- defined format for making tool calls. The function_tag format looks like this,
-    `<function=function_name>`(parameters)
+ tool_prompt_format: Literal["json", "function_tag", "python_list"]
+ """(Optional) Instructs the model how to format tool calls.
- The detailed prompts for each of these formats are added to llama cli
+ By default, Llama Stack will attempt to use a format that is best adapted to the
+ model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON
+ object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a
+    `<function=function_name>` tag. - `ToolPromptFormat.python_list`: The tool calls
+ are output as Python syntax -- a list of function calls.
"""
tools: Iterable[Tool]
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
+ """(Optional) List of tool definitions available to the model"""
class Logprobs(TypedDict, total=False):
top_k: int
+ """How many tokens (for each position) to return log probabilities for."""
class Tool(TypedDict, total=False):
@@ -67,10 +83,18 @@ class Tool(TypedDict, total=False):
class InferenceChatCompletionParamsNonStreaming(InferenceChatCompletionParamsBase, total=False):
stream: Literal[False]
+ """(Optional) If True, generate an SSE event stream of the response.
+
+ Defaults to False.
+ """
class InferenceChatCompletionParamsStreaming(InferenceChatCompletionParamsBase):
stream: Required[Literal[True]]
+ """(Optional) If True, generate an SSE event stream of the response.
+
+ Defaults to False.
+ """
InferenceChatCompletionParams = Union[InferenceChatCompletionParamsNonStreaming, InferenceChatCompletionParamsStreaming]
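The new docstrings spell out the knobs on chat completion. A non-streaming sketch that exercises `response_format` and `logprobs`; the base URL and model id are placeholders, and the `json_schema` dict shape follows the `ResponseFormat.json_schema` variant described above:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

response = client.inference.chat_completion(
    model_id="meta-llama/Llama-3.1-8B-Instruct",  # placeholder; must be registered via /models
    messages=[{"role": "user", "content": "Reply with a JSON object holding a `joke` key."}],
    response_format={
        "type": "json_schema",  # assumed discriminator for the json_schema variant
        "json_schema": {"type": "object", "properties": {"joke": {"type": "string"}}},
    },
    logprobs={"top_k": 1},  # log probabilities per token position
    stream=False,
)
print(response.completion_message.content)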
diff --git a/src/llama_stack_client/types/inference_chat_completion_response.py b/src/llama_stack_client/types/inference_chat_completion_response.py
index 57e8d5c0..1e59952d 100644
--- a/src/llama_stack_client/types/inference_chat_completion_response.py
+++ b/src/llama_stack_client/types/inference_chat_completion_response.py
@@ -18,22 +18,32 @@
class ChatCompletionResponse(BaseModel):
completion_message: CompletionMessage
+ """The complete response message"""
logprobs: Optional[List[TokenLogProbs]] = None
+ """Optional log probabilities for generated tokens"""
class ChatCompletionResponseStreamChunkEvent(BaseModel):
delta: ContentDelta
+ """Content generated since last event.
+
+ This can be one or more tokens, or a tool call.
+ """
event_type: Literal["start", "complete", "progress"]
+ """Type of the event"""
logprobs: Optional[List[TokenLogProbs]] = None
+ """Optional log probabilities for generated tokens"""
stop_reason: Optional[Literal["end_of_turn", "end_of_message", "out_of_tokens"]] = None
+ """Optional reason why generation stopped, if complete"""
class ChatCompletionResponseStreamChunk(BaseModel):
event: ChatCompletionResponseStreamChunkEvent
+ """The event containing the new content"""
InferenceChatCompletionResponse: TypeAlias = Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]
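With `stream=True` the same call yields `ChatCompletionResponseStreamChunk` values whose `event` carries the fields documented above. A consumption sketch; the text-delta access (`delta.type` / `delta.text`) assumes the text variant of the `ContentDelta` union:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

for chunk in client.inference.chat_completion(
    model_id="meta-llama/Llama-3.1-8B-Instruct",  # placeholder
    messages=[{"role": "user", "content": "Tell me a short story."}],
    stream=True,
):
    event = chunk.event
    if event.event_type == "progress" and event.delta.type == "text":
        print(event.delta.text, end="", flush=True)  # tokens since the last event
    elif event.event_type == "complete":
        print("\nstop_reason:", event.stop_reason)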
diff --git a/src/llama_stack_client/types/inference_completion_params.py b/src/llama_stack_client/types/inference_completion_params.py
index 1f182f45..60ccefce 100644
--- a/src/llama_stack_client/types/inference_completion_params.py
+++ b/src/llama_stack_client/types/inference_completion_params.py
@@ -3,9 +3,8 @@
from __future__ import annotations
from typing import Union
-from typing_extensions import Literal, Required, Annotated, TypedDict
+from typing_extensions import Literal, Required, TypedDict
-from .._utils import PropertyInfo
from .shared_params.response_format import ResponseFormat
from .shared_params.sampling_params import SamplingParams
from .shared_params.interleaved_content import InterleavedContent
@@ -20,30 +19,47 @@
class InferenceCompletionParamsBase(TypedDict, total=False):
content: Required[InterleavedContent]
+ """The content to generate a completion for"""
model_id: Required[str]
+ """The identifier of the model to use.
+
+ The model must be registered with Llama Stack and available via the /models
+ endpoint.
+ """
logprobs: Logprobs
+ """
+ (Optional) If specified, log probabilities for each token position will be
+ returned.
+ """
response_format: ResponseFormat
+ """(Optional) Grammar specification for guided (structured) decoding"""
sampling_params: SamplingParams
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
+ """(Optional) Parameters to control the sampling strategy"""
class Logprobs(TypedDict, total=False):
top_k: int
+ """How many tokens (for each position) to return log probabilities for."""
class InferenceCompletionParamsNonStreaming(InferenceCompletionParamsBase, total=False):
stream: Literal[False]
+ """(Optional) If True, generate an SSE event stream of the response.
+
+ Defaults to False.
+ """
class InferenceCompletionParamsStreaming(InferenceCompletionParamsBase):
stream: Required[Literal[True]]
+ """(Optional) If True, generate an SSE event stream of the response.
+
+ Defaults to False.
+ """
InferenceCompletionParams = Union[InferenceCompletionParamsNonStreaming, InferenceCompletionParamsStreaming]
diff --git a/src/llama_stack_client/types/inference_completion_response.py b/src/llama_stack_client/types/inference_completion_response.py
index 2f64e1b7..eccf0e7c 100644
--- a/src/llama_stack_client/types/inference_completion_response.py
+++ b/src/llama_stack_client/types/inference_completion_response.py
@@ -12,10 +12,13 @@
class CompletionResponseStreamChunk(BaseModel):
delta: str
+ """New content generated since last chunk. This can be one or more tokens."""
logprobs: Optional[List[TokenLogProbs]] = None
+ """Optional log probabilities for generated tokens"""
stop_reason: Optional[Literal["end_of_turn", "end_of_message", "out_of_tokens"]] = None
+ """Optional reason why generation stopped, if complete"""
InferenceCompletionResponse: TypeAlias = Union[CompletionResponse, CompletionResponseStreamChunk]
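Plain completion mirrors this: one `CompletionResponse` when `stream=False`, and chunks whose `delta` is a plain string when streaming. A sketch with placeholder base URL, model id, and prompts:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

response = client.inference.completion(
    model_id="meta-llama/Llama-3.1-8B-Instruct",  # placeholder
    content="The capital of France is",
    stream=False,
)
print(response.content, response.stop_reason)

for chunk in client.inference.completion(
    model_id="meta-llama/Llama-3.1-8B-Instruct",
    content="Once upon a time",
    stream=True,
):
    print(chunk.delta, end="", flush=True)  # delta is a plain str for completions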
diff --git a/src/llama_stack_client/types/inference_embeddings_params.py b/src/llama_stack_client/types/inference_embeddings_params.py
index a7c02b52..926b511b 100644
--- a/src/llama_stack_client/types/inference_embeddings_params.py
+++ b/src/llama_stack_client/types/inference_embeddings_params.py
@@ -3,9 +3,8 @@
from __future__ import annotations
from typing import List
-from typing_extensions import Required, Annotated, TypedDict
+from typing_extensions import Required, TypedDict
-from .._utils import PropertyInfo
from .shared_params.interleaved_content import InterleavedContent
__all__ = ["InferenceEmbeddingsParams"]
@@ -13,9 +12,15 @@
class InferenceEmbeddingsParams(TypedDict, total=False):
contents: Required[List[InterleavedContent]]
+ """List of contents to generate embeddings for.
- model_id: Required[str]
+ Note that content can be multimodal. The behavior depends on the model and
+ provider. Some models may only support text.
+ """
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
+ model_id: Required[str]
+ """The identifier of the model to use.
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
+ The model must be an embedding model registered with Llama Stack and available
+ via the /models endpoint.
+ """
diff --git a/src/llama_stack_client/types/model_register_params.py b/src/llama_stack_client/types/model_register_params.py
index cc7f6b26..be5d72cc 100644
--- a/src/llama_stack_client/types/model_register_params.py
+++ b/src/llama_stack_client/types/model_register_params.py
@@ -3,9 +3,7 @@
from __future__ import annotations
from typing import Dict, Union, Iterable
-from typing_extensions import Literal, Required, Annotated, TypedDict
-
-from .._utils import PropertyInfo
+from typing_extensions import Literal, Required, TypedDict
__all__ = ["ModelRegisterParams"]
@@ -20,7 +18,3 @@ class ModelRegisterParams(TypedDict, total=False):
provider_id: str
provider_model_id: str
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
diff --git a/src/llama_stack_client/types/post_training/job_artifacts_params.py b/src/llama_stack_client/types/post_training/job_artifacts_params.py
index 14b29030..4f75a133 100644
--- a/src/llama_stack_client/types/post_training/job_artifacts_params.py
+++ b/src/llama_stack_client/types/post_training/job_artifacts_params.py
@@ -2,16 +2,10 @@
from __future__ import annotations
-from typing_extensions import Required, Annotated, TypedDict
-
-from ..._utils import PropertyInfo
+from typing_extensions import Required, TypedDict
__all__ = ["JobArtifactsParams"]
class JobArtifactsParams(TypedDict, total=False):
job_uuid: Required[str]
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
diff --git a/src/llama_stack_client/types/post_training/job_cancel_params.py b/src/llama_stack_client/types/post_training/job_cancel_params.py
index a80b4c8c..c9c30d84 100644
--- a/src/llama_stack_client/types/post_training/job_cancel_params.py
+++ b/src/llama_stack_client/types/post_training/job_cancel_params.py
@@ -2,16 +2,10 @@
from __future__ import annotations
-from typing_extensions import Required, Annotated, TypedDict
-
-from ..._utils import PropertyInfo
+from typing_extensions import Required, TypedDict
__all__ = ["JobCancelParams"]
class JobCancelParams(TypedDict, total=False):
job_uuid: Required[str]
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
diff --git a/src/llama_stack_client/types/post_training/job_status_params.py b/src/llama_stack_client/types/post_training/job_status_params.py
index c583e945..8cf17b03 100644
--- a/src/llama_stack_client/types/post_training/job_status_params.py
+++ b/src/llama_stack_client/types/post_training/job_status_params.py
@@ -2,16 +2,10 @@
from __future__ import annotations
-from typing_extensions import Required, Annotated, TypedDict
-
-from ..._utils import PropertyInfo
+from typing_extensions import Required, TypedDict
__all__ = ["JobStatusParams"]
class JobStatusParams(TypedDict, total=False):
job_uuid: Required[str]
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
diff --git a/src/llama_stack_client/types/post_training_preference_optimize_params.py b/src/llama_stack_client/types/post_training_preference_optimize_params.py
index 33540e88..0d79173a 100644
--- a/src/llama_stack_client/types/post_training_preference_optimize_params.py
+++ b/src/llama_stack_client/types/post_training_preference_optimize_params.py
@@ -3,9 +3,7 @@
from __future__ import annotations
from typing import Dict, Union, Iterable
-from typing_extensions import Literal, Required, Annotated, TypedDict
-
-from .._utils import PropertyInfo
+from typing_extensions import Literal, Required, TypedDict
__all__ = [
"PostTrainingPreferenceOptimizeParams",
@@ -30,10 +28,6 @@ class PostTrainingPreferenceOptimizeParams(TypedDict, total=False):
training_config: Required[TrainingConfig]
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
-
class AlgorithmConfig(TypedDict, total=False):
epsilon: Required[float]
diff --git a/src/llama_stack_client/types/post_training_supervised_fine_tune_params.py b/src/llama_stack_client/types/post_training_supervised_fine_tune_params.py
index 923ceaf6..fa18742a 100644
--- a/src/llama_stack_client/types/post_training_supervised_fine_tune_params.py
+++ b/src/llama_stack_client/types/post_training_supervised_fine_tune_params.py
@@ -3,9 +3,8 @@
from __future__ import annotations
from typing import Dict, Union, Iterable
-from typing_extensions import Literal, Required, Annotated, TypedDict
+from typing_extensions import Literal, Required, TypedDict
-from .._utils import PropertyInfo
from .algorithm_config_param import AlgorithmConfigParam
__all__ = [
@@ -32,10 +31,6 @@ class PostTrainingSupervisedFineTuneParams(TypedDict, total=False):
checkpoint_dir: str
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
-
class TrainingConfigDataConfig(TypedDict, total=False):
batch_size: Required[int]
diff --git a/src/llama_stack_client/types/safety_run_shield_params.py b/src/llama_stack_client/types/safety_run_shield_params.py
index 7139ab25..d2eab4c0 100644
--- a/src/llama_stack_client/types/safety_run_shield_params.py
+++ b/src/llama_stack_client/types/safety_run_shield_params.py
@@ -3,9 +3,8 @@
from __future__ import annotations
from typing import Dict, Union, Iterable
-from typing_extensions import Required, Annotated, TypedDict
+from typing_extensions import Required, TypedDict
-from .._utils import PropertyInfo
from .shared_params.message import Message
__all__ = ["SafetyRunShieldParams"]
@@ -17,7 +16,3 @@ class SafetyRunShieldParams(TypedDict, total=False):
params: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]
shield_id: Required[str]
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
diff --git a/src/llama_stack_client/types/scoring_function_register_params.py b/src/llama_stack_client/types/scoring_function_register_params.py
index d00a7576..6c9cb1da 100644
--- a/src/llama_stack_client/types/scoring_function_register_params.py
+++ b/src/llama_stack_client/types/scoring_function_register_params.py
@@ -2,9 +2,8 @@
from __future__ import annotations
-from typing_extensions import Required, Annotated, TypedDict
+from typing_extensions import Required, TypedDict
-from .._utils import PropertyInfo
from .scoring_fn_params_param import ScoringFnParamsParam
from .shared_params.return_type import ReturnType
@@ -23,7 +22,3 @@ class ScoringFunctionRegisterParams(TypedDict, total=False):
provider_id: str
provider_scoring_fn_id: str
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
diff --git a/src/llama_stack_client/types/scoring_score_batch_params.py b/src/llama_stack_client/types/scoring_score_batch_params.py
index dd6a7430..aa12ac8c 100644
--- a/src/llama_stack_client/types/scoring_score_batch_params.py
+++ b/src/llama_stack_client/types/scoring_score_batch_params.py
@@ -3,9 +3,8 @@
from __future__ import annotations
from typing import Dict, Optional
-from typing_extensions import Required, Annotated, TypedDict
+from typing_extensions import Required, TypedDict
-from .._utils import PropertyInfo
from .scoring_fn_params_param import ScoringFnParamsParam
__all__ = ["ScoringScoreBatchParams"]
@@ -17,7 +16,3 @@ class ScoringScoreBatchParams(TypedDict, total=False):
save_results_dataset: Required[bool]
scoring_functions: Required[Dict[str, Optional[ScoringFnParamsParam]]]
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
diff --git a/src/llama_stack_client/types/scoring_score_params.py b/src/llama_stack_client/types/scoring_score_params.py
index f294e191..a0af7890 100644
--- a/src/llama_stack_client/types/scoring_score_params.py
+++ b/src/llama_stack_client/types/scoring_score_params.py
@@ -3,9 +3,8 @@
from __future__ import annotations
from typing import Dict, Union, Iterable, Optional
-from typing_extensions import Required, Annotated, TypedDict
+from typing_extensions import Required, TypedDict
-from .._utils import PropertyInfo
from .scoring_fn_params_param import ScoringFnParamsParam
__all__ = ["ScoringScoreParams"]
@@ -15,7 +14,3 @@ class ScoringScoreParams(TypedDict, total=False):
input_rows: Required[Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]]
scoring_functions: Required[Dict[str, Optional[ScoringFnParamsParam]]]
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
diff --git a/src/llama_stack_client/types/shared/agent_config.py b/src/llama_stack_client/types/shared/agent_config.py
index 6e7dadf2..ad94e3f1 100644
--- a/src/llama_stack_client/types/shared/agent_config.py
+++ b/src/llama_stack_client/types/shared/agent_config.py
@@ -42,16 +42,5 @@ class AgentConfig(BaseModel):
tool_choice: Optional[Literal["auto", "required"]] = None
tool_prompt_format: Optional[Literal["json", "function_tag", "python_list"]] = None
- """
- `json` -- Refers to the json format for calling tools. The json format takes the
- form like { "type": "function", "function" : { "name": "function_name",
- "description": "function_description", "parameters": {...} } }
-
- `function_tag` -- This is an example of how you could define your own user
- defined format for making tool calls. The function_tag format looks like this,
- (parameters)
-
- The detailed prompts for each of these formats are added to llama cli
- """
toolgroups: Optional[List[Toolgroup]] = None
diff --git a/src/llama_stack_client/types/shared/batch_completion.py b/src/llama_stack_client/types/shared/batch_completion.py
index 07624df2..547884d1 100644
--- a/src/llama_stack_client/types/shared/batch_completion.py
+++ b/src/llama_stack_client/types/shared/batch_completion.py
@@ -3,10 +3,10 @@
from typing import List
from ..._models import BaseModel
-from .completion_message import CompletionMessage
+from ..completion_response import CompletionResponse
__all__ = ["BatchCompletion"]
class BatchCompletion(BaseModel):
- completion_message_batch: List[CompletionMessage]
+ batch: List[CompletionResponse]
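Downstream code now iterates `batch` and reads `CompletionResponse` fields rather than `completion_message_batch`. A sketch of the access-pattern change, assuming a client wired to the `batch_inference` resource exercised in the tests further below:

    result = client.batch_inference.completion(
        content_batch=["string"],
        model="model",
    )
    for completion in result.batch:  # was: result.completion_message_batch
        print(completion.content, completion.stop_reason)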
diff --git a/src/llama_stack_client/types/shared/completion_message.py b/src/llama_stack_client/types/shared/completion_message.py
index 563c0d96..373d6b1d 100644
--- a/src/llama_stack_client/types/shared/completion_message.py
+++ b/src/llama_stack_client/types/shared/completion_message.py
@@ -12,9 +12,20 @@
class CompletionMessage(BaseModel):
content: InterleavedContent
+ """The content of the model's response"""
role: Literal["assistant"]
+ """Must be "assistant" to identify this as the model's response"""
stop_reason: Literal["end_of_turn", "end_of_message", "out_of_tokens"]
+ """Reason why the model stopped generating.
+
+ Options are:
+ - `StopReason.end_of_turn`: The model finished generating the entire response.
+ - `StopReason.end_of_message`: The model finished generating but produced a
+   partial response, usually a tool call. The user may call the tool and
+   continue the conversation with the tool's response.
+ - `StopReason.out_of_tokens`: The model ran out of token budget.
+ """
tool_calls: List[ToolCall]
+ """List of tool calls. Each tool call is a ToolCall object."""
diff --git a/src/llama_stack_client/types/shared/response_format.py b/src/llama_stack_client/types/shared/response_format.py
index 66ffae78..537df8d5 100644
--- a/src/llama_stack_client/types/shared/response_format.py
+++ b/src/llama_stack_client/types/shared/response_format.py
@@ -11,14 +11,21 @@
class JsonSchemaResponseFormat(BaseModel):
json_schema: Dict[str, Union[bool, float, str, List[object], object, None]]
+ """The JSON schema the response should conform to.
+
+ In a Python SDK, this is often a `pydantic` model.
+ """
type: Literal["json_schema"]
+ """Must be "json_schema" to identify this format type"""
class GrammarResponseFormat(BaseModel):
bnf: Dict[str, Union[bool, float, str, List[object], object, None]]
+ """The BNF grammar specification the response should conform to"""
type: Literal["grammar"]
+ """Must be "grammar" to identify this format type"""
ResponseFormat: TypeAlias = Annotated[
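The docstring's pydantic note maps naturally onto `model_json_schema()` in pydantic v2. A sketch with a hypothetical `Recipe` model:

    # `Recipe` is a made-up model; only the resulting dict shape matters.
    from pydantic import BaseModel

    class Recipe(BaseModel):
        name: str
        minutes: int

    response_format = {
        "type": "json_schema",
        "json_schema": Recipe.model_json_schema(),
    }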
diff --git a/src/llama_stack_client/types/shared/system_message.py b/src/llama_stack_client/types/shared/system_message.py
index dc4a684a..a854e019 100644
--- a/src/llama_stack_client/types/shared/system_message.py
+++ b/src/llama_stack_client/types/shared/system_message.py
@@ -10,5 +10,12 @@
class SystemMessage(BaseModel):
content: InterleavedContent
+ """The content of the "system prompt".
+
+ If multiple system messages are provided, they are concatenated. The underlying
+ Llama Stack code may also add other system messages (for example, for formatting
+ tool definitions).
+ """
role: Literal["system"]
+ """Must be "system" to identify this as a system message"""
diff --git a/src/llama_stack_client/types/shared/tool_response_message.py b/src/llama_stack_client/types/shared/tool_response_message.py
index 30efa449..c7a61ac7 100644
--- a/src/llama_stack_client/types/shared/tool_response_message.py
+++ b/src/llama_stack_client/types/shared/tool_response_message.py
@@ -11,9 +11,13 @@
class ToolResponseMessage(BaseModel):
call_id: str
+ """Unique identifier for the tool call this response is for"""
content: InterleavedContent
+ """The response content from the tool"""
role: Literal["tool"]
+ """Must be "tool" to identify this as a tool response"""
tool_name: Union[Literal["brave_search", "wolfram_alpha", "photogen", "code_interpreter"], str]
+ """Name of the tool that was called"""
diff --git a/src/llama_stack_client/types/shared/user_message.py b/src/llama_stack_client/types/shared/user_message.py
index e0f68b96..2a89fbba 100644
--- a/src/llama_stack_client/types/shared/user_message.py
+++ b/src/llama_stack_client/types/shared/user_message.py
@@ -11,7 +11,13 @@
class UserMessage(BaseModel):
content: InterleavedContent
+ """The content of the message, which can include text and other media"""
role: Literal["user"]
+ """Must be "user" to identify this as a user message"""
context: Optional[InterleavedContent] = None
+ """(Optional) This field is used internally by Llama Stack to pass RAG context.
+
+ It may be removed from the API in the future.
+ """
diff --git a/src/llama_stack_client/types/shared_params/agent_config.py b/src/llama_stack_client/types/shared_params/agent_config.py
index 3eb86ac1..186eff85 100644
--- a/src/llama_stack_client/types/shared_params/agent_config.py
+++ b/src/llama_stack_client/types/shared_params/agent_config.py
@@ -43,16 +43,5 @@ class AgentConfig(TypedDict, total=False):
tool_choice: Literal["auto", "required"]
tool_prompt_format: Literal["json", "function_tag", "python_list"]
- """
- `json` -- Refers to the json format for calling tools. The json format takes the
- form like { "type": "function", "function" : { "name": "function_name",
- "description": "function_description", "parameters": {...} } }
-
- `function_tag` -- This is an example of how you could define your own user
- defined format for making tool calls. The function_tag format looks like this,
- (parameters)
-
- The detailed prompts for each of these formats are added to llama cli
- """
toolgroups: List[Toolgroup]
diff --git a/src/llama_stack_client/types/shared_params/completion_message.py b/src/llama_stack_client/types/shared_params/completion_message.py
index f996408b..4c480807 100644
--- a/src/llama_stack_client/types/shared_params/completion_message.py
+++ b/src/llama_stack_client/types/shared_params/completion_message.py
@@ -13,9 +13,20 @@
class CompletionMessage(TypedDict, total=False):
content: Required[InterleavedContent]
+ """The content of the model's response"""
role: Required[Literal["assistant"]]
+ """Must be "assistant" to identify this as the model's response"""
stop_reason: Required[Literal["end_of_turn", "end_of_message", "out_of_tokens"]]
+ """Reason why the model stopped generating.
+
+ Options are:
+ - `StopReason.end_of_turn`: The model finished generating the entire response.
+ - `StopReason.end_of_message`: The model finished generating but produced a
+   partial response, usually a tool call. The user may call the tool and
+   continue the conversation with the tool's response.
+ - `StopReason.out_of_tokens`: The model ran out of token budget.
+ """
tool_calls: Required[Iterable[ToolCall]]
+ """List of tool calls. Each tool call is a ToolCall object."""
diff --git a/src/llama_stack_client/types/shared_params/response_format.py b/src/llama_stack_client/types/shared_params/response_format.py
index 7e0b56db..53411700 100644
--- a/src/llama_stack_client/types/shared_params/response_format.py
+++ b/src/llama_stack_client/types/shared_params/response_format.py
@@ -10,14 +10,21 @@
class JsonSchemaResponseFormat(TypedDict, total=False):
json_schema: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]
+ """The JSON schema the response should conform to.
+
+ In a Python SDK, this is often a `pydantic` model.
+ """
type: Required[Literal["json_schema"]]
+ """Must be "json_schema" to identify this format type"""
class GrammarResponseFormat(TypedDict, total=False):
bnf: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]
+ """The BNF grammar specification the response should conform to"""
type: Required[Literal["grammar"]]
+ """Must be "grammar" to identify this format type"""
ResponseFormat: TypeAlias = Union[JsonSchemaResponseFormat, GrammarResponseFormat]
diff --git a/src/llama_stack_client/types/shared_params/system_message.py b/src/llama_stack_client/types/shared_params/system_message.py
index 66230b49..7cf9535c 100644
--- a/src/llama_stack_client/types/shared_params/system_message.py
+++ b/src/llama_stack_client/types/shared_params/system_message.py
@@ -11,5 +11,12 @@
class SystemMessage(TypedDict, total=False):
content: Required[InterleavedContent]
+ """The content of the "system prompt".
+
+ If multiple system messages are provided, they are concatenated. The underlying
+ Llama Stack code may also add other system messages (for example, for formatting
+ tool definitions).
+ """
role: Required[Literal["system"]]
+ """Must be "system" to identify this as a system message"""
diff --git a/src/llama_stack_client/types/shared_params/tool_response_message.py b/src/llama_stack_client/types/shared_params/tool_response_message.py
index 625c9ba2..cf08fb0e 100644
--- a/src/llama_stack_client/types/shared_params/tool_response_message.py
+++ b/src/llama_stack_client/types/shared_params/tool_response_message.py
@@ -12,9 +12,13 @@
class ToolResponseMessage(TypedDict, total=False):
call_id: Required[str]
+ """Unique identifier for the tool call this response is for"""
content: Required[InterleavedContent]
+ """The response content from the tool"""
role: Required[Literal["tool"]]
+ """Must be "tool" to identify this as a tool response"""
tool_name: Required[Union[Literal["brave_search", "wolfram_alpha", "photogen", "code_interpreter"], str]]
+ """Name of the tool that was called"""
diff --git a/src/llama_stack_client/types/shared_params/user_message.py b/src/llama_stack_client/types/shared_params/user_message.py
index bdea7000..4b8e3de3 100644
--- a/src/llama_stack_client/types/shared_params/user_message.py
+++ b/src/llama_stack_client/types/shared_params/user_message.py
@@ -11,7 +11,13 @@
class UserMessage(TypedDict, total=False):
content: Required[InterleavedContent]
+ """The content of the message, which can include text and other media"""
role: Required[Literal["user"]]
+ """Must be "user" to identify this as a user message"""
context: InterleavedContent
+ """(Optional) This field is used internally by Llama Stack to pass RAG context.
+
+ It may be removed from the API in the future.
+ """
diff --git a/src/llama_stack_client/types/shield_register_params.py b/src/llama_stack_client/types/shield_register_params.py
index f7972610..2a51fd04 100644
--- a/src/llama_stack_client/types/shield_register_params.py
+++ b/src/llama_stack_client/types/shield_register_params.py
@@ -3,9 +3,7 @@
from __future__ import annotations
from typing import Dict, Union, Iterable
-from typing_extensions import Required, Annotated, TypedDict
-
-from .._utils import PropertyInfo
+from typing_extensions import Required, TypedDict
__all__ = ["ShieldRegisterParams"]
@@ -18,7 +16,3 @@ class ShieldRegisterParams(TypedDict, total=False):
provider_id: str
provider_shield_id: str
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
diff --git a/src/llama_stack_client/types/synthetic_data_generation_generate_params.py b/src/llama_stack_client/types/synthetic_data_generation_generate_params.py
index 74641a4b..e10842bd 100644
--- a/src/llama_stack_client/types/synthetic_data_generation_generate_params.py
+++ b/src/llama_stack_client/types/synthetic_data_generation_generate_params.py
@@ -3,9 +3,8 @@
from __future__ import annotations
from typing import Iterable
-from typing_extensions import Literal, Required, Annotated, TypedDict
+from typing_extensions import Literal, Required, TypedDict
-from .._utils import PropertyInfo
from .shared_params.message import Message
__all__ = ["SyntheticDataGenerationGenerateParams"]
@@ -17,7 +16,3 @@ class SyntheticDataGenerationGenerateParams(TypedDict, total=False):
filtering_function: Required[Literal["none", "random", "top_k", "top_p", "top_k_top_p", "sigmoid"]]
model: str
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
diff --git a/src/llama_stack_client/types/telemetry_get_span_tree_params.py b/src/llama_stack_client/types/telemetry_get_span_tree_params.py
index c0f814a2..d9d647dd 100644
--- a/src/llama_stack_client/types/telemetry_get_span_tree_params.py
+++ b/src/llama_stack_client/types/telemetry_get_span_tree_params.py
@@ -3,9 +3,7 @@
from __future__ import annotations
from typing import List
-from typing_extensions import Annotated, TypedDict
-
-from .._utils import PropertyInfo
+from typing_extensions import TypedDict
__all__ = ["TelemetryGetSpanTreeParams"]
@@ -14,7 +12,3 @@ class TelemetryGetSpanTreeParams(TypedDict, total=False):
attributes_to_return: List[str]
max_depth: int
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
diff --git a/src/llama_stack_client/types/telemetry_log_event_params.py b/src/llama_stack_client/types/telemetry_log_event_params.py
index 44b72322..ef536792 100644
--- a/src/llama_stack_client/types/telemetry_log_event_params.py
+++ b/src/llama_stack_client/types/telemetry_log_event_params.py
@@ -2,9 +2,8 @@
from __future__ import annotations
-from typing_extensions import Required, Annotated, TypedDict
+from typing_extensions import Required, TypedDict
-from .._utils import PropertyInfo
from .event_param import EventParam
__all__ = ["TelemetryLogEventParams"]
@@ -14,7 +13,3 @@ class TelemetryLogEventParams(TypedDict, total=False):
event: Required[EventParam]
ttl_seconds: Required[int]
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
diff --git a/src/llama_stack_client/types/telemetry_query_spans_params.py b/src/llama_stack_client/types/telemetry_query_spans_params.py
index ec588473..897a024a 100644
--- a/src/llama_stack_client/types/telemetry_query_spans_params.py
+++ b/src/llama_stack_client/types/telemetry_query_spans_params.py
@@ -3,9 +3,8 @@
from __future__ import annotations
from typing import List, Iterable
-from typing_extensions import Required, Annotated, TypedDict
+from typing_extensions import Required, TypedDict
-from .._utils import PropertyInfo
from .query_condition_param import QueryConditionParam
__all__ = ["TelemetryQuerySpansParams"]
@@ -17,7 +16,3 @@ class TelemetryQuerySpansParams(TypedDict, total=False):
attributes_to_return: Required[List[str]]
max_depth: int
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
diff --git a/src/llama_stack_client/types/telemetry_query_traces_params.py b/src/llama_stack_client/types/telemetry_query_traces_params.py
index f54252d7..425b1a00 100644
--- a/src/llama_stack_client/types/telemetry_query_traces_params.py
+++ b/src/llama_stack_client/types/telemetry_query_traces_params.py
@@ -3,9 +3,8 @@
from __future__ import annotations
from typing import List, Iterable
-from typing_extensions import Annotated, TypedDict
+from typing_extensions import TypedDict
-from .._utils import PropertyInfo
from .query_condition_param import QueryConditionParam
__all__ = ["TelemetryQueryTracesParams"]
@@ -19,7 +18,3 @@ class TelemetryQueryTracesParams(TypedDict, total=False):
offset: int
order_by: List[str]
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
diff --git a/src/llama_stack_client/types/telemetry_save_spans_to_dataset_params.py b/src/llama_stack_client/types/telemetry_save_spans_to_dataset_params.py
index 5c13f671..5dc8419c 100644
--- a/src/llama_stack_client/types/telemetry_save_spans_to_dataset_params.py
+++ b/src/llama_stack_client/types/telemetry_save_spans_to_dataset_params.py
@@ -3,9 +3,8 @@
from __future__ import annotations
from typing import List, Iterable
-from typing_extensions import Required, Annotated, TypedDict
+from typing_extensions import Required, TypedDict
-from .._utils import PropertyInfo
from .query_condition_param import QueryConditionParam
__all__ = ["TelemetrySaveSpansToDatasetParams"]
@@ -19,7 +18,3 @@ class TelemetrySaveSpansToDatasetParams(TypedDict, total=False):
dataset_id: Required[str]
max_depth: int
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
diff --git a/src/llama_stack_client/types/token_log_probs.py b/src/llama_stack_client/types/token_log_probs.py
index 45bc634a..b1a0a2b4 100644
--- a/src/llama_stack_client/types/token_log_probs.py
+++ b/src/llama_stack_client/types/token_log_probs.py
@@ -9,3 +9,4 @@
class TokenLogProbs(BaseModel):
logprobs_by_token: Dict[str, float]
+ """Dictionary mapping tokens to their log probabilities"""
diff --git a/src/llama_stack_client/types/tool_list_params.py b/src/llama_stack_client/types/tool_list_params.py
index bfebd79d..c0953896 100644
--- a/src/llama_stack_client/types/tool_list_params.py
+++ b/src/llama_stack_client/types/tool_list_params.py
@@ -2,16 +2,10 @@
from __future__ import annotations
-from typing_extensions import Annotated, TypedDict
-
-from .._utils import PropertyInfo
+from typing_extensions import TypedDict
__all__ = ["ToolListParams"]
class ToolListParams(TypedDict, total=False):
toolgroup_id: str
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
diff --git a/src/llama_stack_client/types/tool_runtime/rag_tool_insert_params.py b/src/llama_stack_client/types/tool_runtime/rag_tool_insert_params.py
index dd1e61ca..bc52c481 100644
--- a/src/llama_stack_client/types/tool_runtime/rag_tool_insert_params.py
+++ b/src/llama_stack_client/types/tool_runtime/rag_tool_insert_params.py
@@ -3,9 +3,8 @@
from __future__ import annotations
from typing import Iterable
-from typing_extensions import Required, Annotated, TypedDict
+from typing_extensions import Required, TypedDict
-from ..._utils import PropertyInfo
from ..shared_params.document import Document
__all__ = ["RagToolInsertParams"]
@@ -17,7 +16,3 @@ class RagToolInsertParams(TypedDict, total=False):
documents: Required[Iterable[Document]]
vector_db_id: Required[str]
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
diff --git a/src/llama_stack_client/types/tool_runtime/rag_tool_query_params.py b/src/llama_stack_client/types/tool_runtime/rag_tool_query_params.py
index 4ddf8d17..b4e7c003 100644
--- a/src/llama_stack_client/types/tool_runtime/rag_tool_query_params.py
+++ b/src/llama_stack_client/types/tool_runtime/rag_tool_query_params.py
@@ -3,9 +3,8 @@
from __future__ import annotations
from typing import List
-from typing_extensions import Required, Annotated, TypedDict
+from typing_extensions import Required, TypedDict
-from ..._utils import PropertyInfo
from ..shared_params.query_config import QueryConfig
from ..shared_params.interleaved_content import InterleavedContent
@@ -18,7 +17,3 @@ class RagToolQueryParams(TypedDict, total=False):
vector_db_ids: Required[List[str]]
query_config: QueryConfig
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
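A hedged sketch of exercising these params, assuming the `tool_runtime.rag_tool` resource mirrors them (the vector DB id is hypothetical):

    result = client.tool_runtime.rag_tool.query(
        content="What are llamas?",  # InterleavedContent accepts a plain str
        vector_db_ids=["my-db"],
    )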
diff --git a/src/llama_stack_client/types/tool_runtime_invoke_tool_params.py b/src/llama_stack_client/types/tool_runtime_invoke_tool_params.py
index 76354f7e..68b40462 100644
--- a/src/llama_stack_client/types/tool_runtime_invoke_tool_params.py
+++ b/src/llama_stack_client/types/tool_runtime_invoke_tool_params.py
@@ -3,9 +3,7 @@
from __future__ import annotations
from typing import Dict, Union, Iterable
-from typing_extensions import Required, Annotated, TypedDict
-
-from .._utils import PropertyInfo
+from typing_extensions import Required, TypedDict
__all__ = ["ToolRuntimeInvokeToolParams"]
@@ -14,7 +12,3 @@ class ToolRuntimeInvokeToolParams(TypedDict, total=False):
kwargs: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]
tool_name: Required[str]
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
diff --git a/src/llama_stack_client/types/tool_runtime_list_tools_params.py b/src/llama_stack_client/types/tool_runtime_list_tools_params.py
index c7c96791..7db74244 100644
--- a/src/llama_stack_client/types/tool_runtime_list_tools_params.py
+++ b/src/llama_stack_client/types/tool_runtime_list_tools_params.py
@@ -2,9 +2,8 @@
from __future__ import annotations
-from typing_extensions import Annotated, TypedDict
+from typing_extensions import TypedDict
-from .._utils import PropertyInfo
from .shared_params.url import URL
__all__ = ["ToolRuntimeListToolsParams"]
@@ -14,7 +13,3 @@ class ToolRuntimeListToolsParams(TypedDict, total=False):
mcp_endpoint: URL
tool_group_id: str
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
diff --git a/src/llama_stack_client/types/toolgroup_register_params.py b/src/llama_stack_client/types/toolgroup_register_params.py
index 880c4480..1184be85 100644
--- a/src/llama_stack_client/types/toolgroup_register_params.py
+++ b/src/llama_stack_client/types/toolgroup_register_params.py
@@ -3,9 +3,8 @@
from __future__ import annotations
from typing import Dict, Union, Iterable
-from typing_extensions import Required, Annotated, TypedDict
+from typing_extensions import Required, TypedDict
-from .._utils import PropertyInfo
from .shared_params.url import URL
__all__ = ["ToolgroupRegisterParams"]
@@ -19,7 +18,3 @@ class ToolgroupRegisterParams(TypedDict, total=False):
args: Dict[str, Union[bool, float, str, Iterable[object], object, None]]
mcp_endpoint: URL
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
diff --git a/src/llama_stack_client/types/vector_db_register_params.py b/src/llama_stack_client/types/vector_db_register_params.py
index 09dc7eba..6083bbce 100644
--- a/src/llama_stack_client/types/vector_db_register_params.py
+++ b/src/llama_stack_client/types/vector_db_register_params.py
@@ -2,9 +2,7 @@
from __future__ import annotations
-from typing_extensions import Required, Annotated, TypedDict
-
-from .._utils import PropertyInfo
+from typing_extensions import Required, TypedDict
__all__ = ["VectorDBRegisterParams"]
@@ -19,7 +17,3 @@ class VectorDBRegisterParams(TypedDict, total=False):
provider_id: str
provider_vector_db_id: str
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
diff --git a/src/llama_stack_client/types/vector_io_insert_params.py b/src/llama_stack_client/types/vector_io_insert_params.py
index 9570bb57..faac744b 100644
--- a/src/llama_stack_client/types/vector_io_insert_params.py
+++ b/src/llama_stack_client/types/vector_io_insert_params.py
@@ -3,9 +3,8 @@
from __future__ import annotations
from typing import Dict, Union, Iterable
-from typing_extensions import Required, Annotated, TypedDict
+from typing_extensions import Required, TypedDict
-from .._utils import PropertyInfo
from .shared_params.interleaved_content import InterleavedContent
__all__ = ["VectorIoInsertParams", "Chunk"]
@@ -18,10 +17,6 @@ class VectorIoInsertParams(TypedDict, total=False):
ttl_seconds: int
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
-
class Chunk(TypedDict, total=False):
content: Required[InterleavedContent]
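A hedged sketch of the insert call these params describe; only `content` is visible in this hunk, so the `metadata` key is an assumption:

    client.vector_io.insert(
        vector_db_id="my-db",  # hypothetical vector DB id
        chunks=[{"content": "Llamas are camelids.", "metadata": {}}],
        ttl_seconds=3600,
    )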
diff --git a/src/llama_stack_client/types/vector_io_query_params.py b/src/llama_stack_client/types/vector_io_query_params.py
index c626509b..97b48ddc 100644
--- a/src/llama_stack_client/types/vector_io_query_params.py
+++ b/src/llama_stack_client/types/vector_io_query_params.py
@@ -3,9 +3,8 @@
from __future__ import annotations
from typing import Dict, Union, Iterable
-from typing_extensions import Required, Annotated, TypedDict
+from typing_extensions import Required, TypedDict
-from .._utils import PropertyInfo
from .shared_params.interleaved_content import InterleavedContent
__all__ = ["VectorIoQueryParams"]
@@ -17,7 +16,3 @@ class VectorIoQueryParams(TypedDict, total=False):
vector_db_id: Required[str]
params: Dict[str, Union[bool, float, str, Iterable[object], object, None]]
-
- x_llama_stack_client_version: Annotated[str, PropertyInfo(alias="X-LlamaStack-Client-Version")]
-
- x_llama_stack_provider_data: Annotated[str, PropertyInfo(alias="X-LlamaStack-Provider-Data")]
diff --git a/tests/api_resources/agents/test_session.py b/tests/api_resources/agents/test_session.py
index 7ee27386..43b36a40 100644
--- a/tests/api_resources/agents/test_session.py
+++ b/tests/api_resources/agents/test_session.py
@@ -28,16 +28,6 @@ def test_method_create(self, client: LlamaStackClient) -> None:
)
assert_matches_type(SessionCreateResponse, session, path=["response"])
- @parametrize
- def test_method_create_with_all_params(self, client: LlamaStackClient) -> None:
- session = client.agents.session.create(
- agent_id="agent_id",
- session_name="session_name",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(SessionCreateResponse, session, path=["response"])
-
@parametrize
def test_raw_response_create(self, client: LlamaStackClient) -> None:
response = client.agents.session.with_raw_response.create(
@@ -86,8 +76,6 @@ def test_method_retrieve_with_all_params(self, client: LlamaStackClient) -> None
session_id="session_id",
agent_id="agent_id",
turn_ids=["string"],
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(Session, session, path=["response"])
@@ -139,16 +127,6 @@ def test_method_delete(self, client: LlamaStackClient) -> None:
)
assert session is None
- @parametrize
- def test_method_delete_with_all_params(self, client: LlamaStackClient) -> None:
- session = client.agents.session.delete(
- session_id="session_id",
- agent_id="agent_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert session is None
-
@parametrize
def test_raw_response_delete(self, client: LlamaStackClient) -> None:
response = client.agents.session.with_raw_response.delete(
@@ -201,16 +179,6 @@ async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None:
)
assert_matches_type(SessionCreateResponse, session, path=["response"])
- @parametrize
- async def test_method_create_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- session = await async_client.agents.session.create(
- agent_id="agent_id",
- session_name="session_name",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(SessionCreateResponse, session, path=["response"])
-
@parametrize
async def test_raw_response_create(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.agents.session.with_raw_response.create(
@@ -259,8 +227,6 @@ async def test_method_retrieve_with_all_params(self, async_client: AsyncLlamaSta
session_id="session_id",
agent_id="agent_id",
turn_ids=["string"],
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(Session, session, path=["response"])
@@ -312,16 +278,6 @@ async def test_method_delete(self, async_client: AsyncLlamaStackClient) -> None:
)
assert session is None
- @parametrize
- async def test_method_delete_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- session = await async_client.agents.session.delete(
- session_id="session_id",
- agent_id="agent_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert session is None
-
@parametrize
async def test_raw_response_delete(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.agents.session.with_raw_response.delete(
diff --git a/tests/api_resources/agents/test_steps.py b/tests/api_resources/agents/test_steps.py
index 8315f896..2cee6f12 100644
--- a/tests/api_resources/agents/test_steps.py
+++ b/tests/api_resources/agents/test_steps.py
@@ -27,18 +27,6 @@ def test_method_retrieve(self, client: LlamaStackClient) -> None:
)
assert_matches_type(StepRetrieveResponse, step, path=["response"])
- @parametrize
- def test_method_retrieve_with_all_params(self, client: LlamaStackClient) -> None:
- step = client.agents.steps.retrieve(
- step_id="step_id",
- agent_id="agent_id",
- session_id="session_id",
- turn_id="turn_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(StepRetrieveResponse, step, path=["response"])
-
@parametrize
def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
response = client.agents.steps.with_raw_response.retrieve(
@@ -117,18 +105,6 @@ async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> Non
)
assert_matches_type(StepRetrieveResponse, step, path=["response"])
- @parametrize
- async def test_method_retrieve_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- step = await async_client.agents.steps.retrieve(
- step_id="step_id",
- agent_id="agent_id",
- session_id="session_id",
- turn_id="turn_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(StepRetrieveResponse, step, path=["response"])
-
@parametrize
async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.agents.steps.with_raw_response.retrieve(
diff --git a/tests/api_resources/agents/test_turn.py b/tests/api_resources/agents/test_turn.py
index fcfe6a4a..c6e7138e 100644
--- a/tests/api_resources/agents/test_turn.py
+++ b/tests/api_resources/agents/test_turn.py
@@ -57,8 +57,6 @@ def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient
],
stream=False,
toolgroups=["string"],
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(TurnCreateResponse, turn, path=["response"])
@@ -176,8 +174,6 @@ def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient
}
],
toolgroups=["string"],
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
turn_stream.response.close()
@@ -266,17 +262,6 @@ def test_method_retrieve(self, client: LlamaStackClient) -> None:
)
assert_matches_type(Turn, turn, path=["response"])
- @parametrize
- def test_method_retrieve_with_all_params(self, client: LlamaStackClient) -> None:
- turn = client.agents.turn.retrieve(
- turn_id="turn_id",
- agent_id="agent_id",
- session_id="session_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(Turn, turn, path=["response"])
-
@parametrize
def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
response = client.agents.turn.with_raw_response.retrieve(
@@ -372,8 +357,6 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
],
stream=False,
toolgroups=["string"],
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(TurnCreateResponse, turn, path=["response"])
@@ -491,8 +474,6 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
}
],
toolgroups=["string"],
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
await turn_stream.response.aclose()
@@ -581,17 +562,6 @@ async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> Non
)
assert_matches_type(Turn, turn, path=["response"])
- @parametrize
- async def test_method_retrieve_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- turn = await async_client.agents.turn.retrieve(
- turn_id="turn_id",
- agent_id="agent_id",
- session_id="session_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(Turn, turn, path=["response"])
-
@parametrize
async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.agents.turn.with_raw_response.retrieve(
diff --git a/tests/api_resources/eval/test_jobs.py b/tests/api_resources/eval/test_jobs.py
index bf5a8865..beb290a0 100644
--- a/tests/api_resources/eval/test_jobs.py
+++ b/tests/api_resources/eval/test_jobs.py
@@ -26,16 +26,6 @@ def test_method_retrieve(self, client: LlamaStackClient) -> None:
)
assert_matches_type(EvaluateResponse, job, path=["response"])
- @parametrize
- def test_method_retrieve_with_all_params(self, client: LlamaStackClient) -> None:
- job = client.eval.jobs.retrieve(
- job_id="job_id",
- task_id="task_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(EvaluateResponse, job, path=["response"])
-
@parametrize
def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
response = client.eval.jobs.with_raw_response.retrieve(
@@ -84,16 +74,6 @@ def test_method_cancel(self, client: LlamaStackClient) -> None:
)
assert job is None
- @parametrize
- def test_method_cancel_with_all_params(self, client: LlamaStackClient) -> None:
- job = client.eval.jobs.cancel(
- job_id="job_id",
- task_id="task_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert job is None
-
@parametrize
def test_raw_response_cancel(self, client: LlamaStackClient) -> None:
response = client.eval.jobs.with_raw_response.cancel(
@@ -142,16 +122,6 @@ def test_method_status(self, client: LlamaStackClient) -> None:
)
assert_matches_type(Optional[JobStatusResponse], job, path=["response"])
- @parametrize
- def test_method_status_with_all_params(self, client: LlamaStackClient) -> None:
- job = client.eval.jobs.status(
- job_id="job_id",
- task_id="task_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(Optional[JobStatusResponse], job, path=["response"])
-
@parametrize
def test_raw_response_status(self, client: LlamaStackClient) -> None:
response = client.eval.jobs.with_raw_response.status(
@@ -204,16 +174,6 @@ async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> Non
)
assert_matches_type(EvaluateResponse, job, path=["response"])
- @parametrize
- async def test_method_retrieve_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- job = await async_client.eval.jobs.retrieve(
- job_id="job_id",
- task_id="task_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(EvaluateResponse, job, path=["response"])
-
@parametrize
async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.eval.jobs.with_raw_response.retrieve(
@@ -262,16 +222,6 @@ async def test_method_cancel(self, async_client: AsyncLlamaStackClient) -> None:
)
assert job is None
- @parametrize
- async def test_method_cancel_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- job = await async_client.eval.jobs.cancel(
- job_id="job_id",
- task_id="task_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert job is None
-
@parametrize
async def test_raw_response_cancel(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.eval.jobs.with_raw_response.cancel(
@@ -320,16 +270,6 @@ async def test_method_status(self, async_client: AsyncLlamaStackClient) -> None:
)
assert_matches_type(Optional[JobStatusResponse], job, path=["response"])
- @parametrize
- async def test_method_status_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- job = await async_client.eval.jobs.status(
- job_id="job_id",
- task_id="task_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(Optional[JobStatusResponse], job, path=["response"])
-
@parametrize
async def test_raw_response_status(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.eval.jobs.with_raw_response.status(
diff --git a/tests/api_resources/post_training/test_job.py b/tests/api_resources/post_training/test_job.py
index 72f0af05..2ddb2f30 100644
--- a/tests/api_resources/post_training/test_job.py
+++ b/tests/api_resources/post_training/test_job.py
@@ -26,14 +26,6 @@ def test_method_list(self, client: LlamaStackClient) -> None:
job = client.post_training.job.list()
assert_matches_type(JobListResponse, job, path=["response"])
- @parametrize
- def test_method_list_with_all_params(self, client: LlamaStackClient) -> None:
- job = client.post_training.job.list(
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(JobListResponse, job, path=["response"])
-
@parametrize
def test_raw_response_list(self, client: LlamaStackClient) -> None:
response = client.post_training.job.with_raw_response.list()
@@ -61,15 +53,6 @@ def test_method_artifacts(self, client: LlamaStackClient) -> None:
)
assert_matches_type(Optional[JobArtifactsResponse], job, path=["response"])
- @parametrize
- def test_method_artifacts_with_all_params(self, client: LlamaStackClient) -> None:
- job = client.post_training.job.artifacts(
- job_uuid="job_uuid",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(Optional[JobArtifactsResponse], job, path=["response"])
-
@parametrize
def test_raw_response_artifacts(self, client: LlamaStackClient) -> None:
response = client.post_training.job.with_raw_response.artifacts(
@@ -101,15 +84,6 @@ def test_method_cancel(self, client: LlamaStackClient) -> None:
)
assert job is None
- @parametrize
- def test_method_cancel_with_all_params(self, client: LlamaStackClient) -> None:
- job = client.post_training.job.cancel(
- job_uuid="job_uuid",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert job is None
-
@parametrize
def test_raw_response_cancel(self, client: LlamaStackClient) -> None:
response = client.post_training.job.with_raw_response.cancel(
@@ -141,15 +115,6 @@ def test_method_status(self, client: LlamaStackClient) -> None:
)
assert_matches_type(Optional[JobStatusResponse], job, path=["response"])
- @parametrize
- def test_method_status_with_all_params(self, client: LlamaStackClient) -> None:
- job = client.post_training.job.status(
- job_uuid="job_uuid",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(Optional[JobStatusResponse], job, path=["response"])
-
@parametrize
def test_raw_response_status(self, client: LlamaStackClient) -> None:
response = client.post_training.job.with_raw_response.status(
@@ -183,14 +148,6 @@ async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
job = await async_client.post_training.job.list()
assert_matches_type(JobListResponse, job, path=["response"])
- @parametrize
- async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- job = await async_client.post_training.job.list(
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(JobListResponse, job, path=["response"])
-
@parametrize
async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.post_training.job.with_raw_response.list()
@@ -218,15 +175,6 @@ async def test_method_artifacts(self, async_client: AsyncLlamaStackClient) -> No
)
assert_matches_type(Optional[JobArtifactsResponse], job, path=["response"])
- @parametrize
- async def test_method_artifacts_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- job = await async_client.post_training.job.artifacts(
- job_uuid="job_uuid",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(Optional[JobArtifactsResponse], job, path=["response"])
-
@parametrize
async def test_raw_response_artifacts(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.post_training.job.with_raw_response.artifacts(
@@ -258,15 +206,6 @@ async def test_method_cancel(self, async_client: AsyncLlamaStackClient) -> None:
)
assert job is None
- @parametrize
- async def test_method_cancel_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- job = await async_client.post_training.job.cancel(
- job_uuid="job_uuid",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert job is None
-
@parametrize
async def test_raw_response_cancel(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.post_training.job.with_raw_response.cancel(
@@ -298,15 +237,6 @@ async def test_method_status(self, async_client: AsyncLlamaStackClient) -> None:
)
assert_matches_type(Optional[JobStatusResponse], job, path=["response"])
- @parametrize
- async def test_method_status_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- job = await async_client.post_training.job.status(
- job_uuid="job_uuid",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(Optional[JobStatusResponse], job, path=["response"])
-
@parametrize
async def test_raw_response_status(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.post_training.job.with_raw_response.status(
diff --git a/tests/api_resources/test_agents.py b/tests/api_resources/test_agents.py
index 21c361e5..30dd9ac9 100644
--- a/tests/api_resources/test_agents.py
+++ b/tests/api_resources/test_agents.py
@@ -68,8 +68,6 @@ def test_method_create_with_all_params(self, client: LlamaStackClient) -> None:
"tool_prompt_format": "json",
"toolgroups": ["string"],
},
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(AgentCreateResponse, agent, path=["response"])
@@ -110,23 +108,14 @@ def test_streaming_response_create(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_delete(self, client: LlamaStackClient) -> None:
agent = client.agents.delete(
- agent_id="agent_id",
- )
- assert agent is None
-
- @parametrize
- def test_method_delete_with_all_params(self, client: LlamaStackClient) -> None:
- agent = client.agents.delete(
- agent_id="agent_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
+ "agent_id",
)
assert agent is None
@parametrize
def test_raw_response_delete(self, client: LlamaStackClient) -> None:
response = client.agents.with_raw_response.delete(
- agent_id="agent_id",
+ "agent_id",
)
assert response.is_closed is True
@@ -137,7 +126,7 @@ def test_raw_response_delete(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_delete(self, client: LlamaStackClient) -> None:
with client.agents.with_streaming_response.delete(
- agent_id="agent_id",
+ "agent_id",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -151,7 +140,7 @@ def test_streaming_response_delete(self, client: LlamaStackClient) -> None:
def test_path_params_delete(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
client.agents.with_raw_response.delete(
- agent_id="",
+ "",
)
@@ -209,8 +198,6 @@ async def test_method_create_with_all_params(self, async_client: AsyncLlamaStack
"tool_prompt_format": "json",
"toolgroups": ["string"],
},
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(AgentCreateResponse, agent, path=["response"])
@@ -251,23 +238,14 @@ async def test_streaming_response_create(self, async_client: AsyncLlamaStackClie
@parametrize
async def test_method_delete(self, async_client: AsyncLlamaStackClient) -> None:
agent = await async_client.agents.delete(
- agent_id="agent_id",
- )
- assert agent is None
-
- @parametrize
- async def test_method_delete_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- agent = await async_client.agents.delete(
- agent_id="agent_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
+ "agent_id",
)
assert agent is None
@parametrize
async def test_raw_response_delete(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.agents.with_raw_response.delete(
- agent_id="agent_id",
+ "agent_id",
)
assert response.is_closed is True
@@ -278,7 +256,7 @@ async def test_raw_response_delete(self, async_client: AsyncLlamaStackClient) ->
@parametrize
async def test_streaming_response_delete(self, async_client: AsyncLlamaStackClient) -> None:
async with async_client.agents.with_streaming_response.delete(
- agent_id="agent_id",
+ "agent_id",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -292,5 +270,5 @@ async def test_streaming_response_delete(self, async_client: AsyncLlamaStackClie
async def test_path_params_delete(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"):
await async_client.agents.with_raw_response.delete(
- agent_id="",
+ "",
)
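The hunks above also switch `agent_id` from a keyword to a positional argument, so call sites change shape:

    client.agents.delete("agent_id")             # new: positional path param
    # client.agents.delete(agent_id="agent_id")  # old keyword form, removed above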
diff --git a/tests/api_resources/test_batch_inference.py b/tests/api_resources/test_batch_inference.py
index 1bb67ca9..8e5cb9e5 100644
--- a/tests/api_resources/test_batch_inference.py
+++ b/tests/api_resources/test_batch_inference.py
@@ -49,6 +49,10 @@ def test_method_chat_completion_with_all_params(self, client: LlamaStackClient)
],
model="model",
logprobs={"top_k": 0},
+ response_format={
+ "json_schema": {"foo": True},
+ "type": "json_schema",
+ },
sampling_params={
"strategy": {"type": "greedy"},
"max_tokens": 0,
@@ -70,8 +74,6 @@ def test_method_chat_completion_with_all_params(self, client: LlamaStackClient)
},
}
],
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(BatchInferenceChatCompletionResponse, batch_inference, path=["response"])
@@ -129,13 +131,15 @@ def test_method_completion_with_all_params(self, client: LlamaStackClient) -> No
content_batch=["string"],
model="model",
logprobs={"top_k": 0},
+ response_format={
+ "json_schema": {"foo": True},
+ "type": "json_schema",
+ },
sampling_params={
"strategy": {"type": "greedy"},
"max_tokens": 0,
"repetition_penalty": 0,
},
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(BatchCompletion, batch_inference, path=["response"])
@@ -198,6 +202,10 @@ async def test_method_chat_completion_with_all_params(self, async_client: AsyncL
],
model="model",
logprobs={"top_k": 0},
+ response_format={
+ "json_schema": {"foo": True},
+ "type": "json_schema",
+ },
sampling_params={
"strategy": {"type": "greedy"},
"max_tokens": 0,
@@ -219,8 +227,6 @@ async def test_method_chat_completion_with_all_params(self, async_client: AsyncL
},
}
],
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(BatchInferenceChatCompletionResponse, batch_inference, path=["response"])
@@ -278,13 +284,15 @@ async def test_method_completion_with_all_params(self, async_client: AsyncLlamaS
content_batch=["string"],
model="model",
logprobs={"top_k": 0},
+ response_format={
+ "json_schema": {"foo": True},
+ "type": "json_schema",
+ },
sampling_params={
"strategy": {"type": "greedy"},
"max_tokens": 0,
"repetition_penalty": 0,
},
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(BatchCompletion, batch_inference, path=["response"])
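The new test coverage threads `response_format` through both batch-inference methods. A sketch of the equivalent call, reusing the toy schema from the tests:

    batch = client.batch_inference.completion(
        content_batch=["Reply in JSON"],
        model="model",
        response_format={
            "type": "json_schema",
            "json_schema": {"foo": True},  # same toy schema as the tests
        },
    )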
diff --git a/tests/api_resources/test_datasetio.py b/tests/api_resources/test_datasetio.py
index 1529d447..cfd72d94 100644
--- a/tests/api_resources/test_datasetio.py
+++ b/tests/api_resources/test_datasetio.py
@@ -27,16 +27,6 @@ def test_method_append_rows(self, client: LlamaStackClient) -> None:
)
assert datasetio is None
- @parametrize
- def test_method_append_rows_with_all_params(self, client: LlamaStackClient) -> None:
- datasetio = client.datasetio.append_rows(
- dataset_id="dataset_id",
- rows=[{"foo": True}],
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert datasetio is None
-
@parametrize
def test_raw_response_append_rows(self, client: LlamaStackClient) -> None:
response = client.datasetio.with_raw_response.append_rows(
@@ -78,8 +68,6 @@ def test_method_get_rows_paginated_with_all_params(self, client: LlamaStackClien
rows_in_page=0,
filter_condition="filter_condition",
page_token="page_token",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(PaginatedRowsResult, datasetio, path=["response"])
@@ -121,16 +109,6 @@ async def test_method_append_rows(self, async_client: AsyncLlamaStackClient) ->
)
assert datasetio is None
- @parametrize
- async def test_method_append_rows_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- datasetio = await async_client.datasetio.append_rows(
- dataset_id="dataset_id",
- rows=[{"foo": True}],
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert datasetio is None
-
@parametrize
async def test_raw_response_append_rows(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.datasetio.with_raw_response.append_rows(
@@ -172,8 +150,6 @@ async def test_method_get_rows_paginated_with_all_params(self, async_client: Asy
rows_in_page=0,
filter_condition="filter_condition",
page_token="page_token",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(PaginatedRowsResult, datasetio, path=["response"])
diff --git a/tests/api_resources/test_datasets.py b/tests/api_resources/test_datasets.py
index 98c903ad..987f3c22 100644
--- a/tests/api_resources/test_datasets.py
+++ b/tests/api_resources/test_datasets.py
@@ -20,23 +20,14 @@ class TestDatasets:
@parametrize
def test_method_retrieve(self, client: LlamaStackClient) -> None:
dataset = client.datasets.retrieve(
- dataset_id="dataset_id",
- )
- assert_matches_type(Optional[DatasetRetrieveResponse], dataset, path=["response"])
-
- @parametrize
- def test_method_retrieve_with_all_params(self, client: LlamaStackClient) -> None:
- dataset = client.datasets.retrieve(
- dataset_id="dataset_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
+ "dataset_id",
)
assert_matches_type(Optional[DatasetRetrieveResponse], dataset, path=["response"])
@parametrize
def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
response = client.datasets.with_raw_response.retrieve(
- dataset_id="dataset_id",
+ "dataset_id",
)
assert response.is_closed is True
@@ -47,7 +38,7 @@ def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
with client.datasets.with_streaming_response.retrieve(
- dataset_id="dataset_id",
+ "dataset_id",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -61,7 +52,7 @@ def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `dataset_id` but received ''"):
client.datasets.with_raw_response.retrieve(
- dataset_id="",
+ "",
            )

@parametrize
@@ -69,14 +60,6 @@ def test_method_list(self, client: LlamaStackClient) -> None:
dataset = client.datasets.list()
        assert_matches_type(DatasetListResponse, dataset, path=["response"])

- @parametrize
- def test_method_list_with_all_params(self, client: LlamaStackClient) -> None:
- dataset = client.datasets.list(
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(DatasetListResponse, dataset, path=["response"])
-
@parametrize
def test_raw_response_list(self, client: LlamaStackClient) -> None:
response = client.datasets.with_raw_response.list()
@@ -115,8 +98,6 @@ def test_method_register_with_all_params(self, client: LlamaStackClient) -> None
metadata={"foo": True},
provider_dataset_id="provider_dataset_id",
provider_id="provider_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert dataset is None
@@ -151,23 +132,14 @@ def test_streaming_response_register(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_unregister(self, client: LlamaStackClient) -> None:
dataset = client.datasets.unregister(
- dataset_id="dataset_id",
- )
- assert dataset is None
-
- @parametrize
- def test_method_unregister_with_all_params(self, client: LlamaStackClient) -> None:
- dataset = client.datasets.unregister(
- dataset_id="dataset_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
+ "dataset_id",
)
        assert dataset is None

@parametrize
def test_raw_response_unregister(self, client: LlamaStackClient) -> None:
response = client.datasets.with_raw_response.unregister(
- dataset_id="dataset_id",
+ "dataset_id",
)
assert response.is_closed is True
@@ -178,7 +150,7 @@ def test_raw_response_unregister(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_unregister(self, client: LlamaStackClient) -> None:
with client.datasets.with_streaming_response.unregister(
- dataset_id="dataset_id",
+ "dataset_id",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -192,7 +164,7 @@ def test_streaming_response_unregister(self, client: LlamaStackClient) -> None:
def test_path_params_unregister(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `dataset_id` but received ''"):
client.datasets.with_raw_response.unregister(
- dataset_id="",
+ "",
)
@@ -202,23 +174,14 @@ class TestAsyncDatasets:
@parametrize
async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
dataset = await async_client.datasets.retrieve(
- dataset_id="dataset_id",
- )
- assert_matches_type(Optional[DatasetRetrieveResponse], dataset, path=["response"])
-
- @parametrize
- async def test_method_retrieve_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- dataset = await async_client.datasets.retrieve(
- dataset_id="dataset_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
+ "dataset_id",
)
        assert_matches_type(Optional[DatasetRetrieveResponse], dataset, path=["response"])

@parametrize
async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.datasets.with_raw_response.retrieve(
- dataset_id="dataset_id",
+ "dataset_id",
)
assert response.is_closed is True
@@ -229,7 +192,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient)
@parametrize
async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
async with async_client.datasets.with_streaming_response.retrieve(
- dataset_id="dataset_id",
+ "dataset_id",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -243,7 +206,7 @@ async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackCl
async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `dataset_id` but received ''"):
await async_client.datasets.with_raw_response.retrieve(
- dataset_id="",
+ "",
            )

@parametrize
@@ -251,14 +214,6 @@ async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
dataset = await async_client.datasets.list()
        assert_matches_type(DatasetListResponse, dataset, path=["response"])

- @parametrize
- async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- dataset = await async_client.datasets.list(
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(DatasetListResponse, dataset, path=["response"])
-
@parametrize
async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.datasets.with_raw_response.list()
@@ -297,8 +252,6 @@ async def test_method_register_with_all_params(self, async_client: AsyncLlamaSta
metadata={"foo": True},
provider_dataset_id="provider_dataset_id",
provider_id="provider_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert dataset is None
@@ -333,23 +286,14 @@ async def test_streaming_response_register(self, async_client: AsyncLlamaStackCl
@parametrize
async def test_method_unregister(self, async_client: AsyncLlamaStackClient) -> None:
dataset = await async_client.datasets.unregister(
- dataset_id="dataset_id",
- )
- assert dataset is None
-
- @parametrize
- async def test_method_unregister_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- dataset = await async_client.datasets.unregister(
- dataset_id="dataset_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
+ "dataset_id",
)
        assert dataset is None

@parametrize
async def test_raw_response_unregister(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.datasets.with_raw_response.unregister(
- dataset_id="dataset_id",
+ "dataset_id",
)
assert response.is_closed is True
@@ -360,7 +304,7 @@ async def test_raw_response_unregister(self, async_client: AsyncLlamaStackClient
@parametrize
async def test_streaming_response_unregister(self, async_client: AsyncLlamaStackClient) -> None:
async with async_client.datasets.with_streaming_response.unregister(
- dataset_id="dataset_id",
+ "dataset_id",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -374,5 +318,5 @@ async def test_streaming_response_unregister(self, async_client: AsyncLlamaStack
async def test_path_params_unregister(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `dataset_id` but received ''"):
await async_client.datasets.with_raw_response.unregister(
- dataset_id="",
+ "",
)
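
Across test_datasets.py the shape of the surviving tests is the same: the `*_with_all_params` variants existed solely to exercise the two removed header kwargs, so they collapse into the plain tests, and path parameters such as `dataset_id` go back to being positional. A minimal sketch of the resulting call pattern; the client construction, and the assumption that the generated methods still accept per-request `extra_headers` for callers who need these headers, are illustrative rather than taken from this patch:

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(base_url="http://localhost:5000")  # assumed local server

    # Path parameter is positional again; the header kwargs are gone from the signature.
    dataset = client.datasets.retrieve("dataset_id")

    # Callers that still need either header can attach it per request; the payload
    # shape shown here is an assumption, not taken from this patch.
    dataset = client.datasets.retrieve(
        "dataset_id",
        extra_headers={"X-LlamaStack-Provider-Data": '{"example_key": "example_value"}'},
    )
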
diff --git a/tests/api_resources/test_eval.py b/tests/api_resources/test_eval.py
index d1913199..52556bf2 100644
--- a/tests/api_resources/test_eval.py
+++ b/tests/api_resources/test_eval.py
@@ -60,8 +60,6 @@ def test_method_evaluate_rows_with_all_params(self, client: LlamaStackClient) ->
"type": "benchmark",
"num_examples": 0,
},
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(EvaluateResponse, eval, path=["response"])
@@ -162,8 +160,6 @@ def test_method_run_eval_with_all_params(self, client: LlamaStackClient) -> None
"type": "benchmark",
"num_examples": 0,
},
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(Job, eval, path=["response"])
@@ -266,8 +262,6 @@ async def test_method_evaluate_rows_with_all_params(self, async_client: AsyncLla
"type": "benchmark",
"num_examples": 0,
},
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(EvaluateResponse, eval, path=["response"])
@@ -368,8 +362,6 @@ async def test_method_run_eval_with_all_params(self, async_client: AsyncLlamaSta
"type": "benchmark",
"num_examples": 0,
},
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(Job, eval, path=["response"])
diff --git a/tests/api_resources/test_eval_tasks.py b/tests/api_resources/test_eval_tasks.py
index ac2d8eb8..5b18621b 100644
--- a/tests/api_resources/test_eval_tasks.py
+++ b/tests/api_resources/test_eval_tasks.py
@@ -20,23 +20,14 @@ class TestEvalTasks:
@parametrize
def test_method_retrieve(self, client: LlamaStackClient) -> None:
eval_task = client.eval_tasks.retrieve(
- eval_task_id="eval_task_id",
- )
- assert_matches_type(Optional[EvalTask], eval_task, path=["response"])
-
- @parametrize
- def test_method_retrieve_with_all_params(self, client: LlamaStackClient) -> None:
- eval_task = client.eval_tasks.retrieve(
- eval_task_id="eval_task_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
+ "eval_task_id",
)
        assert_matches_type(Optional[EvalTask], eval_task, path=["response"])

@parametrize
def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
response = client.eval_tasks.with_raw_response.retrieve(
- eval_task_id="eval_task_id",
+ "eval_task_id",
)
assert response.is_closed is True
@@ -47,7 +38,7 @@ def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
with client.eval_tasks.with_streaming_response.retrieve(
- eval_task_id="eval_task_id",
+ "eval_task_id",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -61,7 +52,7 @@ def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_task_id` but received ''"):
client.eval_tasks.with_raw_response.retrieve(
- eval_task_id="",
+ "",
            )

@parametrize
@@ -69,14 +60,6 @@ def test_method_list(self, client: LlamaStackClient) -> None:
eval_task = client.eval_tasks.list()
        assert_matches_type(EvalTaskListResponse, eval_task, path=["response"])

- @parametrize
- def test_method_list_with_all_params(self, client: LlamaStackClient) -> None:
- eval_task = client.eval_tasks.list(
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(EvalTaskListResponse, eval_task, path=["response"])
-
@parametrize
def test_raw_response_list(self, client: LlamaStackClient) -> None:
response = client.eval_tasks.with_raw_response.list()
@@ -115,8 +98,6 @@ def test_method_register_with_all_params(self, client: LlamaStackClient) -> None
metadata={"foo": True},
provider_eval_task_id="provider_eval_task_id",
provider_id="provider_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert eval_task is None
@@ -155,23 +136,14 @@ class TestAsyncEvalTasks:
@parametrize
async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
eval_task = await async_client.eval_tasks.retrieve(
- eval_task_id="eval_task_id",
- )
- assert_matches_type(Optional[EvalTask], eval_task, path=["response"])
-
- @parametrize
- async def test_method_retrieve_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- eval_task = await async_client.eval_tasks.retrieve(
- eval_task_id="eval_task_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
+ "eval_task_id",
)
        assert_matches_type(Optional[EvalTask], eval_task, path=["response"])

@parametrize
async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.eval_tasks.with_raw_response.retrieve(
- eval_task_id="eval_task_id",
+ "eval_task_id",
)
assert response.is_closed is True
@@ -182,7 +154,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient)
@parametrize
async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
async with async_client.eval_tasks.with_streaming_response.retrieve(
- eval_task_id="eval_task_id",
+ "eval_task_id",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -196,7 +168,7 @@ async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackCl
async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_task_id` but received ''"):
await async_client.eval_tasks.with_raw_response.retrieve(
- eval_task_id="",
+ "",
            )

@parametrize
@@ -204,14 +176,6 @@ async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
eval_task = await async_client.eval_tasks.list()
        assert_matches_type(EvalTaskListResponse, eval_task, path=["response"])

- @parametrize
- async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- eval_task = await async_client.eval_tasks.list(
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(EvalTaskListResponse, eval_task, path=["response"])
-
@parametrize
async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.eval_tasks.with_raw_response.list()
@@ -250,8 +214,6 @@ async def test_method_register_with_all_params(self, async_client: AsyncLlamaSta
metadata={"foo": True},
provider_eval_task_id="provider_eval_task_id",
provider_id="provider_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert eval_task is None
diff --git a/tests/api_resources/test_inference.py b/tests/api_resources/test_inference.py
index f992ee2d..ab03db1c 100644
--- a/tests/api_resources/test_inference.py
+++ b/tests/api_resources/test_inference.py
@@ -78,8 +78,6 @@ def test_method_chat_completion_with_all_params_overload_1(self, client: LlamaSt
},
}
],
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(InferenceChatCompletionResponse, inference, path=["response"])
@@ -183,8 +181,6 @@ def test_method_chat_completion_with_all_params_overload_2(self, client: LlamaSt
},
}
],
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
inference_stream.response.close()
@@ -261,8 +257,6 @@ def test_method_completion_with_all_params_overload_1(self, client: LlamaStackCl
"repetition_penalty": 0,
},
stream=False,
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(InferenceCompletionResponse, inference, path=["response"])
@@ -329,8 +323,6 @@ def test_method_completion_with_all_params_overload_2(self, client: LlamaStackCl
"max_tokens": 0,
"repetition_penalty": 0,
},
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
inference_stream.response.close()
@@ -375,16 +367,6 @@ def test_method_embeddings(self, client: LlamaStackClient) -> None:
)
        assert_matches_type(EmbeddingsResponse, inference, path=["response"])

- @parametrize
- def test_method_embeddings_with_all_params(self, client: LlamaStackClient) -> None:
- inference = client.inference.embeddings(
- contents=["string"],
- model_id="model_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(EmbeddingsResponse, inference, path=["response"])
-
@parametrize
def test_raw_response_embeddings(self, client: LlamaStackClient) -> None:
response = client.inference.with_raw_response.embeddings(
@@ -472,8 +454,6 @@ async def test_method_chat_completion_with_all_params_overload_1(self, async_cli
},
}
],
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(InferenceChatCompletionResponse, inference, path=["response"])
@@ -577,8 +557,6 @@ async def test_method_chat_completion_with_all_params_overload_2(self, async_cli
},
}
],
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
await inference_stream.response.aclose()
@@ -655,8 +633,6 @@ async def test_method_completion_with_all_params_overload_1(self, async_client:
"repetition_penalty": 0,
},
stream=False,
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(InferenceCompletionResponse, inference, path=["response"])
@@ -723,8 +699,6 @@ async def test_method_completion_with_all_params_overload_2(self, async_client:
"max_tokens": 0,
"repetition_penalty": 0,
},
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
await inference_stream.response.aclose()
@@ -769,16 +743,6 @@ async def test_method_embeddings(self, async_client: AsyncLlamaStackClient) -> N
)
        assert_matches_type(EmbeddingsResponse, inference, path=["response"])

- @parametrize
- async def test_method_embeddings_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- inference = await async_client.inference.embeddings(
- contents=["string"],
- model_id="model_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(EmbeddingsResponse, inference, path=["response"])
-
@parametrize
async def test_raw_response_embeddings(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.inference.with_raw_response.embeddings(
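
The inference diff shows the limiting case: once the header kwargs are gone, `embeddings` has no optional parameters left at all, so its `_with_all_params` variant is deleted outright instead of merely shrinking. A sketch of the one remaining call shape, assuming a `client` constructed as in the earlier example:

    # `contents` and `model_id` are now the only parameters the method takes,
    # so the plain test and the deleted variant would have been identical.
    inference = client.inference.embeddings(
        contents=["string"],
        model_id="model_id",
    )
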
diff --git a/tests/api_resources/test_inspect.py b/tests/api_resources/test_inspect.py
index d27a9adb..e72ce766 100644
--- a/tests/api_resources/test_inspect.py
+++ b/tests/api_resources/test_inspect.py
@@ -22,14 +22,6 @@ def test_method_health(self, client: LlamaStackClient) -> None:
inspect = client.inspect.health()
        assert_matches_type(HealthInfo, inspect, path=["response"])

- @parametrize
- def test_method_health_with_all_params(self, client: LlamaStackClient) -> None:
- inspect = client.inspect.health(
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(HealthInfo, inspect, path=["response"])
-
@parametrize
def test_raw_response_health(self, client: LlamaStackClient) -> None:
response = client.inspect.with_raw_response.health()
@@ -55,14 +47,6 @@ def test_method_version(self, client: LlamaStackClient) -> None:
inspect = client.inspect.version()
        assert_matches_type(VersionInfo, inspect, path=["response"])

- @parametrize
- def test_method_version_with_all_params(self, client: LlamaStackClient) -> None:
- inspect = client.inspect.version(
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(VersionInfo, inspect, path=["response"])
-
@parametrize
def test_raw_response_version(self, client: LlamaStackClient) -> None:
response = client.inspect.with_raw_response.version()
@@ -92,14 +76,6 @@ async def test_method_health(self, async_client: AsyncLlamaStackClient) -> None:
inspect = await async_client.inspect.health()
        assert_matches_type(HealthInfo, inspect, path=["response"])

- @parametrize
- async def test_method_health_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- inspect = await async_client.inspect.health(
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(HealthInfo, inspect, path=["response"])
-
@parametrize
async def test_raw_response_health(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.inspect.with_raw_response.health()
@@ -125,14 +101,6 @@ async def test_method_version(self, async_client: AsyncLlamaStackClient) -> None
inspect = await async_client.inspect.version()
        assert_matches_type(VersionInfo, inspect, path=["response"])

- @parametrize
- async def test_method_version_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- inspect = await async_client.inspect.version(
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(VersionInfo, inspect, path=["response"])
-
@parametrize
async def test_raw_response_version(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.inspect.with_raw_response.version()
diff --git a/tests/api_resources/test_models.py b/tests/api_resources/test_models.py
index b394ebe9..c38903d5 100644
--- a/tests/api_resources/test_models.py
+++ b/tests/api_resources/test_models.py
@@ -20,23 +20,14 @@ class TestModels:
@parametrize
def test_method_retrieve(self, client: LlamaStackClient) -> None:
model = client.models.retrieve(
- model_id="model_id",
- )
- assert_matches_type(Optional[Model], model, path=["response"])
-
- @parametrize
- def test_method_retrieve_with_all_params(self, client: LlamaStackClient) -> None:
- model = client.models.retrieve(
- model_id="model_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
+ "model_id",
)
        assert_matches_type(Optional[Model], model, path=["response"])

@parametrize
def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
response = client.models.with_raw_response.retrieve(
- model_id="model_id",
+ "model_id",
)
assert response.is_closed is True
@@ -47,7 +38,7 @@ def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
with client.models.with_streaming_response.retrieve(
- model_id="model_id",
+ "model_id",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -61,7 +52,7 @@ def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `model_id` but received ''"):
client.models.with_raw_response.retrieve(
- model_id="",
+ "",
            )

@parametrize
@@ -69,14 +60,6 @@ def test_method_list(self, client: LlamaStackClient) -> None:
model = client.models.list()
        assert_matches_type(ModelListResponse, model, path=["response"])

- @parametrize
- def test_method_list_with_all_params(self, client: LlamaStackClient) -> None:
- model = client.models.list(
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(ModelListResponse, model, path=["response"])
-
@parametrize
def test_raw_response_list(self, client: LlamaStackClient) -> None:
response = client.models.with_raw_response.list()
@@ -112,8 +95,6 @@ def test_method_register_with_all_params(self, client: LlamaStackClient) -> None
model_type="llm",
provider_id="provider_id",
provider_model_id="provider_model_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(Model, model, path=["response"])
@@ -144,23 +125,14 @@ def test_streaming_response_register(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_unregister(self, client: LlamaStackClient) -> None:
model = client.models.unregister(
- model_id="model_id",
- )
- assert model is None
-
- @parametrize
- def test_method_unregister_with_all_params(self, client: LlamaStackClient) -> None:
- model = client.models.unregister(
- model_id="model_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
+ "model_id",
)
        assert model is None

@parametrize
def test_raw_response_unregister(self, client: LlamaStackClient) -> None:
response = client.models.with_raw_response.unregister(
- model_id="model_id",
+ "model_id",
)
assert response.is_closed is True
@@ -171,7 +143,7 @@ def test_raw_response_unregister(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_unregister(self, client: LlamaStackClient) -> None:
with client.models.with_streaming_response.unregister(
- model_id="model_id",
+ "model_id",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -185,7 +157,7 @@ def test_streaming_response_unregister(self, client: LlamaStackClient) -> None:
def test_path_params_unregister(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `model_id` but received ''"):
client.models.with_raw_response.unregister(
- model_id="",
+ "",
)
@@ -195,23 +167,14 @@ class TestAsyncModels:
@parametrize
async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
model = await async_client.models.retrieve(
- model_id="model_id",
- )
- assert_matches_type(Optional[Model], model, path=["response"])
-
- @parametrize
- async def test_method_retrieve_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- model = await async_client.models.retrieve(
- model_id="model_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
+ "model_id",
)
        assert_matches_type(Optional[Model], model, path=["response"])

@parametrize
async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.models.with_raw_response.retrieve(
- model_id="model_id",
+ "model_id",
)
assert response.is_closed is True
@@ -222,7 +185,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient)
@parametrize
async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
async with async_client.models.with_streaming_response.retrieve(
- model_id="model_id",
+ "model_id",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -236,7 +199,7 @@ async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackCl
async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `model_id` but received ''"):
await async_client.models.with_raw_response.retrieve(
- model_id="",
+ "",
            )

@parametrize
@@ -244,14 +207,6 @@ async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
model = await async_client.models.list()
        assert_matches_type(ModelListResponse, model, path=["response"])

- @parametrize
- async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- model = await async_client.models.list(
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(ModelListResponse, model, path=["response"])
-
@parametrize
async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.models.with_raw_response.list()
@@ -287,8 +242,6 @@ async def test_method_register_with_all_params(self, async_client: AsyncLlamaSta
model_type="llm",
provider_id="provider_id",
provider_model_id="provider_model_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(Model, model, path=["response"])
@@ -319,23 +272,14 @@ async def test_streaming_response_register(self, async_client: AsyncLlamaStackCl
@parametrize
async def test_method_unregister(self, async_client: AsyncLlamaStackClient) -> None:
model = await async_client.models.unregister(
- model_id="model_id",
- )
- assert model is None
-
- @parametrize
- async def test_method_unregister_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- model = await async_client.models.unregister(
- model_id="model_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
+ "model_id",
)
        assert model is None

@parametrize
async def test_raw_response_unregister(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.models.with_raw_response.unregister(
- model_id="model_id",
+ "model_id",
)
assert response.is_closed is True
@@ -346,7 +290,7 @@ async def test_raw_response_unregister(self, async_client: AsyncLlamaStackClient
@parametrize
async def test_streaming_response_unregister(self, async_client: AsyncLlamaStackClient) -> None:
async with async_client.models.with_streaming_response.unregister(
- model_id="model_id",
+ "model_id",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -360,5 +304,5 @@ async def test_streaming_response_unregister(self, async_client: AsyncLlamaStack
async def test_path_params_unregister(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `model_id` but received ''"):
await async_client.models.with_raw_response.unregister(
- model_id="",
+ "",
)
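
Note that the positional change does not weaken the client-side validation these files assert on: an empty path parameter still raises before any request is sent. A sketch mirroring the unchanged `test_path_params_unregister` case, again assuming a constructed `client`:

    import pytest

    # The empty string is rejected locally; no HTTP request is made.
    with pytest.raises(ValueError, match=r"Expected a non-empty value for `model_id` but received ''"):
        client.models.with_raw_response.unregister("")
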
diff --git a/tests/api_resources/test_post_training.py b/tests/api_resources/test_post_training.py
index 68041f24..98047e4c 100644
--- a/tests/api_resources/test_post_training.py
+++ b/tests/api_resources/test_post_training.py
@@ -94,8 +94,6 @@ def test_method_preference_optimize_with_all_params(self, client: LlamaStackClie
"memory_efficient_fsdp_wrap": True,
},
},
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(PostTrainingJob, post_training, path=["response"])
@@ -251,8 +249,6 @@ def test_method_supervised_fine_tune_with_all_params(self, client: LlamaStackCli
"use_dora": True,
},
checkpoint_dir="checkpoint_dir",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(PostTrainingJob, post_training, path=["response"])
@@ -401,8 +397,6 @@ async def test_method_preference_optimize_with_all_params(self, async_client: As
"memory_efficient_fsdp_wrap": True,
},
},
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(PostTrainingJob, post_training, path=["response"])
@@ -558,8 +552,6 @@ async def test_method_supervised_fine_tune_with_all_params(self, async_client: A
"use_dora": True,
},
checkpoint_dir="checkpoint_dir",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(PostTrainingJob, post_training, path=["response"])
diff --git a/tests/api_resources/test_providers.py b/tests/api_resources/test_providers.py
index 2b3c2275..46017c0e 100644
--- a/tests/api_resources/test_providers.py
+++ b/tests/api_resources/test_providers.py
@@ -22,14 +22,6 @@ def test_method_list(self, client: LlamaStackClient) -> None:
provider = client.providers.list()
        assert_matches_type(ProviderListResponse, provider, path=["response"])

- @parametrize
- def test_method_list_with_all_params(self, client: LlamaStackClient) -> None:
- provider = client.providers.list(
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(ProviderListResponse, provider, path=["response"])
-
@parametrize
def test_raw_response_list(self, client: LlamaStackClient) -> None:
response = client.providers.with_raw_response.list()
@@ -59,14 +51,6 @@ async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
provider = await async_client.providers.list()
        assert_matches_type(ProviderListResponse, provider, path=["response"])

- @parametrize
- async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- provider = await async_client.providers.list(
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(ProviderListResponse, provider, path=["response"])
-
@parametrize
async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.providers.with_raw_response.list()
diff --git a/tests/api_resources/test_routes.py b/tests/api_resources/test_routes.py
index 8a731f11..d434d5b8 100644
--- a/tests/api_resources/test_routes.py
+++ b/tests/api_resources/test_routes.py
@@ -22,14 +22,6 @@ def test_method_list(self, client: LlamaStackClient) -> None:
route = client.routes.list()
        assert_matches_type(RouteListResponse, route, path=["response"])

- @parametrize
- def test_method_list_with_all_params(self, client: LlamaStackClient) -> None:
- route = client.routes.list(
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(RouteListResponse, route, path=["response"])
-
@parametrize
def test_raw_response_list(self, client: LlamaStackClient) -> None:
response = client.routes.with_raw_response.list()
@@ -59,14 +51,6 @@ async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
route = await async_client.routes.list()
        assert_matches_type(RouteListResponse, route, path=["response"])

- @parametrize
- async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- route = await async_client.routes.list(
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(RouteListResponse, route, path=["response"])
-
@parametrize
async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.routes.with_raw_response.list()
diff --git a/tests/api_resources/test_safety.py b/tests/api_resources/test_safety.py
index 967132ae..e3dbe9b7 100644
--- a/tests/api_resources/test_safety.py
+++ b/tests/api_resources/test_safety.py
@@ -31,23 +31,6 @@ def test_method_run_shield(self, client: LlamaStackClient) -> None:
)
        assert_matches_type(RunShieldResponse, safety, path=["response"])

- @parametrize
- def test_method_run_shield_with_all_params(self, client: LlamaStackClient) -> None:
- safety = client.safety.run_shield(
- messages=[
- {
- "content": "string",
- "role": "user",
- "context": "string",
- }
- ],
- params={"foo": True},
- shield_id="shield_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(RunShieldResponse, safety, path=["response"])
-
@parametrize
def test_raw_response_run_shield(self, client: LlamaStackClient) -> None:
response = client.safety.with_raw_response.run_shield(
@@ -104,23 +87,6 @@ async def test_method_run_shield(self, async_client: AsyncLlamaStackClient) -> N
)
        assert_matches_type(RunShieldResponse, safety, path=["response"])

- @parametrize
- async def test_method_run_shield_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- safety = await async_client.safety.run_shield(
- messages=[
- {
- "content": "string",
- "role": "user",
- "context": "string",
- }
- ],
- params={"foo": True},
- shield_id="shield_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(RunShieldResponse, safety, path=["response"])
-
@parametrize
async def test_raw_response_run_shield(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.safety.with_raw_response.run_shield(
diff --git a/tests/api_resources/test_scoring.py b/tests/api_resources/test_scoring.py
index b841b5cc..da61ebcb 100644
--- a/tests/api_resources/test_scoring.py
+++ b/tests/api_resources/test_scoring.py
@@ -33,24 +33,6 @@ def test_method_score(self, client: LlamaStackClient) -> None:
)
        assert_matches_type(ScoringScoreResponse, scoring, path=["response"])

- @parametrize
- def test_method_score_with_all_params(self, client: LlamaStackClient) -> None:
- scoring = client.scoring.score(
- input_rows=[{"foo": True}],
- scoring_functions={
- "foo": {
- "judge_model": "judge_model",
- "type": "llm_as_judge",
- "aggregation_functions": ["average"],
- "judge_score_regexes": ["string"],
- "prompt_template": "prompt_template",
- }
- },
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(ScoringScoreResponse, scoring, path=["response"])
-
@parametrize
def test_raw_response_score(self, client: LlamaStackClient) -> None:
response = client.scoring.with_raw_response.score(
@@ -101,25 +83,6 @@ def test_method_score_batch(self, client: LlamaStackClient) -> None:
)
        assert_matches_type(ScoringScoreBatchResponse, scoring, path=["response"])

- @parametrize
- def test_method_score_batch_with_all_params(self, client: LlamaStackClient) -> None:
- scoring = client.scoring.score_batch(
- dataset_id="dataset_id",
- save_results_dataset=True,
- scoring_functions={
- "foo": {
- "judge_model": "judge_model",
- "type": "llm_as_judge",
- "aggregation_functions": ["average"],
- "judge_score_regexes": ["string"],
- "prompt_template": "prompt_template",
- }
- },
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(ScoringScoreBatchResponse, scoring, path=["response"])
-
@parametrize
def test_raw_response_score_batch(self, client: LlamaStackClient) -> None:
response = client.scoring.with_raw_response.score_batch(
@@ -175,24 +138,6 @@ async def test_method_score(self, async_client: AsyncLlamaStackClient) -> None:
)
        assert_matches_type(ScoringScoreResponse, scoring, path=["response"])

- @parametrize
- async def test_method_score_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- scoring = await async_client.scoring.score(
- input_rows=[{"foo": True}],
- scoring_functions={
- "foo": {
- "judge_model": "judge_model",
- "type": "llm_as_judge",
- "aggregation_functions": ["average"],
- "judge_score_regexes": ["string"],
- "prompt_template": "prompt_template",
- }
- },
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(ScoringScoreResponse, scoring, path=["response"])
-
@parametrize
async def test_raw_response_score(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.scoring.with_raw_response.score(
@@ -243,25 +188,6 @@ async def test_method_score_batch(self, async_client: AsyncLlamaStackClient) ->
)
        assert_matches_type(ScoringScoreBatchResponse, scoring, path=["response"])

- @parametrize
- async def test_method_score_batch_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- scoring = await async_client.scoring.score_batch(
- dataset_id="dataset_id",
- save_results_dataset=True,
- scoring_functions={
- "foo": {
- "judge_model": "judge_model",
- "type": "llm_as_judge",
- "aggregation_functions": ["average"],
- "judge_score_regexes": ["string"],
- "prompt_template": "prompt_template",
- }
- },
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(ScoringScoreBatchResponse, scoring, path=["response"])
-
@parametrize
async def test_raw_response_score_batch(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.scoring.with_raw_response.score_batch(
diff --git a/tests/api_resources/test_scoring_functions.py b/tests/api_resources/test_scoring_functions.py
index da89a3bb..44f8d3df 100644
--- a/tests/api_resources/test_scoring_functions.py
+++ b/tests/api_resources/test_scoring_functions.py
@@ -23,23 +23,14 @@ class TestScoringFunctions:
@parametrize
def test_method_retrieve(self, client: LlamaStackClient) -> None:
scoring_function = client.scoring_functions.retrieve(
- scoring_fn_id="scoring_fn_id",
- )
- assert_matches_type(Optional[ScoringFn], scoring_function, path=["response"])
-
- @parametrize
- def test_method_retrieve_with_all_params(self, client: LlamaStackClient) -> None:
- scoring_function = client.scoring_functions.retrieve(
- scoring_fn_id="scoring_fn_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
+ "scoring_fn_id",
)
        assert_matches_type(Optional[ScoringFn], scoring_function, path=["response"])

@parametrize
def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
response = client.scoring_functions.with_raw_response.retrieve(
- scoring_fn_id="scoring_fn_id",
+ "scoring_fn_id",
)
assert response.is_closed is True
@@ -50,7 +41,7 @@ def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
with client.scoring_functions.with_streaming_response.retrieve(
- scoring_fn_id="scoring_fn_id",
+ "scoring_fn_id",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -64,7 +55,7 @@ def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `scoring_fn_id` but received ''"):
client.scoring_functions.with_raw_response.retrieve(
- scoring_fn_id="",
+ "",
            )

@parametrize
@@ -72,14 +63,6 @@ def test_method_list(self, client: LlamaStackClient) -> None:
scoring_function = client.scoring_functions.list()
        assert_matches_type(ScoringFunctionListResponse, scoring_function, path=["response"])

- @parametrize
- def test_method_list_with_all_params(self, client: LlamaStackClient) -> None:
- scoring_function = client.scoring_functions.list(
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(ScoringFunctionListResponse, scoring_function, path=["response"])
-
@parametrize
def test_raw_response_list(self, client: LlamaStackClient) -> None:
response = client.scoring_functions.with_raw_response.list()
@@ -124,8 +107,6 @@ def test_method_register_with_all_params(self, client: LlamaStackClient) -> None
},
provider_id="provider_id",
provider_scoring_fn_id="provider_scoring_fn_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert scoring_function is None
@@ -164,23 +145,14 @@ class TestAsyncScoringFunctions:
@parametrize
async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
scoring_function = await async_client.scoring_functions.retrieve(
- scoring_fn_id="scoring_fn_id",
- )
- assert_matches_type(Optional[ScoringFn], scoring_function, path=["response"])
-
- @parametrize
- async def test_method_retrieve_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- scoring_function = await async_client.scoring_functions.retrieve(
- scoring_fn_id="scoring_fn_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
+ "scoring_fn_id",
)
        assert_matches_type(Optional[ScoringFn], scoring_function, path=["response"])

@parametrize
async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.scoring_functions.with_raw_response.retrieve(
- scoring_fn_id="scoring_fn_id",
+ "scoring_fn_id",
)
assert response.is_closed is True
@@ -191,7 +163,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient)
@parametrize
async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
async with async_client.scoring_functions.with_streaming_response.retrieve(
- scoring_fn_id="scoring_fn_id",
+ "scoring_fn_id",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -205,7 +177,7 @@ async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackCl
async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `scoring_fn_id` but received ''"):
await async_client.scoring_functions.with_raw_response.retrieve(
- scoring_fn_id="",
+ "",
            )

@parametrize
@@ -213,14 +185,6 @@ async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
scoring_function = await async_client.scoring_functions.list()
        assert_matches_type(ScoringFunctionListResponse, scoring_function, path=["response"])

- @parametrize
- async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- scoring_function = await async_client.scoring_functions.list(
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(ScoringFunctionListResponse, scoring_function, path=["response"])
-
@parametrize
async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.scoring_functions.with_raw_response.list()
@@ -265,8 +229,6 @@ async def test_method_register_with_all_params(self, async_client: AsyncLlamaSta
},
provider_id="provider_id",
provider_scoring_fn_id="provider_scoring_fn_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert scoring_function is None
diff --git a/tests/api_resources/test_shields.py b/tests/api_resources/test_shields.py
index aa6249e6..a32be825 100644
--- a/tests/api_resources/test_shields.py
+++ b/tests/api_resources/test_shields.py
@@ -20,23 +20,14 @@ class TestShields:
@parametrize
def test_method_retrieve(self, client: LlamaStackClient) -> None:
shield = client.shields.retrieve(
- identifier="identifier",
- )
- assert_matches_type(Optional[Shield], shield, path=["response"])
-
- @parametrize
- def test_method_retrieve_with_all_params(self, client: LlamaStackClient) -> None:
- shield = client.shields.retrieve(
- identifier="identifier",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
+ "identifier",
)
        assert_matches_type(Optional[Shield], shield, path=["response"])

@parametrize
def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
response = client.shields.with_raw_response.retrieve(
- identifier="identifier",
+ "identifier",
)
assert response.is_closed is True
@@ -47,7 +38,7 @@ def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
with client.shields.with_streaming_response.retrieve(
- identifier="identifier",
+ "identifier",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -61,7 +52,7 @@ def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `identifier` but received ''"):
client.shields.with_raw_response.retrieve(
- identifier="",
+ "",
            )

@parametrize
@@ -69,14 +60,6 @@ def test_method_list(self, client: LlamaStackClient) -> None:
shield = client.shields.list()
        assert_matches_type(ShieldListResponse, shield, path=["response"])

- @parametrize
- def test_method_list_with_all_params(self, client: LlamaStackClient) -> None:
- shield = client.shields.list(
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(ShieldListResponse, shield, path=["response"])
-
@parametrize
def test_raw_response_list(self, client: LlamaStackClient) -> None:
response = client.shields.with_raw_response.list()
@@ -111,8 +94,6 @@ def test_method_register_with_all_params(self, client: LlamaStackClient) -> None
params={"foo": True},
provider_id="provider_id",
provider_shield_id="provider_shield_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(Shield, shield, path=["response"])
@@ -147,23 +128,14 @@ class TestAsyncShields:
@parametrize
async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
shield = await async_client.shields.retrieve(
- identifier="identifier",
- )
- assert_matches_type(Optional[Shield], shield, path=["response"])
-
- @parametrize
- async def test_method_retrieve_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- shield = await async_client.shields.retrieve(
- identifier="identifier",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
+ "identifier",
)
        assert_matches_type(Optional[Shield], shield, path=["response"])

@parametrize
async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.shields.with_raw_response.retrieve(
- identifier="identifier",
+ "identifier",
)
assert response.is_closed is True
@@ -174,7 +146,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient)
@parametrize
async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
async with async_client.shields.with_streaming_response.retrieve(
- identifier="identifier",
+ "identifier",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -188,7 +160,7 @@ async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackCl
async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `identifier` but received ''"):
await async_client.shields.with_raw_response.retrieve(
- identifier="",
+ "",
            )

@parametrize
@@ -196,14 +168,6 @@ async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
shield = await async_client.shields.list()
        assert_matches_type(ShieldListResponse, shield, path=["response"])

- @parametrize
- async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- shield = await async_client.shields.list(
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(ShieldListResponse, shield, path=["response"])
-
@parametrize
async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.shields.with_raw_response.list()
@@ -238,8 +202,6 @@ async def test_method_register_with_all_params(self, async_client: AsyncLlamaSta
params={"foo": True},
provider_id="provider_id",
provider_shield_id="provider_shield_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(Shield, shield, path=["response"])
diff --git a/tests/api_resources/test_synthetic_data_generation.py b/tests/api_resources/test_synthetic_data_generation.py
index d8fd0eed..db409b53 100644
--- a/tests/api_resources/test_synthetic_data_generation.py
+++ b/tests/api_resources/test_synthetic_data_generation.py
@@ -42,8 +42,6 @@ def test_method_generate_with_all_params(self, client: LlamaStackClient) -> None
],
filtering_function="none",
model="model",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(SyntheticDataGenerationResponse, synthetic_data_generation, path=["response"])
@@ -112,8 +110,6 @@ async def test_method_generate_with_all_params(self, async_client: AsyncLlamaSta
],
filtering_function="none",
model="model",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(SyntheticDataGenerationResponse, synthetic_data_generation, path=["response"])
diff --git a/tests/api_resources/test_telemetry.py b/tests/api_resources/test_telemetry.py
index 3f67d426..99886c2d 100644
--- a/tests/api_resources/test_telemetry.py
+++ b/tests/api_resources/test_telemetry.py
@@ -32,16 +32,6 @@ def test_method_get_span(self, client: LlamaStackClient) -> None:
)
        assert_matches_type(TelemetryGetSpanResponse, telemetry, path=["response"])

- @parametrize
- def test_method_get_span_with_all_params(self, client: LlamaStackClient) -> None:
- telemetry = client.telemetry.get_span(
- span_id="span_id",
- trace_id="trace_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(TelemetryGetSpanResponse, telemetry, path=["response"])
-
@parametrize
def test_raw_response_get_span(self, client: LlamaStackClient) -> None:
response = client.telemetry.with_raw_response.get_span(
@@ -95,8 +85,6 @@ def test_method_get_span_tree_with_all_params(self, client: LlamaStackClient) ->
span_id="span_id",
attributes_to_return=["string"],
max_depth=0,
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(TelemetryGetSpanTreeResponse, telemetry, path=["response"])
@@ -134,23 +122,14 @@ def test_path_params_get_span_tree(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_get_trace(self, client: LlamaStackClient) -> None:
telemetry = client.telemetry.get_trace(
- trace_id="trace_id",
- )
- assert_matches_type(Trace, telemetry, path=["response"])
-
- @parametrize
- def test_method_get_trace_with_all_params(self, client: LlamaStackClient) -> None:
- telemetry = client.telemetry.get_trace(
- trace_id="trace_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
+ "trace_id",
)
        assert_matches_type(Trace, telemetry, path=["response"])

@parametrize
def test_raw_response_get_trace(self, client: LlamaStackClient) -> None:
response = client.telemetry.with_raw_response.get_trace(
- trace_id="trace_id",
+ "trace_id",
)
assert response.is_closed is True
@@ -161,7 +140,7 @@ def test_raw_response_get_trace(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_get_trace(self, client: LlamaStackClient) -> None:
with client.telemetry.with_streaming_response.get_trace(
- trace_id="trace_id",
+ "trace_id",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -175,7 +154,7 @@ def test_streaming_response_get_trace(self, client: LlamaStackClient) -> None:
def test_path_params_get_trace(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `trace_id` but received ''"):
client.telemetry.with_raw_response.get_trace(
- trace_id="",
+ "",
            )

@parametrize
@@ -206,8 +185,6 @@ def test_method_log_event_with_all_params(self, client: LlamaStackClient) -> Non
"attributes": {"foo": True},
},
ttl_seconds=0,
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert telemetry is None
@@ -279,8 +256,6 @@ def test_method_query_spans_with_all_params(self, client: LlamaStackClient) -> N
],
attributes_to_return=["string"],
max_depth=0,
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(TelemetryQuerySpansResponse, telemetry, path=["response"])
@@ -344,8 +319,6 @@ def test_method_query_traces_with_all_params(self, client: LlamaStackClient) ->
limit=0,
offset=0,
order_by=["string"],
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(TelemetryQueryTracesResponse, telemetry, path=["response"])
@@ -399,8 +372,6 @@ def test_method_save_spans_to_dataset_with_all_params(self, client: LlamaStackCl
attributes_to_save=["string"],
dataset_id="dataset_id",
max_depth=0,
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert telemetry is None
@@ -456,16 +427,6 @@ async def test_method_get_span(self, async_client: AsyncLlamaStackClient) -> Non
)
assert_matches_type(TelemetryGetSpanResponse, telemetry, path=["response"])
- @parametrize
- async def test_method_get_span_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- telemetry = await async_client.telemetry.get_span(
- span_id="span_id",
- trace_id="trace_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(TelemetryGetSpanResponse, telemetry, path=["response"])
-
@parametrize
async def test_raw_response_get_span(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.telemetry.with_raw_response.get_span(
@@ -519,8 +480,6 @@ async def test_method_get_span_tree_with_all_params(self, async_client: AsyncLla
span_id="span_id",
attributes_to_return=["string"],
max_depth=0,
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(TelemetryGetSpanTreeResponse, telemetry, path=["response"])
@@ -558,23 +517,14 @@ async def test_path_params_get_span_tree(self, async_client: AsyncLlamaStackClie
@parametrize
async def test_method_get_trace(self, async_client: AsyncLlamaStackClient) -> None:
telemetry = await async_client.telemetry.get_trace(
- trace_id="trace_id",
- )
- assert_matches_type(Trace, telemetry, path=["response"])
-
- @parametrize
- async def test_method_get_trace_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- telemetry = await async_client.telemetry.get_trace(
- trace_id="trace_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
+ "trace_id",
)
assert_matches_type(Trace, telemetry, path=["response"])
@parametrize
async def test_raw_response_get_trace(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.telemetry.with_raw_response.get_trace(
- trace_id="trace_id",
+ "trace_id",
)
assert response.is_closed is True
@@ -585,7 +535,7 @@ async def test_raw_response_get_trace(self, async_client: AsyncLlamaStackClient)
@parametrize
async def test_streaming_response_get_trace(self, async_client: AsyncLlamaStackClient) -> None:
async with async_client.telemetry.with_streaming_response.get_trace(
- trace_id="trace_id",
+ "trace_id",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -599,7 +549,7 @@ async def test_streaming_response_get_trace(self, async_client: AsyncLlamaStackC
async def test_path_params_get_trace(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `trace_id` but received ''"):
await async_client.telemetry.with_raw_response.get_trace(
- trace_id="",
+ "",
)
@parametrize
@@ -630,8 +580,6 @@ async def test_method_log_event_with_all_params(self, async_client: AsyncLlamaSt
"attributes": {"foo": True},
},
ttl_seconds=0,
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert telemetry is None
@@ -703,8 +651,6 @@ async def test_method_query_spans_with_all_params(self, async_client: AsyncLlama
],
attributes_to_return=["string"],
max_depth=0,
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(TelemetryQuerySpansResponse, telemetry, path=["response"])
@@ -768,8 +714,6 @@ async def test_method_query_traces_with_all_params(self, async_client: AsyncLlam
limit=0,
offset=0,
order_by=["string"],
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(TelemetryQueryTracesResponse, telemetry, path=["response"])
@@ -823,8 +767,6 @@ async def test_method_save_spans_to_dataset_with_all_params(self, async_client:
attributes_to_save=["string"],
dataset_id="dataset_id",
max_depth=0,
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert telemetry is None
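
Taken together, the telemetry changes track two API simplifications: the `x_llama_stack_client_version`/`x_llama_stack_provider_data` kwargs are gone from every method, and single-path-param endpoints such as `get_trace` now take their identifier positionally. A minimal migration sketch under those assumptions (illustrative values; assumes the per-request `extra_headers` escape hatch in the generated method signatures still accepts arbitrary headers):

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(base_url="http://localhost:8321")  # illustrative URL

    # Before: client.telemetry.get_trace(trace_id=..., x_llama_stack_provider_data=...)
    # After: the identifier is positional; provider data rides on a plain header.
    trace = client.telemetry.get_trace(
        "trace-123",  # placeholder trace ID
        extra_headers={"X-LlamaStack-Provider-Data": '{"example_api_key": "..."}'},
    )
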
diff --git a/tests/api_resources/test_tool_runtime.py b/tests/api_resources/test_tool_runtime.py
index a415cac1..ca4279bb 100644
--- a/tests/api_resources/test_tool_runtime.py
+++ b/tests/api_resources/test_tool_runtime.py
@@ -29,16 +29,6 @@ def test_method_invoke_tool(self, client: LlamaStackClient) -> None:
)
assert_matches_type(ToolInvocationResult, tool_runtime, path=["response"])
- @parametrize
- def test_method_invoke_tool_with_all_params(self, client: LlamaStackClient) -> None:
- tool_runtime = client.tool_runtime.invoke_tool(
- kwargs={"foo": True},
- tool_name="tool_name",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(ToolInvocationResult, tool_runtime, path=["response"])
-
@parametrize
def test_raw_response_invoke_tool(self, client: LlamaStackClient) -> None:
response = client.tool_runtime.with_raw_response.invoke_tool(
@@ -77,8 +67,6 @@ def test_method_list_tools_with_all_params(self, client: LlamaStackClient) -> No
tool_runtime = client.tool_runtime.list_tools(
mcp_endpoint={"uri": "uri"},
tool_group_id="tool_group_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(JSONLDecoder[ToolDef], tool_runtime, path=["response"])
@@ -116,16 +104,6 @@ async def test_method_invoke_tool(self, async_client: AsyncLlamaStackClient) ->
)
assert_matches_type(ToolInvocationResult, tool_runtime, path=["response"])
- @parametrize
- async def test_method_invoke_tool_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- tool_runtime = await async_client.tool_runtime.invoke_tool(
- kwargs={"foo": True},
- tool_name="tool_name",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(ToolInvocationResult, tool_runtime, path=["response"])
-
@parametrize
async def test_raw_response_invoke_tool(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.tool_runtime.with_raw_response.invoke_tool(
@@ -164,8 +142,6 @@ async def test_method_list_tools_with_all_params(self, async_client: AsyncLlamaS
tool_runtime = await async_client.tool_runtime.list_tools(
mcp_endpoint={"uri": "uri"},
tool_group_id="tool_group_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(AsyncJSONLDecoder[ToolDef], tool_runtime, path=["response"])
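
For `invoke_tool`, the removed header kwargs were its only optional parameters, so the `_with_all_params` variants became identical to the base tests and were deleted outright rather than trimmed. Callers who previously threaded those kwargs through every call can set the headers once at construction instead; a hedged sketch, assuming the client constructor accepts `default_headers` as Stainless-generated clients typically do:

    from llama_stack_client import LlamaStackClient

    # Assumption: default_headers is applied to every outgoing request.
    client = LlamaStackClient(
        base_url="http://localhost:8321",  # illustrative
        default_headers={
            "X-LlamaStack-Client-Version": "0.1.0",  # illustrative value
            "X-LlamaStack-Provider-Data": '{"example_api_key": "..."}',
        },
    )

    result = client.tool_runtime.invoke_tool(
        kwargs={"foo": True},
        tool_name="tool_name",
    )
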
diff --git a/tests/api_resources/test_toolgroups.py b/tests/api_resources/test_toolgroups.py
index 2ed0d635..42a38226 100644
--- a/tests/api_resources/test_toolgroups.py
+++ b/tests/api_resources/test_toolgroups.py
@@ -22,14 +22,6 @@ def test_method_list(self, client: LlamaStackClient) -> None:
toolgroup = client.toolgroups.list()
assert_matches_type(ToolgroupListResponse, toolgroup, path=["response"])
- @parametrize
- def test_method_list_with_all_params(self, client: LlamaStackClient) -> None:
- toolgroup = client.toolgroups.list(
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(ToolgroupListResponse, toolgroup, path=["response"])
-
@parametrize
def test_raw_response_list(self, client: LlamaStackClient) -> None:
response = client.toolgroups.with_raw_response.list()
@@ -53,23 +45,14 @@ def test_streaming_response_list(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_get(self, client: LlamaStackClient) -> None:
toolgroup = client.toolgroups.get(
- toolgroup_id="toolgroup_id",
- )
- assert_matches_type(ToolGroup, toolgroup, path=["response"])
-
- @parametrize
- def test_method_get_with_all_params(self, client: LlamaStackClient) -> None:
- toolgroup = client.toolgroups.get(
- toolgroup_id="toolgroup_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
+ "toolgroup_id",
)
assert_matches_type(ToolGroup, toolgroup, path=["response"])
@parametrize
def test_raw_response_get(self, client: LlamaStackClient) -> None:
response = client.toolgroups.with_raw_response.get(
- toolgroup_id="toolgroup_id",
+ "toolgroup_id",
)
assert response.is_closed is True
@@ -80,7 +63,7 @@ def test_raw_response_get(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_get(self, client: LlamaStackClient) -> None:
with client.toolgroups.with_streaming_response.get(
- toolgroup_id="toolgroup_id",
+ "toolgroup_id",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -94,7 +77,7 @@ def test_streaming_response_get(self, client: LlamaStackClient) -> None:
def test_path_params_get(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `toolgroup_id` but received ''"):
client.toolgroups.with_raw_response.get(
- toolgroup_id="",
+ "",
)
@parametrize
@@ -112,8 +95,6 @@ def test_method_register_with_all_params(self, client: LlamaStackClient) -> None
toolgroup_id="toolgroup_id",
args={"foo": True},
mcp_endpoint={"uri": "uri"},
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert toolgroup is None
@@ -146,23 +127,14 @@ def test_streaming_response_register(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_unregister(self, client: LlamaStackClient) -> None:
toolgroup = client.toolgroups.unregister(
- toolgroup_id="toolgroup_id",
- )
- assert toolgroup is None
-
- @parametrize
- def test_method_unregister_with_all_params(self, client: LlamaStackClient) -> None:
- toolgroup = client.toolgroups.unregister(
- toolgroup_id="toolgroup_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
+ "toolgroup_id",
)
assert toolgroup is None
@parametrize
def test_raw_response_unregister(self, client: LlamaStackClient) -> None:
response = client.toolgroups.with_raw_response.unregister(
- toolgroup_id="toolgroup_id",
+ "toolgroup_id",
)
assert response.is_closed is True
@@ -173,7 +145,7 @@ def test_raw_response_unregister(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_unregister(self, client: LlamaStackClient) -> None:
with client.toolgroups.with_streaming_response.unregister(
- toolgroup_id="toolgroup_id",
+ "toolgroup_id",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -187,7 +159,7 @@ def test_streaming_response_unregister(self, client: LlamaStackClient) -> None:
def test_path_params_unregister(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `toolgroup_id` but received ''"):
client.toolgroups.with_raw_response.unregister(
- toolgroup_id="",
+ "",
)
@@ -199,14 +171,6 @@ async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
toolgroup = await async_client.toolgroups.list()
assert_matches_type(ToolgroupListResponse, toolgroup, path=["response"])
- @parametrize
- async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- toolgroup = await async_client.toolgroups.list(
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(ToolgroupListResponse, toolgroup, path=["response"])
-
@parametrize
async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.toolgroups.with_raw_response.list()
@@ -230,23 +194,14 @@ async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient
@parametrize
async def test_method_get(self, async_client: AsyncLlamaStackClient) -> None:
toolgroup = await async_client.toolgroups.get(
- toolgroup_id="toolgroup_id",
- )
- assert_matches_type(ToolGroup, toolgroup, path=["response"])
-
- @parametrize
- async def test_method_get_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- toolgroup = await async_client.toolgroups.get(
- toolgroup_id="toolgroup_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
+ "toolgroup_id",
)
assert_matches_type(ToolGroup, toolgroup, path=["response"])
@parametrize
async def test_raw_response_get(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.toolgroups.with_raw_response.get(
- toolgroup_id="toolgroup_id",
+ "toolgroup_id",
)
assert response.is_closed is True
@@ -257,7 +212,7 @@ async def test_raw_response_get(self, async_client: AsyncLlamaStackClient) -> No
@parametrize
async def test_streaming_response_get(self, async_client: AsyncLlamaStackClient) -> None:
async with async_client.toolgroups.with_streaming_response.get(
- toolgroup_id="toolgroup_id",
+ "toolgroup_id",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -271,7 +226,7 @@ async def test_streaming_response_get(self, async_client: AsyncLlamaStackClient)
async def test_path_params_get(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `toolgroup_id` but received ''"):
await async_client.toolgroups.with_raw_response.get(
- toolgroup_id="",
+ "",
)
@parametrize
@@ -289,8 +244,6 @@ async def test_method_register_with_all_params(self, async_client: AsyncLlamaSta
toolgroup_id="toolgroup_id",
args={"foo": True},
mcp_endpoint={"uri": "uri"},
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert toolgroup is None
@@ -323,23 +276,14 @@ async def test_streaming_response_register(self, async_client: AsyncLlamaStackCl
@parametrize
async def test_method_unregister(self, async_client: AsyncLlamaStackClient) -> None:
toolgroup = await async_client.toolgroups.unregister(
- toolgroup_id="toolgroup_id",
- )
- assert toolgroup is None
-
- @parametrize
- async def test_method_unregister_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- toolgroup = await async_client.toolgroups.unregister(
- toolgroup_id="toolgroup_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
+ "toolgroup_id",
)
assert toolgroup is None
@parametrize
async def test_raw_response_unregister(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.toolgroups.with_raw_response.unregister(
- toolgroup_id="toolgroup_id",
+ "toolgroup_id",
)
assert response.is_closed is True
@@ -350,7 +294,7 @@ async def test_raw_response_unregister(self, async_client: AsyncLlamaStackClient
@parametrize
async def test_streaming_response_unregister(self, async_client: AsyncLlamaStackClient) -> None:
async with async_client.toolgroups.with_streaming_response.unregister(
- toolgroup_id="toolgroup_id",
+ "toolgroup_id",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -364,5 +308,5 @@ async def test_streaming_response_unregister(self, async_client: AsyncLlamaStack
async def test_path_params_unregister(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `toolgroup_id` but received ''"):
await async_client.toolgroups.with_raw_response.unregister(
- toolgroup_id="",
+ "",
)
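
The toolgroups diff applies the same two edits to both `get` and `unregister`, and the `test_path_params_*` cases confirm the empty-string guard survives the positional switch: validation still happens client-side, before any request is sent. A short sketch of the resulting calling convention (the toolgroup ID is a placeholder):

    import pytest

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(base_url="http://localhost:8321")  # illustrative

    group = client.toolgroups.get("toolgroup_id")  # placeholder ID
    client.toolgroups.unregister("toolgroup_id")

    # Empty path params are rejected locally, mirroring the tests above.
    with pytest.raises(ValueError, match="Expected a non-empty value"):
        client.toolgroups.get("")
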
diff --git a/tests/api_resources/test_tools.py b/tests/api_resources/test_tools.py
index 4cfab520..2dd1ace1 100644
--- a/tests/api_resources/test_tools.py
+++ b/tests/api_resources/test_tools.py
@@ -26,8 +26,6 @@ def test_method_list(self, client: LlamaStackClient) -> None:
def test_method_list_with_all_params(self, client: LlamaStackClient) -> None:
tool = client.tools.list(
toolgroup_id="toolgroup_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(ToolListResponse, tool, path=["response"])
@@ -54,23 +52,14 @@ def test_streaming_response_list(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_get(self, client: LlamaStackClient) -> None:
tool = client.tools.get(
- tool_name="tool_name",
- )
- assert_matches_type(Tool, tool, path=["response"])
-
- @parametrize
- def test_method_get_with_all_params(self, client: LlamaStackClient) -> None:
- tool = client.tools.get(
- tool_name="tool_name",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
+ "tool_name",
)
assert_matches_type(Tool, tool, path=["response"])
@parametrize
def test_raw_response_get(self, client: LlamaStackClient) -> None:
response = client.tools.with_raw_response.get(
- tool_name="tool_name",
+ "tool_name",
)
assert response.is_closed is True
@@ -81,7 +70,7 @@ def test_raw_response_get(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_get(self, client: LlamaStackClient) -> None:
with client.tools.with_streaming_response.get(
- tool_name="tool_name",
+ "tool_name",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -95,7 +84,7 @@ def test_streaming_response_get(self, client: LlamaStackClient) -> None:
def test_path_params_get(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `tool_name` but received ''"):
client.tools.with_raw_response.get(
- tool_name="",
+ "",
)
@@ -111,8 +100,6 @@ async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
tool = await async_client.tools.list(
toolgroup_id="toolgroup_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(ToolListResponse, tool, path=["response"])
@@ -139,23 +126,14 @@ async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient
@parametrize
async def test_method_get(self, async_client: AsyncLlamaStackClient) -> None:
tool = await async_client.tools.get(
- tool_name="tool_name",
- )
- assert_matches_type(Tool, tool, path=["response"])
-
- @parametrize
- async def test_method_get_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- tool = await async_client.tools.get(
- tool_name="tool_name",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
+ "tool_name",
)
assert_matches_type(Tool, tool, path=["response"])
@parametrize
async def test_raw_response_get(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.tools.with_raw_response.get(
- tool_name="tool_name",
+ "tool_name",
)
assert response.is_closed is True
@@ -166,7 +144,7 @@ async def test_raw_response_get(self, async_client: AsyncLlamaStackClient) -> No
@parametrize
async def test_streaming_response_get(self, async_client: AsyncLlamaStackClient) -> None:
async with async_client.tools.with_streaming_response.get(
- tool_name="tool_name",
+ "tool_name",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -180,5 +158,5 @@ async def test_streaming_response_get(self, async_client: AsyncLlamaStackClient)
async def test_path_params_get(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `tool_name` but received ''"):
await async_client.tools.with_raw_response.get(
- tool_name="",
+ "",
)
diff --git a/tests/api_resources/test_vector_dbs.py b/tests/api_resources/test_vector_dbs.py
index 68891d0f..63c5a3f0 100644
--- a/tests/api_resources/test_vector_dbs.py
+++ b/tests/api_resources/test_vector_dbs.py
@@ -24,23 +24,14 @@ class TestVectorDBs:
@parametrize
def test_method_retrieve(self, client: LlamaStackClient) -> None:
vector_db = client.vector_dbs.retrieve(
- vector_db_id="vector_db_id",
- )
- assert_matches_type(Optional[VectorDBRetrieveResponse], vector_db, path=["response"])
-
- @parametrize
- def test_method_retrieve_with_all_params(self, client: LlamaStackClient) -> None:
- vector_db = client.vector_dbs.retrieve(
- vector_db_id="vector_db_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
+ "vector_db_id",
)
assert_matches_type(Optional[VectorDBRetrieveResponse], vector_db, path=["response"])
@parametrize
def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
response = client.vector_dbs.with_raw_response.retrieve(
- vector_db_id="vector_db_id",
+ "vector_db_id",
)
assert response.is_closed is True
@@ -51,7 +42,7 @@ def test_raw_response_retrieve(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
with client.vector_dbs.with_streaming_response.retrieve(
- vector_db_id="vector_db_id",
+ "vector_db_id",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -65,7 +56,7 @@ def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None:
def test_path_params_retrieve(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_db_id` but received ''"):
client.vector_dbs.with_raw_response.retrieve(
- vector_db_id="",
+ "",
)
@parametrize
@@ -73,14 +64,6 @@ def test_method_list(self, client: LlamaStackClient) -> None:
vector_db = client.vector_dbs.list()
assert_matches_type(VectorDBListResponse, vector_db, path=["response"])
- @parametrize
- def test_method_list_with_all_params(self, client: LlamaStackClient) -> None:
- vector_db = client.vector_dbs.list(
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(VectorDBListResponse, vector_db, path=["response"])
-
@parametrize
def test_raw_response_list(self, client: LlamaStackClient) -> None:
response = client.vector_dbs.with_raw_response.list()
@@ -117,8 +100,6 @@ def test_method_register_with_all_params(self, client: LlamaStackClient) -> None
embedding_dimension=0,
provider_id="provider_id",
provider_vector_db_id="provider_vector_db_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(VectorDBRegisterResponse, vector_db, path=["response"])
@@ -151,23 +132,14 @@ def test_streaming_response_register(self, client: LlamaStackClient) -> None:
@parametrize
def test_method_unregister(self, client: LlamaStackClient) -> None:
vector_db = client.vector_dbs.unregister(
- vector_db_id="vector_db_id",
- )
- assert vector_db is None
-
- @parametrize
- def test_method_unregister_with_all_params(self, client: LlamaStackClient) -> None:
- vector_db = client.vector_dbs.unregister(
- vector_db_id="vector_db_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
+ "vector_db_id",
)
assert vector_db is None
@parametrize
def test_raw_response_unregister(self, client: LlamaStackClient) -> None:
response = client.vector_dbs.with_raw_response.unregister(
- vector_db_id="vector_db_id",
+ "vector_db_id",
)
assert response.is_closed is True
@@ -178,7 +150,7 @@ def test_raw_response_unregister(self, client: LlamaStackClient) -> None:
@parametrize
def test_streaming_response_unregister(self, client: LlamaStackClient) -> None:
with client.vector_dbs.with_streaming_response.unregister(
- vector_db_id="vector_db_id",
+ "vector_db_id",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -192,7 +164,7 @@ def test_streaming_response_unregister(self, client: LlamaStackClient) -> None:
def test_path_params_unregister(self, client: LlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_db_id` but received ''"):
client.vector_dbs.with_raw_response.unregister(
- vector_db_id="",
+ "",
)
@@ -202,23 +174,14 @@ class TestAsyncVectorDBs:
@parametrize
async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
vector_db = await async_client.vector_dbs.retrieve(
- vector_db_id="vector_db_id",
- )
- assert_matches_type(Optional[VectorDBRetrieveResponse], vector_db, path=["response"])
-
- @parametrize
- async def test_method_retrieve_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- vector_db = await async_client.vector_dbs.retrieve(
- vector_db_id="vector_db_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
+ "vector_db_id",
)
assert_matches_type(Optional[VectorDBRetrieveResponse], vector_db, path=["response"])
@parametrize
async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.vector_dbs.with_raw_response.retrieve(
- vector_db_id="vector_db_id",
+ "vector_db_id",
)
assert response.is_closed is True
@@ -229,7 +192,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient)
@parametrize
async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
async with async_client.vector_dbs.with_streaming_response.retrieve(
- vector_db_id="vector_db_id",
+ "vector_db_id",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -243,7 +206,7 @@ async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackCl
async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_db_id` but received ''"):
await async_client.vector_dbs.with_raw_response.retrieve(
- vector_db_id="",
+ "",
)
@parametrize
@@ -251,14 +214,6 @@ async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
vector_db = await async_client.vector_dbs.list()
assert_matches_type(VectorDBListResponse, vector_db, path=["response"])
- @parametrize
- async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- vector_db = await async_client.vector_dbs.list(
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert_matches_type(VectorDBListResponse, vector_db, path=["response"])
-
@parametrize
async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.vector_dbs.with_raw_response.list()
@@ -295,8 +250,6 @@ async def test_method_register_with_all_params(self, async_client: AsyncLlamaSta
embedding_dimension=0,
provider_id="provider_id",
provider_vector_db_id="provider_vector_db_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(VectorDBRegisterResponse, vector_db, path=["response"])
@@ -329,23 +282,14 @@ async def test_streaming_response_register(self, async_client: AsyncLlamaStackCl
@parametrize
async def test_method_unregister(self, async_client: AsyncLlamaStackClient) -> None:
vector_db = await async_client.vector_dbs.unregister(
- vector_db_id="vector_db_id",
- )
- assert vector_db is None
-
- @parametrize
- async def test_method_unregister_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- vector_db = await async_client.vector_dbs.unregister(
- vector_db_id="vector_db_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
+ "vector_db_id",
)
assert vector_db is None
@parametrize
async def test_raw_response_unregister(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.vector_dbs.with_raw_response.unregister(
- vector_db_id="vector_db_id",
+ "vector_db_id",
)
assert response.is_closed is True
@@ -356,7 +300,7 @@ async def test_raw_response_unregister(self, async_client: AsyncLlamaStackClient
@parametrize
async def test_streaming_response_unregister(self, async_client: AsyncLlamaStackClient) -> None:
async with async_client.vector_dbs.with_streaming_response.unregister(
- vector_db_id="vector_db_id",
+ "vector_db_id",
) as response:
assert not response.is_closed
assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -370,5 +314,5 @@ async def test_streaming_response_unregister(self, async_client: AsyncLlamaStack
async def test_path_params_unregister(self, async_client: AsyncLlamaStackClient) -> None:
with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_db_id` but received ''"):
await async_client.vector_dbs.with_raw_response.unregister(
- vector_db_id="",
+ "",
)
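
test_vector_dbs.py mirrors the pattern exactly. One note for provider credentials specifically: the client has historically also exposed a `provider_data` constructor argument that is serialized into the `X-LlamaStack-Provider-Data` header; if that is still present in this version, it is a tidier replacement for the deleted kwargs than hand-built headers. Treat the sketch below as an assumption to verify:

    from llama_stack_client import LlamaStackClient

    # Assumption: provider_data is JSON-encoded into the
    # X-LlamaStack-Provider-Data header on every request.
    client = LlamaStackClient(
        base_url="http://localhost:8321",  # illustrative
        provider_data={"example_api_key": "..."},  # keys depend on the provider
    )

    db = client.vector_dbs.retrieve("vector_db_id")  # ID now positional
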
diff --git a/tests/api_resources/test_vector_io.py b/tests/api_resources/test_vector_io.py
index 25562be6..6e1cd5e0 100644
--- a/tests/api_resources/test_vector_io.py
+++ b/tests/api_resources/test_vector_io.py
@@ -41,8 +41,6 @@ def test_method_insert_with_all_params(self, client: LlamaStackClient) -> None:
],
vector_db_id="vector_db_id",
ttl_seconds=0,
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert vector_io is None
@@ -96,8 +94,6 @@ def test_method_query_with_all_params(self, client: LlamaStackClient) -> None:
query="string",
vector_db_id="vector_db_id",
params={"foo": True},
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(QueryChunksResponse, vector_io, path=["response"])
@@ -155,8 +151,6 @@ async def test_method_insert_with_all_params(self, async_client: AsyncLlamaStack
],
vector_db_id="vector_db_id",
ttl_seconds=0,
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert vector_io is None
@@ -210,8 +204,6 @@ async def test_method_query_with_all_params(self, async_client: AsyncLlamaStackC
query="string",
vector_db_id="vector_db_id",
params={"foo": True},
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(QueryChunksResponse, vector_io, path=["response"])
diff --git a/tests/api_resources/tool_runtime/test_rag_tool.py b/tests/api_resources/tool_runtime/test_rag_tool.py
index 4082752a..e687df20 100644
--- a/tests/api_resources/tool_runtime/test_rag_tool.py
+++ b/tests/api_resources/tool_runtime/test_rag_tool.py
@@ -32,24 +32,6 @@ def test_method_insert(self, client: LlamaStackClient) -> None:
)
assert rag_tool is None
- @parametrize
- def test_method_insert_with_all_params(self, client: LlamaStackClient) -> None:
- rag_tool = client.tool_runtime.rag_tool.insert(
- chunk_size_in_tokens=0,
- documents=[
- {
- "content": "string",
- "document_id": "document_id",
- "metadata": {"foo": True},
- "mime_type": "mime_type",
- }
- ],
- vector_db_id="vector_db_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert rag_tool is None
-
@parametrize
def test_raw_response_insert(self, client: LlamaStackClient) -> None:
response = client.tool_runtime.rag_tool.with_raw_response.insert(
@@ -111,8 +93,6 @@ def test_method_query_with_all_params(self, client: LlamaStackClient) -> None:
"type": "default",
},
},
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(QueryResult, rag_tool, path=["response"])
@@ -161,24 +141,6 @@ async def test_method_insert(self, async_client: AsyncLlamaStackClient) -> None:
)
assert rag_tool is None
- @parametrize
- async def test_method_insert_with_all_params(self, async_client: AsyncLlamaStackClient) -> None:
- rag_tool = await async_client.tool_runtime.rag_tool.insert(
- chunk_size_in_tokens=0,
- documents=[
- {
- "content": "string",
- "document_id": "document_id",
- "metadata": {"foo": True},
- "mime_type": "mime_type",
- }
- ],
- vector_db_id="vector_db_id",
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
- )
- assert rag_tool is None
-
@parametrize
async def test_raw_response_insert(self, async_client: AsyncLlamaStackClient) -> None:
response = await async_client.tool_runtime.rag_tool.with_raw_response.insert(
@@ -240,8 +202,6 @@ async def test_method_query_with_all_params(self, async_client: AsyncLlamaStackC
"type": "default",
},
},
- x_llama_stack_client_version="X-LlamaStack-Client-Version",
- x_llama_stack_provider_data="X-LlamaStack-Provider-Data",
)
assert_matches_type(QueryResult, rag_tool, path=["response"])
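
Finally, rag_tool keeps all of its substantive parameters (`documents`, `vector_db_id`, `chunk_size_in_tokens`, and the query config); only the header kwargs disappear, which is why `insert`'s `_with_all_params` variant could be dropped wholesale. For completeness, the post-change insert call looks like this (payload values copied from the deleted test, so they are placeholders, not meaningful data):

    from llama_stack_client import LlamaStackClient

    client = LlamaStackClient(base_url="http://localhost:8321")  # illustrative

    client.tool_runtime.rag_tool.insert(
        chunk_size_in_tokens=512,  # illustrative; the deleted test used 0
        vector_db_id="vector_db_id",
        documents=[
            {
                "content": "string",
                "document_id": "document_id",
                "metadata": {"foo": True},
                "mime_type": "mime_type",
            }
        ],
    )
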