From e515481f103480285df700fb5dcf204810e05508 Mon Sep 17 00:00:00 2001
From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com>
Date: Fri, 18 Jul 2025 20:07:18 +0000
Subject: [PATCH 1/4] chore: sync repo

---
 .devcontainer/Dockerfile | 4 +-
 .devcontainer/devcontainer.json | 5 +-
 .github/CODEOWNERS | 5 -
 .github/ISSUE_TEMPLATE/bug.yml | 77 -
 .github/PULL_REQUEST_TEMPLATE.md | 11 -
 .github/workflows/ci.yml | 96 ++
 .github/workflows/pre-commit.yml | 25 -
 .github/workflows/publish-to-test-pypi.yml | 76 -
 .gitignore | 1 -
 .pre-commit-config.yaml | 74 -
 .python-version | 2 +-
 .ruff.toml | 37 -
 .stats.yml | 6 +-
 Brewfile | 1 +
 CODE_OF_CONDUCT.md | 80 -
 CONTRIBUTING.md | 86 +-
 LICENSE | 223 ++-
 README.md | 267 ++--
 SECURITY.md | 10 +-
 api.md | 342 +---
 bin/publish-pypi | 3 -
 docs/cli_reference.md | 918 -----------
 examples/.keep | 2 +-
 examples/mcp_agent.py | 136 --
 .../supervised_fine_tune_client.py | 108 --
 mypy.ini | 7 +-
 pyproject.toml | 137 +-
 requirements-dev.lock | 135 ++
 requirements.lock | 72 +
 scripts/bootstrap | 2 +-
 scripts/gen_cli_doc.py | 82 -
 scripts/lint | 12 +-
 scripts/test | 116 +-
 scripts/utils/ruffen-docs.py | 4 +-
 scripts/utils/upload-artifact.sh | 27 +
 .../__init__.py | 22 +-
 .../_base_client.py | 13 +-
 src/llama_stack_cli/_client.py | 420 +++++
 .../_compat.py | 0
 .../_constants.py | 0
 .../_exceptions.py | 4 +-
 .../_files.py | 2 +-
 .../_models.py | 13 +-
 .../_qs.py | 0
 .../_resource.py | 10 +-
 .../_response.py | 12 +-
 .../_streaming.py | 6 +-
 .../_types.py | 2 +-
 .../_utils/__init__.py | 0
 .../_utils/_logs.py | 8 +-
 .../_utils/_proxy.py | 0
 .../_utils/_reflection.py | 0
 .../_utils/_resources_proxy.py | 24 +
 .../_utils/_streams.py | 0
 .../_utils/_sync.py | 0
 .../_utils/_transform.py | 0
 .../_utils/_typing.py | 0
 .../_utils/_utils.py | 0
 .../_version.py | 4 +-
 .../lib/.keep | 2 +-
 .../py.typed | 0
 src/llama_stack_cli/resources/__init__.py | 47 +
 .../resources/pet.py} | 771 ++++-----
 .../resources/store/__init__.py | 33 +
 .../resources/store/order.py} | 281 ++--
 src/llama_stack_cli/resources/store/store.py | 167 ++
 src/llama_stack_cli/resources/user.py | 724 +++++++++
 src/llama_stack_cli/types/__init__.py | 25 +
 src/llama_stack_cli/types/category.py | 13 +
 .../types/category_param.py} | 9 +-
 src/llama_stack_cli/types/pet.py | 32 +
 .../types/pet_create_params.py | 32 +
 .../types/pet_find_by_status_params.py | 12 +
 .../types/pet_find_by_status_response.py} | 6 +-
 .../types/pet_find_by_tags_params.py | 13 +
 .../types/pet_find_by_tags_response.py} | 6 +-
 .../types/pet_update_by_id_params.py | 15 +
 .../types/pet_update_params.py | 32 +
 .../types/pet_upload_image_params.py | 17 +
 .../types/pet_upload_image_response.py | 15 +
 .../types/shared}/__init__.py | 2 +-
 src/llama_stack_cli/types/shared/order.py | 26 +
 src/llama_stack_cli/types/store/__init__.py | 5 +
 .../types/store/order_create_params.py | 26 +
 .../types/store_list_inventory_response.py} | 8 +-
 src/llama_stack_cli/types/user.py | 28 +
 .../types/user_create_params.py | 28 +
 .../types/user_create_with_list_params.py | 14 +
 .../types/user_login_params.py | 15 +
 .../types/user_login_response.py | 7 +
 src/llama_stack_cli/types/user_param.py | 28 +
 .../types/user_update_params.py | 28 +
 src/llama_stack_client/_client.py | 664 --------
 .../_utils/_resources_proxy.py | 24 -
 src/llama_stack_client/_wrappers.py | 17 -
 src/llama_stack_client/lib/__init__.py | 9 -
 src/llama_stack_client/lib/agents/__init__.py | 5 -
 src/llama_stack_client/lib/agents/agent.py | 601 -------
 .../lib/agents/client_tool.py | 235 ---
.../lib/agents/event_logger.py | 165 -- .../lib/agents/react/__init__.py | 5 - .../lib/agents/react/agent.py | 223 --- .../lib/agents/react/prompts.py | 151 -- .../lib/agents/react/tool_parser.py | 63 - .../lib/agents/tool_parser.py | 44 - src/llama_stack_client/lib/cli/__init__.py | 12 - .../lib/cli/common/__init__.py | 5 - .../lib/cli/common/utils.py | 54 - src/llama_stack_client/lib/cli/configure.py | 68 - src/llama_stack_client/lib/cli/constants.py | 14 - .../lib/cli/datasets/__init__.py | 9 - .../lib/cli/datasets/datasets.py | 22 - .../lib/cli/datasets/list.py | 32 - .../lib/cli/datasets/register.py | 78 - .../lib/cli/datasets/unregister.py | 20 - .../lib/cli/eval/__init__.py | 9 - src/llama_stack_client/lib/cli/eval/eval.py | 22 - .../lib/cli/eval/run_benchmark.py | 194 --- .../lib/cli/eval/run_scoring.py | 120 -- src/llama_stack_client/lib/cli/eval/utils.py | 58 - .../lib/cli/eval_tasks/__init__.py | 9 - .../lib/cli/eval_tasks/eval_tasks.py | 66 - .../lib/cli/eval_tasks/list.py | 35 - .../lib/cli/inference/__init__.py | 9 - .../lib/cli/inference/inference.py | 89 -- .../lib/cli/inspect/__init__.py | 3 - .../lib/cli/inspect/inspect.py | 13 - .../lib/cli/inspect/version.py | 16 - .../lib/cli/llama_stack_client.py | 103 -- .../lib/cli/models/__init__.py | 9 - .../lib/cli/models/models.py | 143 -- .../lib/cli/post_training/__init__.py | 9 - .../lib/cli/post_training/post_training.py | 115 -- .../lib/cli/providers/__init__.py | 3 - .../lib/cli/providers/inspect.py | 27 - .../lib/cli/providers/list.py | 26 - .../lib/cli/providers/providers.py | 15 - .../lib/cli/scoring_functions/__init__.py | 9 - .../lib/cli/scoring_functions/list.py | 38 - .../scoring_functions/scoring_functions.py | 63 - .../lib/cli/shields/__init__.py | 9 - .../lib/cli/shields/shields.py | 98 -- .../lib/cli/toolgroups/__init__.py | 9 - .../lib/cli/toolgroups/toolgroups.py | 130 -- .../lib/cli/vector_dbs/__init__.py | 9 - .../lib/cli/vector_dbs/vector_dbs.py | 116 -- .../lib/inference/__init__.py | 5 - .../lib/inference/event_logger.py | 80 - src/llama_stack_client/lib/inference/utils.py | 21 - src/llama_stack_client/lib/inline/inline.py | 0 src/llama_stack_client/lib/stream_printer.py | 24 - src/llama_stack_client/lib/tools/mcp_oauth.py | 297 ---- src/llama_stack_client/pagination.py | 50 - src/llama_stack_client/resources/__init__.py | 383 ----- .../resources/agents/__init__.py | 61 - .../resources/agents/agents.py | 344 ---- .../resources/agents/session.py | 362 ----- .../resources/agents/steps.py | 181 --- .../resources/agents/turn.py | 875 ---------- .../resources/benchmarks.py | 359 ----- .../resources/chat/__init__.py | 33 - src/llama_stack_client/resources/chat/chat.py | 102 -- .../resources/chat/completions.py | 1048 ------------ .../resources/completions.py | 737 --------- src/llama_stack_client/resources/datasets.py | 588 ------- .../resources/embeddings.py | 223 --- .../resources/eval/__init__.py | 33 - src/llama_stack_client/resources/eval/eval.py | 530 ------- src/llama_stack_client/resources/files.py | 572 ------- src/llama_stack_client/resources/inference.py | 1401 ----------------- src/llama_stack_client/resources/inspect.py | 186 --- src/llama_stack_client/resources/models.py | 430 ----- .../resources/post_training/__init__.py | 33 - .../resources/post_training/job.py | 404 ----- .../resources/post_training/post_training.py | 393 ----- src/llama_stack_client/resources/providers.py | 225 --- .../resources/responses/__init__.py | 33 - .../resources/responses/input_items.py | 226 --- 
.../resources/responses/responses.py | 689 -------- src/llama_stack_client/resources/routes.py | 146 -- src/llama_stack_client/resources/safety.py | 196 --- src/llama_stack_client/resources/scoring.py | 295 ---- .../resources/scoring_functions.py | 359 ----- src/llama_stack_client/resources/shields.py | 341 ---- .../resources/synthetic_data_generation.py | 185 --- .../resources/tool_runtime/__init__.py | 33 - .../resources/tool_runtime/rag_tool.py | 290 ---- .../resources/tool_runtime/tool_runtime.py | 327 ---- .../resources/toolgroups.py | 423 ----- src/llama_stack_client/resources/tools.py | 257 --- .../resources/vector_dbs.py | 430 ----- src/llama_stack_client/resources/vector_io.py | 314 ---- .../resources/vector_stores/__init__.py | 33 - .../resources/vector_stores/files.py | 201 --- .../resources/vector_stores/vector_stores.py | 825 ---------- src/llama_stack_client/types/__init__.py | 175 -- .../types/agent_create_params.py | 14 - .../types/agent_create_response.py | 9 - .../types/agents/__init__.py | 15 - .../agent_turn_response_stream_chunk.py | 10 - .../types/agents/session.py | 19 - .../types/agents/session_create_params.py | 12 - .../types/agents/session_create_response.py | 9 - .../types/agents/session_retrieve_params.py | 15 - .../types/agents/step_retrieve_response.py | 23 - src/llama_stack_client/types/agents/turn.py | 107 -- .../types/agents/turn_create_params.py | 161 -- .../types/agents/turn_response_event.py | 10 - .../agents/turn_response_event_payload.py | 97 -- .../types/agents/turn_resume_params.py | 32 - .../types/algorithm_config_param.py | 37 - src/llama_stack_client/types/benchmark.py | 24 - .../types/benchmark_config_param.py | 28 - .../types/benchmark_list_response.py | 10 - .../types/benchmark_register_params.py | 28 - src/llama_stack_client/types/chat/__init__.py | 9 - .../types/chat/completion_create_params.py | 401 ----- .../types/chat/completion_create_response.py | 383 ----- .../types/chat/completion_list_params.py | 21 - .../types/chat/completion_list_response.py | 667 -------- .../chat/completion_retrieve_response.py | 626 -------- .../types/chat_completion_chunk.py | 124 -- .../chat_completion_response_stream_chunk.py | 42 - .../types/completion_create_params.py | 82 - .../types/completion_create_response.py | 86 - .../types/completion_response.py | 30 - .../types/create_embeddings_response.py | 44 - .../types/dataset_iterrows_params.py | 15 - .../types/dataset_iterrows_response.py | 18 - .../types/dataset_list_response.py | 64 - .../types/dataset_register_params.py | 69 - .../types/dataset_register_response.py | 52 - .../types/dataset_retrieve_response.py | 52 - .../types/delete_file_response.py | 18 - .../types/embedding_create_params.py | 41 - .../types/embeddings_response.py | 16 - .../types/eval_candidate_param.py | 35 - .../types/eval_evaluate_rows_alpha_params.py | 21 - .../types/eval_evaluate_rows_params.py | 21 - .../types/eval_run_eval_alpha_params.py | 14 - .../types/eval_run_eval_params.py | 14 - .../types/evaluate_response.py | 16 - src/llama_stack_client/types/event_param.py | 89 -- src/llama_stack_client/types/file.py | 30 - .../types/file_create_params.py | 16 - .../types/file_list_params.py | 33 - src/llama_stack_client/types/health_info.py | 11 - .../inference_batch_chat_completion_params.py | 85 - ...nference_batch_chat_completion_response.py | 12 - .../inference_batch_completion_params.py | 41 - .../types/inference_chat_completion_params.py | 134 -- .../types/inference_completion_params.py | 65 - 
.../types/inference_embeddings_params.py | 45 - .../types/inference_step.py | 32 - src/llama_stack_client/types/job.py | 13 - .../types/list_benchmarks_response.py | 10 - .../types/list_datasets_response.py | 10 - .../types/list_files_response.py | 23 - .../types/list_models_response.py | 10 - .../types/list_post_training_jobs_response.py | 15 - .../types/list_providers_response.py | 10 - .../types/list_routes_response.py | 10 - .../types/list_scoring_functions_response.py | 10 - .../types/list_shields_response.py | 10 - .../types/list_tool_groups_response.py | 10 - .../types/list_tools_response.py | 10 - .../types/list_vector_dbs_response.py | 10 - .../types/list_vector_stores_response.py | 20 - .../types/memory_retrieval_step.py | 33 - src/llama_stack_client/types/model.py | 24 - .../types/model_register_params.py | 25 - .../types/post_training/__init__.py | 10 - .../post_training/job_artifacts_params.py | 12 - .../post_training/job_artifacts_response.py | 13 - .../types/post_training/job_cancel_params.py | 12 - .../types/post_training/job_list_response.py | 15 - .../types/post_training/job_status_params.py | 12 - .../post_training/job_status_response.py | 25 - .../types/post_training_job.py | 9 - ...ost_training_preference_optimize_params.py | 99 -- ...st_training_supervised_fine_tune_params.py | 93 -- src/llama_stack_client/types/provider_info.py | 19 - .../types/provider_list_response.py | 10 - .../types/query_chunks_response.py | 80 - .../types/query_condition_param.py | 16 - .../types/query_spans_response.py | 10 - .../types/response_create_params.py | 348 ---- .../types/response_list_params.py | 21 - .../types/response_list_response.py | 473 ------ .../types/response_object.py | 290 ---- .../types/response_object_stream.py | 677 -------- .../types/responses/__init__.py | 6 - .../types/responses/input_item_list_params.py | 28 - .../responses/input_item_list_response.py | 187 --- src/llama_stack_client/types/route_info.py | 15 - .../types/route_list_response.py | 10 - .../types/run_shield_response.py | 12 - .../types/safety_run_shield_params.py | 21 - src/llama_stack_client/types/scoring_fn.py | 28 - .../types/scoring_fn_params.py | 41 - .../types/scoring_fn_params_param.py | 43 - .../types/scoring_function_list_response.py | 10 - .../types/scoring_function_register_params.py | 32 - .../types/scoring_score_batch_params.py | 21 - .../types/scoring_score_batch_response.py | 14 - .../types/scoring_score_params.py | 18 - .../types/scoring_score_response.py | 13 - .../types/shared/__init__.py | 26 - .../types/shared/agent_config.py | 92 -- .../types/shared/batch_completion.py | 12 - .../types/shared/chat_completion_response.py | 27 - .../types/shared/completion_message.py | 31 - .../types/shared/content_delta.py | 33 - .../types/shared/document.py | 71 - .../types/shared/interleaved_content.py | 49 - .../types/shared/interleaved_content_item.py | 51 - .../types/shared/message.py | 16 - .../types/shared/param_type.py | 78 - .../types/shared/query_config.py | 65 - .../types/shared/query_generator_config.py | 28 - .../types/shared/query_result.py | 15 - .../types/shared/response_format.py | 33 - .../types/shared/return_type.py | 22 - .../types/shared/safety_violation.py | 16 - .../types/shared/sampling_params.py | 64 - .../types/shared/scoring_result.py | 15 - .../types/shared/system_message.py | 21 - .../types/shared/tool_call.py | 26 - .../types/shared/tool_call_or_string.py | 10 - .../types/shared/tool_param_definition.py | 17 - .../types/shared/tool_response_message.py | 19 - 
.../types/shared/user_message.py | 23 - .../types/shared_params/__init__.py | 18 - .../types/shared_params/agent_config.py | 93 -- .../types/shared_params/completion_message.py | 32 - .../types/shared_params/document.py | 72 - .../shared_params/interleaved_content.py | 50 - .../shared_params/interleaved_content_item.py | 48 - .../types/shared_params/message.py | 15 - .../types/shared_params/query_config.py | 65 - .../shared_params/query_generator_config.py | 25 - .../types/shared_params/response_format.py | 30 - .../types/shared_params/return_type.py | 24 - .../types/shared_params/sampling_params.py | 60 - .../types/shared_params/system_message.py | 22 - .../types/shared_params/tool_call.py | 33 - .../shared_params/tool_param_definition.py | 18 - .../shared_params/tool_response_message.py | 20 - .../types/shared_params/user_message.py | 23 - src/llama_stack_client/types/shield.py | 20 - .../types/shield_call_step.py | 30 - .../types/shield_register_params.py | 22 - .../types/span_with_status.py | 27 - ...nthetic_data_generation_generate_params.py | 19 - .../synthetic_data_generation_response.py | 13 - .../types/telemetry_get_span_response.py | 24 - .../types/telemetry_get_span_tree_params.py | 16 - .../types/telemetry_get_span_tree_response.py | 10 - .../types/telemetry_log_event_params.py | 17 - .../types/telemetry_query_spans_params.py | 21 - .../types/telemetry_query_spans_response.py | 28 - .../types/telemetry_query_traces_params.py | 24 - .../types/telemetry_query_traces_response.py | 10 - .../telemetry_save_spans_to_dataset_params.py | 24 - .../types/token_log_probs.py | 12 - src/llama_stack_client/types/tool.py | 38 - src/llama_stack_client/types/tool_def.py | 29 - .../types/tool_def_param.py | 30 - .../types/tool_execution_step.py | 34 - src/llama_stack_client/types/tool_group.py | 26 - .../types/tool_invocation_result.py | 19 - src/llama_stack_client/types/tool_response.py | 20 - .../types/tool_response_param.py | 21 - .../types/tool_runtime/__init__.py | 6 - .../tool_runtime/rag_tool_insert_params.py | 18 - .../tool_runtime/rag_tool_query_params.py | 21 - .../types/tool_runtime_invoke_tool_params.py | 16 - .../types/tool_runtime_list_tools_params.py | 19 - .../types/tool_runtime_list_tools_response.py | 10 - .../types/toolgroup_list_response.py | 10 - .../types/toolgroup_register_params.py | 26 - src/llama_stack_client/types/trace.py | 18 - .../types/vector_db_list_response.py | 25 - .../types/vector_db_register_params.py | 24 - .../types/vector_db_register_response.py | 22 - .../types/vector_db_retrieve_response.py | 22 - .../types/vector_io_insert_params.py | 94 -- .../types/vector_io_query_params.py | 21 - src/llama_stack_client/types/vector_store.py | 44 - .../types/vector_store_create_params.py | 43 - .../types/vector_store_delete_response.py | 13 - .../types/vector_store_list_params.py | 33 - .../types/vector_store_search_params.py | 34 - .../types/vector_store_search_response.py | 38 - .../types/vector_store_update_params.py | 19 - .../types/vector_stores/__init__.py | 6 - .../types/vector_stores/file_create_params.py | 46 - .../types/vector_stores/vector_store_file.py | 64 - src/llama_stack_client/types/version_info.py | 9 - tests/api_resources/agents/test_session.py | 321 ---- tests/api_resources/agents/test_steps.py | 172 -- tests/api_resources/agents/test_turn.py | 1030 ------------ tests/api_resources/chat/__init__.py | 1 - tests/api_resources/chat/test_completions.py | 514 ------ tests/api_resources/eval/__init__.py | 1 - tests/api_resources/eval/test_jobs.py | 312 
---- tests/api_resources/post_training/__init__.py | 1 - tests/api_resources/post_training/test_job.py | 264 ---- tests/api_resources/responses/__init__.py | 1 - .../responses/test_input_items.py | 124 -- .../{agents => store}/__init__.py | 0 tests/api_resources/store/test_order.py | 243 +++ tests/api_resources/test_agents.py | 278 ---- tests/api_resources/test_benchmarks.py | 248 --- tests/api_resources/test_completions.py | 268 ---- tests/api_resources/test_datasets.py | 437 ----- tests/api_resources/test_embeddings.py | 114 -- tests/api_resources/test_eval.py | 1115 ------------- tests/api_resources/test_files.py | 390 ----- tests/api_resources/test_inference.py | 1035 ------------ tests/api_resources/test_inspect.py | 124 -- tests/api_resources/test_models.py | 310 ---- tests/api_resources/test_pet.py | 717 +++++++++ tests/api_resources/test_post_training.py | 462 ------ tests/api_resources/test_providers.py | 150 -- tests/api_resources/test_responses.py | 426 ----- tests/api_resources/test_routes.py | 74 - tests/api_resources/test_safety.py | 128 -- tests/api_resources/test_scoring.py | 253 --- tests/api_resources/test_scoring_functions.py | 263 ---- tests/api_resources/test_shields.py | 232 --- tests/api_resources/test_store.py | 80 + .../test_synthetic_data_generation.py | 152 -- tests/api_resources/test_telemetry.py | 813 ---------- tests/api_resources/test_tool_runtime.py | 161 -- tests/api_resources/test_toolgroups.py | 314 ---- tests/api_resources/test_tools.py | 164 -- tests/api_resources/test_user.py | 620 ++++++++ tests/api_resources/test_vector_dbs.py | 320 ---- tests/api_resources/test_vector_io.py | 266 ---- tests/api_resources/test_vector_stores.py | 555 ------- tests/api_resources/tool_runtime/__init__.py | 1 - .../tool_runtime/test_rag_tool.py | 246 --- tests/api_resources/vector_stores/__init__.py | 1 - .../api_resources/vector_stores/test_files.py | 128 -- tests/conftest.py | 18 +- tests/test_client.py | 427 ++--- tests/test_deepcopy.py | 2 +- tests/test_extract_files.py | 4 +- tests/test_files.py | 2 +- tests/test_models.py | 51 +- tests/test_qs.py | 2 +- tests/test_required_args.py | 2 +- tests/test_response.py | 38 +- tests/test_streaming.py | 34 +- tests/test_transform.py | 8 +- tests/test_utils/test_proxy.py | 2 +- tests/test_utils/test_typing.py | 2 +- tests/utils.py | 8 +- uv.lock | 802 ---------- 460 files changed, 5384 insertions(+), 45868 deletions(-) delete mode 100644 .github/CODEOWNERS delete mode 100644 .github/ISSUE_TEMPLATE/bug.yml delete mode 100644 .github/PULL_REQUEST_TEMPLATE.md create mode 100644 .github/workflows/ci.yml delete mode 100644 .github/workflows/pre-commit.yml delete mode 100644 .github/workflows/publish-to-test-pypi.yml delete mode 100644 .pre-commit-config.yaml delete mode 100644 .ruff.toml delete mode 100644 CODE_OF_CONDUCT.md delete mode 100644 docs/cli_reference.md delete mode 100644 examples/mcp_agent.py delete mode 100644 examples/post_training/supervised_fine_tune_client.py create mode 100644 requirements-dev.lock create mode 100644 requirements.lock delete mode 100644 scripts/gen_cli_doc.py create mode 100755 scripts/utils/upload-artifact.sh rename src/{llama_stack_client => llama_stack_cli}/__init__.py (79%) rename src/{llama_stack_client => llama_stack_cli}/_base_client.py (99%) create mode 100644 src/llama_stack_cli/_client.py rename src/{llama_stack_client => llama_stack_cli}/_compat.py (100%) rename src/{llama_stack_client => llama_stack_cli}/_constants.py (100%) rename src/{llama_stack_client => 
llama_stack_cli}/_exceptions.py (97%) rename src/{llama_stack_client => llama_stack_cli}/_files.py (96%) rename src/{llama_stack_client => llama_stack_cli}/_models.py (98%) rename src/{llama_stack_client => llama_stack_cli}/_qs.py (100%) rename src/{llama_stack_client => llama_stack_cli}/_resource.py (78%) rename src/{llama_stack_client => llama_stack_cli}/_response.py (98%) rename src/{llama_stack_client => llama_stack_cli}/_streaming.py (98%) rename src/{llama_stack_client => llama_stack_cli}/_types.py (99%) rename src/{llama_stack_client => llama_stack_cli}/_utils/__init__.py (100%) rename src/{llama_stack_client => llama_stack_cli}/_utils/_logs.py (62%) rename src/{llama_stack_client => llama_stack_cli}/_utils/_proxy.py (100%) rename src/{llama_stack_client => llama_stack_cli}/_utils/_reflection.py (100%) create mode 100644 src/llama_stack_cli/_utils/_resources_proxy.py rename src/{llama_stack_client => llama_stack_cli}/_utils/_streams.py (100%) rename src/{llama_stack_client => llama_stack_cli}/_utils/_sync.py (100%) rename src/{llama_stack_client => llama_stack_cli}/_utils/_transform.py (100%) rename src/{llama_stack_client => llama_stack_cli}/_utils/_typing.py (100%) rename src/{llama_stack_client => llama_stack_cli}/_utils/_utils.py (100%) rename src/{llama_stack_client => llama_stack_cli}/_version.py (59%) rename src/{llama_stack_client => llama_stack_cli}/lib/.keep (81%) rename src/{llama_stack_client => llama_stack_cli}/py.typed (100%) create mode 100644 src/llama_stack_cli/resources/__init__.py rename src/{llama_stack_client/resources/telemetry.py => llama_stack_cli/resources/pet.py} (51%) create mode 100644 src/llama_stack_cli/resources/store/__init__.py rename src/{llama_stack_client/resources/eval/jobs.py => llama_stack_cli/resources/store/order.py} (58%) create mode 100644 src/llama_stack_cli/resources/store/store.py create mode 100644 src/llama_stack_cli/resources/user.py create mode 100644 src/llama_stack_cli/types/__init__.py create mode 100644 src/llama_stack_cli/types/category.py rename src/{llama_stack_client/types/tool_list_params.py => llama_stack_cli/types/category_param.py} (52%) create mode 100644 src/llama_stack_cli/types/pet.py create mode 100644 src/llama_stack_cli/types/pet_create_params.py create mode 100644 src/llama_stack_cli/types/pet_find_by_status_params.py rename src/{llama_stack_client/types/shield_list_response.py => llama_stack_cli/types/pet_find_by_status_response.py} (59%) create mode 100644 src/llama_stack_cli/types/pet_find_by_tags_params.py rename src/{llama_stack_client/types/model_list_response.py => llama_stack_cli/types/pet_find_by_tags_response.py} (60%) create mode 100644 src/llama_stack_cli/types/pet_update_by_id_params.py create mode 100644 src/llama_stack_cli/types/pet_update_params.py create mode 100644 src/llama_stack_cli/types/pet_upload_image_params.py create mode 100644 src/llama_stack_cli/types/pet_upload_image_response.py rename src/{llama_stack_client/types/eval => llama_stack_cli/types/shared}/__init__.py (71%) create mode 100644 src/llama_stack_cli/types/shared/order.py create mode 100644 src/llama_stack_cli/types/store/__init__.py create mode 100644 src/llama_stack_cli/types/store/order_create_params.py rename src/{llama_stack_client/types/tool_list_response.py => llama_stack_cli/types/store_list_inventory_response.py} (51%) create mode 100644 src/llama_stack_cli/types/user.py create mode 100644 src/llama_stack_cli/types/user_create_params.py create mode 100644 src/llama_stack_cli/types/user_create_with_list_params.py create 
mode 100644 src/llama_stack_cli/types/user_login_params.py create mode 100644 src/llama_stack_cli/types/user_login_response.py create mode 100644 src/llama_stack_cli/types/user_param.py create mode 100644 src/llama_stack_cli/types/user_update_params.py delete mode 100644 src/llama_stack_client/_client.py delete mode 100644 src/llama_stack_client/_utils/_resources_proxy.py delete mode 100644 src/llama_stack_client/_wrappers.py delete mode 100644 src/llama_stack_client/lib/__init__.py delete mode 100644 src/llama_stack_client/lib/agents/__init__.py delete mode 100644 src/llama_stack_client/lib/agents/agent.py delete mode 100644 src/llama_stack_client/lib/agents/client_tool.py delete mode 100644 src/llama_stack_client/lib/agents/event_logger.py delete mode 100644 src/llama_stack_client/lib/agents/react/__init__.py delete mode 100644 src/llama_stack_client/lib/agents/react/agent.py delete mode 100644 src/llama_stack_client/lib/agents/react/prompts.py delete mode 100644 src/llama_stack_client/lib/agents/react/tool_parser.py delete mode 100644 src/llama_stack_client/lib/agents/tool_parser.py delete mode 100644 src/llama_stack_client/lib/cli/__init__.py delete mode 100644 src/llama_stack_client/lib/cli/common/__init__.py delete mode 100644 src/llama_stack_client/lib/cli/common/utils.py delete mode 100644 src/llama_stack_client/lib/cli/configure.py delete mode 100644 src/llama_stack_client/lib/cli/constants.py delete mode 100644 src/llama_stack_client/lib/cli/datasets/__init__.py delete mode 100644 src/llama_stack_client/lib/cli/datasets/datasets.py delete mode 100644 src/llama_stack_client/lib/cli/datasets/list.py delete mode 100644 src/llama_stack_client/lib/cli/datasets/register.py delete mode 100644 src/llama_stack_client/lib/cli/datasets/unregister.py delete mode 100644 src/llama_stack_client/lib/cli/eval/__init__.py delete mode 100644 src/llama_stack_client/lib/cli/eval/eval.py delete mode 100644 src/llama_stack_client/lib/cli/eval/run_benchmark.py delete mode 100644 src/llama_stack_client/lib/cli/eval/run_scoring.py delete mode 100644 src/llama_stack_client/lib/cli/eval/utils.py delete mode 100644 src/llama_stack_client/lib/cli/eval_tasks/__init__.py delete mode 100644 src/llama_stack_client/lib/cli/eval_tasks/eval_tasks.py delete mode 100644 src/llama_stack_client/lib/cli/eval_tasks/list.py delete mode 100644 src/llama_stack_client/lib/cli/inference/__init__.py delete mode 100644 src/llama_stack_client/lib/cli/inference/inference.py delete mode 100644 src/llama_stack_client/lib/cli/inspect/__init__.py delete mode 100644 src/llama_stack_client/lib/cli/inspect/inspect.py delete mode 100644 src/llama_stack_client/lib/cli/inspect/version.py delete mode 100644 src/llama_stack_client/lib/cli/llama_stack_client.py delete mode 100644 src/llama_stack_client/lib/cli/models/__init__.py delete mode 100644 src/llama_stack_client/lib/cli/models/models.py delete mode 100644 src/llama_stack_client/lib/cli/post_training/__init__.py delete mode 100644 src/llama_stack_client/lib/cli/post_training/post_training.py delete mode 100644 src/llama_stack_client/lib/cli/providers/__init__.py delete mode 100644 src/llama_stack_client/lib/cli/providers/inspect.py delete mode 100644 src/llama_stack_client/lib/cli/providers/list.py delete mode 100644 src/llama_stack_client/lib/cli/providers/providers.py delete mode 100644 src/llama_stack_client/lib/cli/scoring_functions/__init__.py delete mode 100644 src/llama_stack_client/lib/cli/scoring_functions/list.py delete mode 100644 
src/llama_stack_client/lib/cli/scoring_functions/scoring_functions.py delete mode 100644 src/llama_stack_client/lib/cli/shields/__init__.py delete mode 100644 src/llama_stack_client/lib/cli/shields/shields.py delete mode 100644 src/llama_stack_client/lib/cli/toolgroups/__init__.py delete mode 100644 src/llama_stack_client/lib/cli/toolgroups/toolgroups.py delete mode 100644 src/llama_stack_client/lib/cli/vector_dbs/__init__.py delete mode 100644 src/llama_stack_client/lib/cli/vector_dbs/vector_dbs.py delete mode 100644 src/llama_stack_client/lib/inference/__init__.py delete mode 100644 src/llama_stack_client/lib/inference/event_logger.py delete mode 100644 src/llama_stack_client/lib/inference/utils.py delete mode 100644 src/llama_stack_client/lib/inline/inline.py delete mode 100644 src/llama_stack_client/lib/stream_printer.py delete mode 100644 src/llama_stack_client/lib/tools/mcp_oauth.py delete mode 100644 src/llama_stack_client/pagination.py delete mode 100644 src/llama_stack_client/resources/__init__.py delete mode 100644 src/llama_stack_client/resources/agents/__init__.py delete mode 100644 src/llama_stack_client/resources/agents/agents.py delete mode 100644 src/llama_stack_client/resources/agents/session.py delete mode 100644 src/llama_stack_client/resources/agents/steps.py delete mode 100644 src/llama_stack_client/resources/agents/turn.py delete mode 100644 src/llama_stack_client/resources/benchmarks.py delete mode 100644 src/llama_stack_client/resources/chat/__init__.py delete mode 100644 src/llama_stack_client/resources/chat/chat.py delete mode 100644 src/llama_stack_client/resources/chat/completions.py delete mode 100644 src/llama_stack_client/resources/completions.py delete mode 100644 src/llama_stack_client/resources/datasets.py delete mode 100644 src/llama_stack_client/resources/embeddings.py delete mode 100644 src/llama_stack_client/resources/eval/__init__.py delete mode 100644 src/llama_stack_client/resources/eval/eval.py delete mode 100644 src/llama_stack_client/resources/files.py delete mode 100644 src/llama_stack_client/resources/inference.py delete mode 100644 src/llama_stack_client/resources/inspect.py delete mode 100644 src/llama_stack_client/resources/models.py delete mode 100644 src/llama_stack_client/resources/post_training/__init__.py delete mode 100644 src/llama_stack_client/resources/post_training/job.py delete mode 100644 src/llama_stack_client/resources/post_training/post_training.py delete mode 100644 src/llama_stack_client/resources/providers.py delete mode 100644 src/llama_stack_client/resources/responses/__init__.py delete mode 100644 src/llama_stack_client/resources/responses/input_items.py delete mode 100644 src/llama_stack_client/resources/responses/responses.py delete mode 100644 src/llama_stack_client/resources/routes.py delete mode 100644 src/llama_stack_client/resources/safety.py delete mode 100644 src/llama_stack_client/resources/scoring.py delete mode 100644 src/llama_stack_client/resources/scoring_functions.py delete mode 100644 src/llama_stack_client/resources/shields.py delete mode 100644 src/llama_stack_client/resources/synthetic_data_generation.py delete mode 100644 src/llama_stack_client/resources/tool_runtime/__init__.py delete mode 100644 src/llama_stack_client/resources/tool_runtime/rag_tool.py delete mode 100644 src/llama_stack_client/resources/tool_runtime/tool_runtime.py delete mode 100644 src/llama_stack_client/resources/toolgroups.py delete mode 100644 src/llama_stack_client/resources/tools.py delete mode 100644 
src/llama_stack_client/resources/vector_dbs.py delete mode 100644 src/llama_stack_client/resources/vector_io.py delete mode 100644 src/llama_stack_client/resources/vector_stores/__init__.py delete mode 100644 src/llama_stack_client/resources/vector_stores/files.py delete mode 100644 src/llama_stack_client/resources/vector_stores/vector_stores.py delete mode 100644 src/llama_stack_client/types/__init__.py delete mode 100644 src/llama_stack_client/types/agent_create_params.py delete mode 100644 src/llama_stack_client/types/agent_create_response.py delete mode 100644 src/llama_stack_client/types/agents/__init__.py delete mode 100644 src/llama_stack_client/types/agents/agent_turn_response_stream_chunk.py delete mode 100644 src/llama_stack_client/types/agents/session.py delete mode 100644 src/llama_stack_client/types/agents/session_create_params.py delete mode 100644 src/llama_stack_client/types/agents/session_create_response.py delete mode 100644 src/llama_stack_client/types/agents/session_retrieve_params.py delete mode 100644 src/llama_stack_client/types/agents/step_retrieve_response.py delete mode 100644 src/llama_stack_client/types/agents/turn.py delete mode 100644 src/llama_stack_client/types/agents/turn_create_params.py delete mode 100644 src/llama_stack_client/types/agents/turn_response_event.py delete mode 100644 src/llama_stack_client/types/agents/turn_response_event_payload.py delete mode 100644 src/llama_stack_client/types/agents/turn_resume_params.py delete mode 100644 src/llama_stack_client/types/algorithm_config_param.py delete mode 100644 src/llama_stack_client/types/benchmark.py delete mode 100644 src/llama_stack_client/types/benchmark_config_param.py delete mode 100644 src/llama_stack_client/types/benchmark_list_response.py delete mode 100644 src/llama_stack_client/types/benchmark_register_params.py delete mode 100644 src/llama_stack_client/types/chat/__init__.py delete mode 100644 src/llama_stack_client/types/chat/completion_create_params.py delete mode 100644 src/llama_stack_client/types/chat/completion_create_response.py delete mode 100644 src/llama_stack_client/types/chat/completion_list_params.py delete mode 100644 src/llama_stack_client/types/chat/completion_list_response.py delete mode 100644 src/llama_stack_client/types/chat/completion_retrieve_response.py delete mode 100644 src/llama_stack_client/types/chat_completion_chunk.py delete mode 100644 src/llama_stack_client/types/chat_completion_response_stream_chunk.py delete mode 100644 src/llama_stack_client/types/completion_create_params.py delete mode 100644 src/llama_stack_client/types/completion_create_response.py delete mode 100644 src/llama_stack_client/types/completion_response.py delete mode 100644 src/llama_stack_client/types/create_embeddings_response.py delete mode 100644 src/llama_stack_client/types/dataset_iterrows_params.py delete mode 100644 src/llama_stack_client/types/dataset_iterrows_response.py delete mode 100644 src/llama_stack_client/types/dataset_list_response.py delete mode 100644 src/llama_stack_client/types/dataset_register_params.py delete mode 100644 src/llama_stack_client/types/dataset_register_response.py delete mode 100644 src/llama_stack_client/types/dataset_retrieve_response.py delete mode 100644 src/llama_stack_client/types/delete_file_response.py delete mode 100644 src/llama_stack_client/types/embedding_create_params.py delete mode 100644 src/llama_stack_client/types/embeddings_response.py delete mode 100644 src/llama_stack_client/types/eval_candidate_param.py delete mode 100644 
src/llama_stack_client/types/eval_evaluate_rows_alpha_params.py delete mode 100644 src/llama_stack_client/types/eval_evaluate_rows_params.py delete mode 100644 src/llama_stack_client/types/eval_run_eval_alpha_params.py delete mode 100644 src/llama_stack_client/types/eval_run_eval_params.py delete mode 100644 src/llama_stack_client/types/evaluate_response.py delete mode 100644 src/llama_stack_client/types/event_param.py delete mode 100644 src/llama_stack_client/types/file.py delete mode 100644 src/llama_stack_client/types/file_create_params.py delete mode 100644 src/llama_stack_client/types/file_list_params.py delete mode 100644 src/llama_stack_client/types/health_info.py delete mode 100644 src/llama_stack_client/types/inference_batch_chat_completion_params.py delete mode 100644 src/llama_stack_client/types/inference_batch_chat_completion_response.py delete mode 100644 src/llama_stack_client/types/inference_batch_completion_params.py delete mode 100644 src/llama_stack_client/types/inference_chat_completion_params.py delete mode 100644 src/llama_stack_client/types/inference_completion_params.py delete mode 100644 src/llama_stack_client/types/inference_embeddings_params.py delete mode 100644 src/llama_stack_client/types/inference_step.py delete mode 100644 src/llama_stack_client/types/job.py delete mode 100644 src/llama_stack_client/types/list_benchmarks_response.py delete mode 100644 src/llama_stack_client/types/list_datasets_response.py delete mode 100644 src/llama_stack_client/types/list_files_response.py delete mode 100644 src/llama_stack_client/types/list_models_response.py delete mode 100644 src/llama_stack_client/types/list_post_training_jobs_response.py delete mode 100644 src/llama_stack_client/types/list_providers_response.py delete mode 100644 src/llama_stack_client/types/list_routes_response.py delete mode 100644 src/llama_stack_client/types/list_scoring_functions_response.py delete mode 100644 src/llama_stack_client/types/list_shields_response.py delete mode 100644 src/llama_stack_client/types/list_tool_groups_response.py delete mode 100644 src/llama_stack_client/types/list_tools_response.py delete mode 100644 src/llama_stack_client/types/list_vector_dbs_response.py delete mode 100644 src/llama_stack_client/types/list_vector_stores_response.py delete mode 100644 src/llama_stack_client/types/memory_retrieval_step.py delete mode 100644 src/llama_stack_client/types/model.py delete mode 100644 src/llama_stack_client/types/model_register_params.py delete mode 100644 src/llama_stack_client/types/post_training/__init__.py delete mode 100644 src/llama_stack_client/types/post_training/job_artifacts_params.py delete mode 100644 src/llama_stack_client/types/post_training/job_artifacts_response.py delete mode 100644 src/llama_stack_client/types/post_training/job_cancel_params.py delete mode 100644 src/llama_stack_client/types/post_training/job_list_response.py delete mode 100644 src/llama_stack_client/types/post_training/job_status_params.py delete mode 100644 src/llama_stack_client/types/post_training/job_status_response.py delete mode 100644 src/llama_stack_client/types/post_training_job.py delete mode 100644 src/llama_stack_client/types/post_training_preference_optimize_params.py delete mode 100644 src/llama_stack_client/types/post_training_supervised_fine_tune_params.py delete mode 100644 src/llama_stack_client/types/provider_info.py delete mode 100644 src/llama_stack_client/types/provider_list_response.py delete mode 100644 src/llama_stack_client/types/query_chunks_response.py delete 
mode 100644 src/llama_stack_client/types/query_condition_param.py delete mode 100644 src/llama_stack_client/types/query_spans_response.py delete mode 100644 src/llama_stack_client/types/response_create_params.py delete mode 100644 src/llama_stack_client/types/response_list_params.py delete mode 100644 src/llama_stack_client/types/response_list_response.py delete mode 100644 src/llama_stack_client/types/response_object.py delete mode 100644 src/llama_stack_client/types/response_object_stream.py delete mode 100644 src/llama_stack_client/types/responses/__init__.py delete mode 100644 src/llama_stack_client/types/responses/input_item_list_params.py delete mode 100644 src/llama_stack_client/types/responses/input_item_list_response.py delete mode 100644 src/llama_stack_client/types/route_info.py delete mode 100644 src/llama_stack_client/types/route_list_response.py delete mode 100644 src/llama_stack_client/types/run_shield_response.py delete mode 100644 src/llama_stack_client/types/safety_run_shield_params.py delete mode 100644 src/llama_stack_client/types/scoring_fn.py delete mode 100644 src/llama_stack_client/types/scoring_fn_params.py delete mode 100644 src/llama_stack_client/types/scoring_fn_params_param.py delete mode 100644 src/llama_stack_client/types/scoring_function_list_response.py delete mode 100644 src/llama_stack_client/types/scoring_function_register_params.py delete mode 100644 src/llama_stack_client/types/scoring_score_batch_params.py delete mode 100644 src/llama_stack_client/types/scoring_score_batch_response.py delete mode 100644 src/llama_stack_client/types/scoring_score_params.py delete mode 100644 src/llama_stack_client/types/scoring_score_response.py delete mode 100644 src/llama_stack_client/types/shared/__init__.py delete mode 100644 src/llama_stack_client/types/shared/agent_config.py delete mode 100644 src/llama_stack_client/types/shared/batch_completion.py delete mode 100644 src/llama_stack_client/types/shared/chat_completion_response.py delete mode 100644 src/llama_stack_client/types/shared/completion_message.py delete mode 100644 src/llama_stack_client/types/shared/content_delta.py delete mode 100644 src/llama_stack_client/types/shared/document.py delete mode 100644 src/llama_stack_client/types/shared/interleaved_content.py delete mode 100644 src/llama_stack_client/types/shared/interleaved_content_item.py delete mode 100644 src/llama_stack_client/types/shared/message.py delete mode 100644 src/llama_stack_client/types/shared/param_type.py delete mode 100644 src/llama_stack_client/types/shared/query_config.py delete mode 100644 src/llama_stack_client/types/shared/query_generator_config.py delete mode 100644 src/llama_stack_client/types/shared/query_result.py delete mode 100644 src/llama_stack_client/types/shared/response_format.py delete mode 100644 src/llama_stack_client/types/shared/return_type.py delete mode 100644 src/llama_stack_client/types/shared/safety_violation.py delete mode 100644 src/llama_stack_client/types/shared/sampling_params.py delete mode 100644 src/llama_stack_client/types/shared/scoring_result.py delete mode 100644 src/llama_stack_client/types/shared/system_message.py delete mode 100644 src/llama_stack_client/types/shared/tool_call.py delete mode 100644 src/llama_stack_client/types/shared/tool_call_or_string.py delete mode 100644 src/llama_stack_client/types/shared/tool_param_definition.py delete mode 100644 src/llama_stack_client/types/shared/tool_response_message.py delete mode 100644 src/llama_stack_client/types/shared/user_message.py delete mode 
100644 src/llama_stack_client/types/shared_params/__init__.py delete mode 100644 src/llama_stack_client/types/shared_params/agent_config.py delete mode 100644 src/llama_stack_client/types/shared_params/completion_message.py delete mode 100644 src/llama_stack_client/types/shared_params/document.py delete mode 100644 src/llama_stack_client/types/shared_params/interleaved_content.py delete mode 100644 src/llama_stack_client/types/shared_params/interleaved_content_item.py delete mode 100644 src/llama_stack_client/types/shared_params/message.py delete mode 100644 src/llama_stack_client/types/shared_params/query_config.py delete mode 100644 src/llama_stack_client/types/shared_params/query_generator_config.py delete mode 100644 src/llama_stack_client/types/shared_params/response_format.py delete mode 100644 src/llama_stack_client/types/shared_params/return_type.py delete mode 100644 src/llama_stack_client/types/shared_params/sampling_params.py delete mode 100644 src/llama_stack_client/types/shared_params/system_message.py delete mode 100644 src/llama_stack_client/types/shared_params/tool_call.py delete mode 100644 src/llama_stack_client/types/shared_params/tool_param_definition.py delete mode 100644 src/llama_stack_client/types/shared_params/tool_response_message.py delete mode 100644 src/llama_stack_client/types/shared_params/user_message.py delete mode 100644 src/llama_stack_client/types/shield.py delete mode 100644 src/llama_stack_client/types/shield_call_step.py delete mode 100644 src/llama_stack_client/types/shield_register_params.py delete mode 100644 src/llama_stack_client/types/span_with_status.py delete mode 100644 src/llama_stack_client/types/synthetic_data_generation_generate_params.py delete mode 100644 src/llama_stack_client/types/synthetic_data_generation_response.py delete mode 100644 src/llama_stack_client/types/telemetry_get_span_response.py delete mode 100644 src/llama_stack_client/types/telemetry_get_span_tree_params.py delete mode 100644 src/llama_stack_client/types/telemetry_get_span_tree_response.py delete mode 100644 src/llama_stack_client/types/telemetry_log_event_params.py delete mode 100644 src/llama_stack_client/types/telemetry_query_spans_params.py delete mode 100644 src/llama_stack_client/types/telemetry_query_spans_response.py delete mode 100644 src/llama_stack_client/types/telemetry_query_traces_params.py delete mode 100644 src/llama_stack_client/types/telemetry_query_traces_response.py delete mode 100644 src/llama_stack_client/types/telemetry_save_spans_to_dataset_params.py delete mode 100644 src/llama_stack_client/types/token_log_probs.py delete mode 100644 src/llama_stack_client/types/tool.py delete mode 100644 src/llama_stack_client/types/tool_def.py delete mode 100644 src/llama_stack_client/types/tool_def_param.py delete mode 100644 src/llama_stack_client/types/tool_execution_step.py delete mode 100644 src/llama_stack_client/types/tool_group.py delete mode 100644 src/llama_stack_client/types/tool_invocation_result.py delete mode 100644 src/llama_stack_client/types/tool_response.py delete mode 100644 src/llama_stack_client/types/tool_response_param.py delete mode 100644 src/llama_stack_client/types/tool_runtime/__init__.py delete mode 100644 src/llama_stack_client/types/tool_runtime/rag_tool_insert_params.py delete mode 100644 src/llama_stack_client/types/tool_runtime/rag_tool_query_params.py delete mode 100644 src/llama_stack_client/types/tool_runtime_invoke_tool_params.py delete mode 100644 src/llama_stack_client/types/tool_runtime_list_tools_params.py delete 
mode 100644 src/llama_stack_client/types/tool_runtime_list_tools_response.py delete mode 100644 src/llama_stack_client/types/toolgroup_list_response.py delete mode 100644 src/llama_stack_client/types/toolgroup_register_params.py delete mode 100644 src/llama_stack_client/types/trace.py delete mode 100644 src/llama_stack_client/types/vector_db_list_response.py delete mode 100644 src/llama_stack_client/types/vector_db_register_params.py delete mode 100644 src/llama_stack_client/types/vector_db_register_response.py delete mode 100644 src/llama_stack_client/types/vector_db_retrieve_response.py delete mode 100644 src/llama_stack_client/types/vector_io_insert_params.py delete mode 100644 src/llama_stack_client/types/vector_io_query_params.py delete mode 100644 src/llama_stack_client/types/vector_store.py delete mode 100644 src/llama_stack_client/types/vector_store_create_params.py delete mode 100644 src/llama_stack_client/types/vector_store_delete_response.py delete mode 100644 src/llama_stack_client/types/vector_store_list_params.py delete mode 100644 src/llama_stack_client/types/vector_store_search_params.py delete mode 100644 src/llama_stack_client/types/vector_store_search_response.py delete mode 100644 src/llama_stack_client/types/vector_store_update_params.py delete mode 100644 src/llama_stack_client/types/vector_stores/__init__.py delete mode 100644 src/llama_stack_client/types/vector_stores/file_create_params.py delete mode 100644 src/llama_stack_client/types/vector_stores/vector_store_file.py delete mode 100644 src/llama_stack_client/types/version_info.py delete mode 100644 tests/api_resources/agents/test_session.py delete mode 100644 tests/api_resources/agents/test_steps.py delete mode 100644 tests/api_resources/agents/test_turn.py delete mode 100644 tests/api_resources/chat/__init__.py delete mode 100644 tests/api_resources/chat/test_completions.py delete mode 100644 tests/api_resources/eval/__init__.py delete mode 100644 tests/api_resources/eval/test_jobs.py delete mode 100644 tests/api_resources/post_training/__init__.py delete mode 100644 tests/api_resources/post_training/test_job.py delete mode 100644 tests/api_resources/responses/__init__.py delete mode 100644 tests/api_resources/responses/test_input_items.py rename tests/api_resources/{agents => store}/__init__.py (100%) create mode 100644 tests/api_resources/store/test_order.py delete mode 100644 tests/api_resources/test_agents.py delete mode 100644 tests/api_resources/test_benchmarks.py delete mode 100644 tests/api_resources/test_completions.py delete mode 100644 tests/api_resources/test_datasets.py delete mode 100644 tests/api_resources/test_embeddings.py delete mode 100644 tests/api_resources/test_eval.py delete mode 100644 tests/api_resources/test_files.py delete mode 100644 tests/api_resources/test_inference.py delete mode 100644 tests/api_resources/test_inspect.py delete mode 100644 tests/api_resources/test_models.py create mode 100644 tests/api_resources/test_pet.py delete mode 100644 tests/api_resources/test_post_training.py delete mode 100644 tests/api_resources/test_providers.py delete mode 100644 tests/api_resources/test_responses.py delete mode 100644 tests/api_resources/test_routes.py delete mode 100644 tests/api_resources/test_safety.py delete mode 100644 tests/api_resources/test_scoring.py delete mode 100644 tests/api_resources/test_scoring_functions.py delete mode 100644 tests/api_resources/test_shields.py create mode 100644 tests/api_resources/test_store.py delete mode 100644 
tests/api_resources/test_synthetic_data_generation.py delete mode 100644 tests/api_resources/test_telemetry.py delete mode 100644 tests/api_resources/test_tool_runtime.py delete mode 100644 tests/api_resources/test_toolgroups.py delete mode 100644 tests/api_resources/test_tools.py create mode 100644 tests/api_resources/test_user.py delete mode 100644 tests/api_resources/test_vector_dbs.py delete mode 100644 tests/api_resources/test_vector_io.py delete mode 100644 tests/api_resources/test_vector_stores.py delete mode 100644 tests/api_resources/tool_runtime/__init__.py delete mode 100644 tests/api_resources/tool_runtime/test_rag_tool.py delete mode 100644 tests/api_resources/vector_stores/__init__.py delete mode 100644 tests/api_resources/vector_stores/test_files.py delete mode 100644 uv.lock diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index ac9a2e75..ff261bad 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -3,7 +3,7 @@ FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT} USER vscode -RUN curl -sSf https://rye.astral.sh/get | RYE_VERSION="0.35.0" RYE_INSTALL_OPTION="--yes" bash +RUN curl -sSf https://rye.astral.sh/get | RYE_VERSION="0.44.0" RYE_INSTALL_OPTION="--yes" bash ENV PATH=/home/vscode/.rye/shims:$PATH -RUN echo "[[ -d .venv ]] && source .venv/bin/activate" >> /home/vscode/.bashrc +RUN echo "[[ -d .venv ]] && source .venv/bin/activate || export PATH=\$PATH" >> /home/vscode/.bashrc diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 66df046a..c17fdc16 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -14,7 +14,7 @@ "extensions": [ "ms-python.python" ], - "settings": { + "settings": { "terminal.integrated.shell.linux": "/bin/bash", "python.pythonPath": ".venv/bin/python", "python.defaultInterpreterPath": ".venv/bin/python", @@ -24,6 +24,9 @@ } } } + }, + "features": { + "ghcr.io/devcontainers/features/node:1": {} } // Features to add to the dev container. More info: https://containers.dev/features. diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS deleted file mode 100644 index 3f946652..00000000 --- a/.github/CODEOWNERS +++ /dev/null @@ -1,5 +0,0 @@ -# Each line is a file pattern followed by one or more owners. - -# These owners will be the default owners for everything in -# the repo. Unless a later match takes precedence, -* @ashwinb @yanxi0830 @hardikjshah @dltn @raghotham @dineshyv @vladimirivic @ehhuang @SLR722 @reluctantfuturist diff --git a/.github/ISSUE_TEMPLATE/bug.yml b/.github/ISSUE_TEMPLATE/bug.yml deleted file mode 100644 index 1f7dabb9..00000000 --- a/.github/ISSUE_TEMPLATE/bug.yml +++ /dev/null @@ -1,77 +0,0 @@ -name: 🐛 Bug Report -description: Create a report to help us reproduce and fix the bug - -body: - - type: markdown - attributes: - value: > - #### Before submitting a bug, please make sure the issue hasn't been already addressed by searching through [the - existing and past issues](https://github.com/meta-llama/llama-stack/issues). - - - type: textarea - id: system-info - attributes: - label: System Info - description: | - Please share your system info with us. You can use the following command to capture your environment information - python -m "torch.utils.collect_env" - - placeholder: | - PyTorch version, CUDA version, GPU type, #num of GPUs... 
- validations: - required: true - - - type: checkboxes - id: information-scripts-examples - attributes: - label: Information - description: 'The problem arises when using:' - options: - - label: "The official example scripts" - - label: "My own modified scripts" - - - type: textarea - id: bug-description - attributes: - label: 🐛 Describe the bug - description: | - Please provide a clear and concise description of what the bug is. - - Please also paste or describe the results you observe instead of the expected results. - placeholder: | - A clear and concise description of what the bug is. - - ```llama stack - # Command that you used for running the examples - ``` - Description of the results - validations: - required: true - - - type: textarea - attributes: - label: Error logs - description: | - If you observe an error, please paste the error message including the **full** traceback of the exception. It may be relevant to wrap error messages in ```` ```triple quotes blocks``` ````. - - placeholder: | - ``` - The error message you got, with the full traceback. - ``` - - validations: - required: true - - - - type: textarea - id: expected-behavior - validations: - required: true - attributes: - label: Expected behavior - description: "A clear and concise description of what you would expect to happen." - - - type: markdown - attributes: - value: > - Thanks for contributing 🎉! diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md deleted file mode 100644 index 044518ab..00000000 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ /dev/null @@ -1,11 +0,0 @@ -# What does this PR do? -[Provide a short summary of what this PR does and why. Link to relevant issues if applicable.] - -[//]: # (If resolving an issue, uncomment and update the line below) -[//]: # (Closes #[issue-number]) - -## Test Plan -[Describe the tests you ran to verify your changes with result summaries. 
*Provide clear instructions so the plan can be easily re-executed.*] - -[//]: # (## Documentation) -[//]: # (- [ ] Added a Changelog entry if the change is significant) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 00000000..10db7de5 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,96 @@ +name: CI +on: + push: + branches-ignore: + - 'generated' + - 'codegen/**' + - 'integrated/**' + - 'stl-preview-head/**' + - 'stl-preview-base/**' + pull_request: + branches-ignore: + - 'stl-preview-head/**' + - 'stl-preview-base/**' + +jobs: + lint: + timeout-minutes: 10 + name: lint + runs-on: ${{ github.repository == 'stainless-sdks/llama-stack-cli-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }} + if: github.event_name == 'push' || github.event.pull_request.head.repo.fork + steps: + - uses: actions/checkout@v4 + + - name: Install Rye + run: | + curl -sSf https://rye.astral.sh/get | bash + echo "$HOME/.rye/shims" >> $GITHUB_PATH + env: + RYE_VERSION: '0.44.0' + RYE_INSTALL_OPTION: '--yes' + + - name: Install dependencies + run: rye sync --all-features + + - name: Run lints + run: ./scripts/lint + + build: + if: github.repository == 'stainless-sdks/llama-stack-cli-python' && (github.event_name == 'push' || github.event.pull_request.head.repo.fork) + timeout-minutes: 10 + name: build + permissions: + contents: read + id-token: write + runs-on: depot-ubuntu-24.04 + steps: + - uses: actions/checkout@v4 + + - name: Install Rye + run: | + curl -sSf https://rye.astral.sh/get | bash + echo "$HOME/.rye/shims" >> $GITHUB_PATH + env: + RYE_VERSION: '0.44.0' + RYE_INSTALL_OPTION: '--yes' + + - name: Install dependencies + run: rye sync --all-features + + - name: Run build + run: rye build + + - name: Get GitHub OIDC Token + id: github-oidc + uses: actions/github-script@v6 + with: + script: core.setOutput('github_token', await core.getIDToken()); + + - name: Upload tarball + env: + URL: https://pkg.stainless.com/s + AUTH: ${{ steps.github-oidc.outputs.github_token }} + SHA: ${{ github.sha }} + run: ./scripts/utils/upload-artifact.sh + + test: + timeout-minutes: 10 + name: test + runs-on: ${{ github.repository == 'stainless-sdks/llama-stack-cli-python' && 'depot-ubuntu-24.04' || 'ubuntu-latest' }} + if: github.event_name == 'push' || github.event.pull_request.head.repo.fork + steps: + - uses: actions/checkout@v4 + + - name: Install Rye + run: | + curl -sSf https://rye.astral.sh/get | bash + echo "$HOME/.rye/shims" >> $GITHUB_PATH + env: + RYE_VERSION: '0.44.0' + RYE_INSTALL_OPTION: '--yes' + + - name: Bootstrap + run: ./scripts/bootstrap + + - name: Run tests + run: ./scripts/test diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml deleted file mode 100644 index 58853453..00000000 --- a/.github/workflows/pre-commit.yml +++ /dev/null @@ -1,25 +0,0 @@ -name: Pre-commit - -on: - pull_request: - push: - branches: [main] - -jobs: - pre-commit: - runs-on: ubuntu-latest - - steps: - - name: Checkout code - uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0 - - - name: Set up Python - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0 - with: - python-version: '3.11.10' - cache: pip - cache-dependency-path: | - **/requirements*.txt - .pre-commit-config.yaml - - - uses: pre-commit/action@v3.0.1 diff --git a/.github/workflows/publish-to-test-pypi.yml b/.github/workflows/publish-to-test-pypi.yml deleted file mode 100644 index a669c93f..00000000 --- a/.github/workflows/publish-to-test-pypi.yml 
+++ /dev/null @@ -1,76 +0,0 @@ -name: Publish Python 🐍 distribution 📦 to TestPyPI - -on: - repository_dispatch: # on trigger from llama-stack - types: [build-client-package] - - workflow_dispatch: # Keep manual trigger - inputs: - version: - description: 'Version number (e.g. 0.0.63.dev20250111)' - required: true - type: string - -jobs: - build: - name: Build distribution 📦 - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - with: - persist-credentials: false - - name: Get date - id: date - run: echo "date=$(date +'%Y%m%d')" >> $GITHUB_OUTPUT - - name: Update version for manual RC - if: github.event_name == 'workflow_dispatch' - run: | - sed -i 's/version = "\([^"]*\)"/version = "${{ inputs.version }}"/' pyproject.toml - sed -i 's/__version__ = "\([^"]*\)"/__version__ = "${{ inputs.version }}"/' src/llama_stack_client/_version.py - - name: Update version for repository_dispatch - if: github.event_name == 'repository_dispatch' && github.event.client_payload.source == 'llama-stack-nightly' - run: | - sed -i 's/version = "\([^"]*\)"/version = "${{ github.event.client_payload.version }}"/' pyproject.toml - sed -i 's/__version__ = "\([^"]*\)"/__version__ = "${{ github.event.client_payload.version }}"/' src/llama_stack_client/_version.py - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: "3.11" - - name: Install pypa/build - run: >- - python3 -m - pip install - build - --user - - name: Build a binary wheel and a source tarball - run: python3 -m build - - name: Store the distribution packages - uses: actions/upload-artifact@v4 - with: - name: python-package-distributions - path: dist/ - - publish-to-testpypi: - name: Publish Python 🐍 distribution 📦 to TestPyPI - needs: - - build - runs-on: ubuntu-latest - - environment: - name: testrelease - url: https://test.pypi.org/p/llama-stack-client - - permissions: - id-token: write # IMPORTANT: mandatory for trusted publishing - - steps: - - name: Download all the dists - uses: actions/download-artifact@v4 - with: - name: python-package-distributions - path: dist/ - - name: Publish distribution 📦 to TestPyPI - uses: pypa/gh-action-pypi-publish@release/v1 - with: - repository-url: https://test.pypi.org/legacy/ diff --git a/.gitignore b/.gitignore index bcfc2a3c..87797408 100644 --- a/.gitignore +++ b/.gitignore @@ -14,4 +14,3 @@ dist .envrc codegen.log Brewfile.lock.json -.DS_Store diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml deleted file mode 100644 index 0571dee6..00000000 --- a/.pre-commit-config.yaml +++ /dev/null @@ -1,74 +0,0 @@ -exclude: 'build/' - -default_language_version: - python: python3 - -repos: -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v5.0.0 # Latest stable version - hooks: - - id: check-merge-conflict - - id: check-added-large-files - args: ['--maxkb=1000'] - - id: end-of-file-fixer - exclude: '^(.*\.svg)$' - -- repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.9.4 - hooks: - - id: ruff - files: ^src/llama_stack_client/lib/.* - args: [ - --fix, - --exit-non-zero-on-fix - ] - - id: ruff-format - files: ^src/llama_stack_client/lib/.* - -- repo: https://github.com/adamchainz/blacken-docs - rev: 1.19.0 - hooks: - - id: blacken-docs - files: ^src/llama_stack_client/lib/.* - additional_dependencies: - - black==24.3.0 - -# - repo: https://github.com/pre-commit/mirrors-mypy -# rev: v1.14.0 -# hooks: -# - id: mypy -# additional_dependencies: -# - types-requests -# - types-setuptools -# - pydantic -# args: [--ignore-missing-imports] - -# - repo: 
https://github.com/jsh9/pydoclint -# rev: d88180a8632bb1602a4d81344085cf320f288c5a -# hooks: -# - id: pydoclint -# args: [--config=pyproject.toml] - -# - repo: https://github.com/tcort/markdown-link-check -# rev: v3.11.2 -# hooks: -# - id: markdown-link-check -# args: ['--quiet'] - -# - repo: local -# hooks: -# - id: distro-codegen -# name: Distribution Template Codegen -# additional_dependencies: -# - rich -# - pydantic -# entry: python -m llama_stack.scripts.distro_codegen -# language: python -# pass_filenames: false -# require_serial: true -# files: ^llama_stack/templates/.*$ -# stages: [manual] - -ci: - autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks - autoupdate_commit_msg: ⬆ [pre-commit.ci] pre-commit autoupdate diff --git a/.python-version b/.python-version index e4fba218..43077b24 100644 --- a/.python-version +++ b/.python-version @@ -1 +1 @@ -3.12 +3.9.18 diff --git a/.ruff.toml b/.ruff.toml deleted file mode 100644 index a913ae69..00000000 --- a/.ruff.toml +++ /dev/null @@ -1,37 +0,0 @@ -# Suggested config from pytorch that we can adapt -lint.select = ["B", "C", "E" , "F" , "N", "W", "B9"] - -line-length = 120 - -# C408 ignored because we like the dict keyword argument syntax -# E501 is not flexible enough, we're using B950 instead -# N812 ignored because import torch.nn.functional as F is PyTorch convention -# N817 ignored because importing using acronyms is convention (DistributedDataParallel as DDP) -# E731 allow usage of assigning lambda expressions -# E701 let black auto-format statements on one line -# E704 let black auto-format statements on one line -lint.ignore = [ - "E203", "E305", "E402", "E501", "E721", "E741", "F405", "F821", "F841", - "C408", "E302", "W291", "E303", "N812", "N817", "E731", "E701", - # These are the additional ones we started ignoring after moving to ruff. We should look into each one of them later. - "C901", "C405", "C414", "N803", "N999", "C403", "C416", "B028", "C419", "C401", "B023", - # shebang has extra meaning in fbcode lints, so I think it's not worth trying - # to line this up with executable bit - "EXE001", - # random naming hints don't need - "N802", - # these ignores are from flake8-bugbear; please fix! 
- "B007", "B008" -] - -exclude = [ - "./.git", - "./docs/*", - "./build", - "./scripts", - "./venv", - "*.pyi", - ".pre-commit-config.yaml", - "*.md", - ".flake8" -] diff --git a/.stats.yml b/.stats.yml index 4517049a..6fcf58f8 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,4 @@ -configured_endpoints: 51 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/meta%2Fllama-stack-d52e4c19360cc636336d6a60ba6af1db89736fc0a3025c2b1d11870a5f1a1e3d.yml +configured_endpoints: 19 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/meta-slekkala1%2Fllama-stack-cli-6937085190e3e5553f943ff22deda900cd8ad4bf5e37278cba7de683b78ae8d2.yml +openapi_spec_hash: 85dc5d1e011be6539c240594f06f284b +config_hash: 96cc2b0706a245b6a0a784aa7dbfe779 diff --git a/Brewfile b/Brewfile index fc55cbe7..492ca37b 100644 --- a/Brewfile +++ b/Brewfile @@ -1 +1,2 @@ brew "rye" + diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md deleted file mode 100644 index 3232ed66..00000000 --- a/CODE_OF_CONDUCT.md +++ /dev/null @@ -1,80 +0,0 @@ -# Code of Conduct - -## Our Pledge - -In the interest of fostering an open and welcoming environment, we as -contributors and maintainers pledge to make participation in our project and -our community a harassment-free experience for everyone, regardless of age, body -size, disability, ethnicity, sex characteristics, gender identity and expression, -level of experience, education, socio-economic status, nationality, personal -appearance, race, religion, or sexual identity and orientation. - -## Our Standards - -Examples of behavior that contributes to creating a positive environment -include: - -* Using welcoming and inclusive language -* Being respectful of differing viewpoints and experiences -* Gracefully accepting constructive criticism -* Focusing on what is best for the community -* Showing empathy towards other community members - -Examples of unacceptable behavior by participants include: - -* The use of sexualized language or imagery and unwelcome sexual attention or -advances -* Trolling, insulting/derogatory comments, and personal or political attacks -* Public or private harassment -* Publishing others' private information, such as a physical or electronic -address, without explicit permission -* Other conduct which could reasonably be considered inappropriate in a -professional setting - -## Our Responsibilities - -Project maintainers are responsible for clarifying the standards of acceptable -behavior and are expected to take appropriate and fair corrective action in -response to any instances of unacceptable behavior. - -Project maintainers have the right and responsibility to remove, edit, or -reject comments, commits, code, wiki edits, issues, and other contributions -that are not aligned to this Code of Conduct, or to ban temporarily or -permanently any contributor for other behaviors that they deem inappropriate, -threatening, offensive, or harmful. - -## Scope - -This Code of Conduct applies within all project spaces, and it also applies when -an individual is representing the project or its community in public spaces. -Examples of representing a project or community include using an official -project e-mail address, posting via an official social media account, or acting -as an appointed representative at an online or offline event. Representation of -a project may be further defined and clarified by project maintainers. 
- -This Code of Conduct also applies outside the project spaces when there is a -reasonable belief that an individual's behavior may have a negative impact on -the project or its community. - -## Enforcement - -Instances of abusive, harassing, or otherwise unacceptable behavior may be -reported by contacting the project team at . All -complaints will be reviewed and investigated and will result in a response that -is deemed necessary and appropriate to the circumstances. The project team is -obligated to maintain confidentiality with regard to the reporter of an incident. -Further details of specific enforcement policies may be posted separately. - -Project maintainers who do not follow or enforce the Code of Conduct in good -faith may face temporary or permanent repercussions as determined by other -members of the project's leadership. - -## Attribution - -This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, -available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html - -[homepage]: https://www.contributor-covenant.org - -For answers to common questions about this code of conduct, see -https://www.contributor-covenant.org/faq diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 7cdb9833..ff4e9454 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,22 +1,59 @@ ## Setting up the environment -### With UV +### With Rye -We use [UV](https://docs.astral.sh/uv/) to manage dependencies so we highly recommend [installing it](https://docs.astral.sh/uv/installation/) as it will automatically provision a Python environment with the expected Python version. +We use [Rye](https://rye.astral.sh/) to manage dependencies because it will automatically provision a Python environment with the expected Python version. To set it up, run: -After installing UV, you'll just have to run this command: +```sh +$ ./scripts/bootstrap +``` -```bash -uv sync +Or [install Rye manually](https://rye.astral.sh/guide/installation/) and run: + +```sh +$ rye sync --all-features ``` +You can then run scripts using `rye run python script.py` or by activating the virtual environment: + +```sh +# Activate the virtual environment - https://docs.python.org/3/library/venv.html#how-venvs-work +$ source .venv/bin/activate + +# now you can omit the `rye run` prefix +$ python script.py +``` +### Without Rye + +Alternatively if you don't want to install `Rye`, you can stick with the standard `pip` setup by ensuring you have the Python version specified in `.python-version`, create a virtual environment however you desire and then install dependencies using this command: + +```sh +$ pip install -r requirements-dev.lock +``` ## Modifying/Adding code Most of the SDK is generated code. Modifications to code will be persisted between generations, but may result in merge conflicts between manual patches and changes from the generator. The generator will never -modify the contents of the `src/llama_stack_client/lib/` and `examples/` directories. +modify the contents of the `src/llama_stack_cli/lib/` and `examples/` directories. + +## Adding and running examples + +All files in the `examples/` directory are not modified by the generator and can be freely edited or added to. 
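To make the template below concrete, here is a hedged sketch of a complete example script; the file name `examples/store_inventory.py` is hypothetical, and it assumes the generated `llama_stack_cli` package plus the `PETSTORE_API_KEY` environment variable and the `client.store.list_inventory()` method documented elsewhere in this patch.

```python
#!/usr/bin/env -S rye run python
# Hypothetical examples/store_inventory.py — an illustrative sketch, not part of the patch.
import os

from llama_stack_cli import LlamaStackCli

# The client reads PETSTORE_API_KEY from the environment by default; passing it
# explicitly keeps the sketch self-contained.
client = LlamaStackCli(api_key=os.environ.get("PETSTORE_API_KEY"))

# list_inventory() is listed under "# Store" in the new api.md added by this patch.
inventory = client.store.list_inventory()
print(inventory)
```

Made executable with `chmod +x`, it can then be run directly against your API, as the shell snippet that follows shows.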
+ +```py +# add an example to examples/.py + +#!/usr/bin/env -S rye run python +… +``` + +```sh +$ chmod +x examples/.py +# run the example against your api +$ ./examples/.py +``` ## Using the repository from source @@ -24,8 +61,8 @@ If you’d like to use the repository from source, you can either install from g To install via git: -```bash -uv pip install git+ssh://git@github.com/stainless-sdks/llama-stack-python.git +```sh +$ pip install git+ssh://git@github.com/stainless-sdks/llama-stack-cli-python.git ``` Alternatively, you can build from source and install the wheel file: @@ -34,37 +71,46 @@ Building this package will create two files in the `dist/` directory, a `.tar.gz To create a distributable version of the library, all you have to do is run this command: -```bash -uv build +```sh +$ rye build +# or +$ python -m build ``` Then to install: ```sh -uv pip install ./path-to-wheel-file.whl +$ pip install ./path-to-wheel-file.whl ``` ## Running tests Most tests require you to [set up a mock server](https://github.com/stoplightio/prism) against the OpenAPI spec to run the tests. -```bash +```sh # you will need npm installed -npx prism mock path/to/your/openapi.yml +$ npx prism mock path/to/your/openapi.yml ``` -```bash -uv run pytest +```sh +$ ./scripts/test ``` ## Linting and formatting -There is a pre-commit hook that will run ruff and black on the code. +This repository uses [ruff](https://github.com/astral-sh/ruff) and +[black](https://github.com/psf/black) to format the code in the repository. -To run the pre-commit hook: +To lint: -```bash -uv run pre-commit +```sh +$ ./scripts/lint +``` + +To format and fix all ruff issues automatically: + +```sh +$ ./scripts/format ``` ## Publishing and releases @@ -74,7 +120,7 @@ the changes aren't made through the automated pipeline, you may want to make rel ### Publish with a GitHub workflow -You can release to package managers by using [the `Publish PyPI` GitHub action](https://www.github.com/stainless-sdks/llama-stack-python/actions/workflows/publish-pypi.yml). This requires a setup organization or repository secret to be set up. +You can release to package managers by using [the `Publish PyPI` GitHub action](https://www.github.com/stainless-sdks/llama-stack-cli-python/actions/workflows/publish-pypi.yml). This requires a setup organization or repository secret to be set up. ### Publish manually diff --git a/LICENSE b/LICENSE index c781a0e2..f2a56793 100644 --- a/LICENSE +++ b/LICENSE @@ -1,22 +1,201 @@ -MIT License - -Copyright (c) Meta Platforms, Inc. and affiliates - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2025 Llama Stack Cli + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md index 2f2694b0..4f204675 100644 --- a/README.md +++ b/README.md @@ -1,104 +1,74 @@ -# Llama Stack Client Python API library +# Llama Stack Cli Python API library -[![PyPI version](https://img.shields.io/pypi/v/llama_stack_client.svg)](https://pypi.org/project/llama_stack_client/) [![PyPI - Downloads](https://img.shields.io/pypi/dm/llama-stack-client)](https://pypi.org/project/llama-stack-client/) -[![Discord](https://img.shields.io/discord/1257833999603335178)](https://discord.gg/llama-stack) + +[![PyPI version](https://img.shields.io/pypi/v/llama_stack_cli.svg?label=pypi%20(stable))](https://pypi.org/project/llama_stack_cli/) -The Llama Stack Client Python library provides convenient access to the Llama Stack Client REST API from any Python 3.7+ +The Llama Stack Cli Python library provides convenient access to the Llama Stack Cli REST API from any Python 3.8+ application. The library includes type definitions for all request params and response fields, and offers both synchronous and asynchronous clients powered by [httpx](https://github.com/encode/httpx). -It is generated with [Stainless](https://www.stainlessapi.com/). +It is generated with [Stainless](https://www.stainless.com/). ## Documentation -For starting up a Llama Stack server, please checkout our guides in our [llama-stack](https://github.com/meta-llama/llama-stack/blob/main/docs/resources/llama-stack-spec.html) repo. - -The REST API documentation can be found on our [llama-stack OpenAPI spec](https://github.com/meta-llama/llama-stack/blob/main/docs/resources/llama-stack-spec.html). The full API of this library can be found in [api.md](api.md). - -You can find more example apps with client SDKs to talk with the Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main) repo. +The full API of this library can be found in [api.md](api.md). ## Installation ```sh -pip install llama-stack-client +# install from this staging repo +pip install git+ssh://git@github.com/stainless-sdks/llama-stack-cli-python.git ``` +> [!NOTE] +> Once this package is [published to PyPI](https://www.stainless.com/docs/guides/publish), this will become: `pip install --pre llama_stack_cli` + ## Usage -The full API of this library can be found in [api.md](api.md). You may find basic client examples in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main) repo. +The full API of this library can be found in [api.md](api.md). 
```python -from llama_stack_client import LlamaStackClient +import os +from llama_stack_cli import LlamaStackCli -client = LlamaStackClient( - base_url=f"http://{host}:{port}", +client = LlamaStackCli( + api_key=os.environ.get("PETSTORE_API_KEY"), # This is the default and can be omitted ) -response = client.chat.completions.create( - messages=[{"role": "user", "content": "hello world, write me a 2 sentence poem about the moon"}], - model="meta-llama/Llama-3.2-3B-Instruct", - stream=False, +order = client.store.order.create( + pet_id=1, + quantity=1, + status="placed", ) -print(response) +print(order.id) ``` -After installing the `llama-stack-client` package, you can also use the [`llama-stack-client` CLI](https://github.com/meta-llama/llama-stack/tree/main/llama-stack-client) to interact with the Llama Stack server. -```bash -llama-stack-client inference chat-completion --message "hello, what model are you" -``` - -```python -OpenAIChatCompletion( - id='AmivnS0iMv-mmEE4_A0DK1T', - choices=[ - OpenAIChatCompletionChoice( - finish_reason='stop', - index=0, - message=OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParam( - role='assistant', - content="Hello! I am an AI designed by Meta AI, and my model is a type of recurrent neural network (RNN) called a transformer. My specific architecture is based on the BERT (Bidirectional Encoder Representations from Transformers) model, which is a pre-trained language model that has been fine-tuned for a variety of natural language processing tasks.\n\nHere are some key details about my model:\n\n* **Model type:** Transformer-based language model\n* **Architecture:** BERT (Bidirectional Encoder Representations from Transformers)\n* **Training data:** A massive corpus of text data, including but not limited to:\n\t+ Web pages\n\t+ Books\n\t+ Articles\n\t+ Forums\n\t+ Social media platforms\n* **Parameters:** My model has approximately 1.5 billion parameters, which allows me to understand and generate human-like language.\n* **Capabilities:** I can perform a wide range of tasks, including but not limited to:\n\t+ Answering questions\n\t+ Generating text\n\t+ Translating languages\n\t+ Summarizing content\n\t+ Offering suggestions and ideas\n\nI'm constantly learning and improving, so please bear with me if I make any mistakes or don't quite understand what you're asking. How can I assist you today?", - name=None, - tool_calls=None, - function_call=None - ), - logprobs=OpenAIChatCompletionChoiceLogprobs(content=None, refusal=None) - ) - ], - created=1749825661, - model='Llama-3.3-70B-Instruct', - object='chat.completion', - system_fingerprint=None, - usage={ - 'completion_tokens': 258, - 'prompt_tokens': 16, - 'total_tokens': 274, - 'completion_tokens_details': None, - 'prompt_tokens_details': None - }, - service_tier=None -) -``` +While you can provide an `api_key` keyword argument, +we recommend using [python-dotenv](https://pypi.org/project/python-dotenv/) +to add `PETSTORE_API_KEY="My API Key"` to your `.env` file +so that your API Key is not stored in source control. ## Async usage -Simply import `AsyncLlamaStackClient` instead of `LlamaStackClient` and use `await` with each API call: +Simply import `AsyncLlamaStackCli` instead of `LlamaStackCli` and use `await` with each API call: ```python +import os import asyncio -from llama_stack_client import AsyncLlamaStackClient +from llama_stack_cli import AsyncLlamaStackCli -client = AsyncLlamaStackClient( - # defaults to "production". 
- environment="sandbox", +client = AsyncLlamaStackCli( + api_key=os.environ.get("PETSTORE_API_KEY"), # This is the default and can be omitted ) async def main() -> None: - session = await client.agents.sessions.create( - agent_id="agent_id", - session_name="session_name", + order = await client.store.order.create( + pet_id=1, + quantity=1, + status="placed", ) - print(session.session_id) + print(order.id) asyncio.run(main()) @@ -106,6 +76,41 @@ asyncio.run(main()) Functionality between the synchronous and asynchronous clients is otherwise identical. +### With aiohttp + +By default, the async client uses `httpx` for HTTP requests. However, for improved concurrency performance you may also use `aiohttp` as the HTTP backend. + +You can enable this by installing `aiohttp`: + +```sh +# install from this staging repo +pip install 'llama_stack_cli[aiohttp] @ git+ssh://git@github.com/stainless-sdks/llama-stack-cli-python.git' +``` + +Then you can enable it by instantiating the client with `http_client=DefaultAioHttpClient()`: + +```python +import asyncio +from llama_stack_cli import DefaultAioHttpClient +from llama_stack_cli import AsyncLlamaStackCli + + +async def main() -> None: + async with AsyncLlamaStackCli( + api_key="My API Key", + http_client=DefaultAioHttpClient(), + ) as client: + order = await client.store.order.create( + pet_id=1, + quantity=1, + status="placed", + ) + print(order.id) + + +asyncio.run(main()) +``` + ## Using types Nested request parameters are [TypedDicts](https://docs.python.org/3/library/typing.html#typing.TypedDict). Responses are [Pydantic models](https://docs.pydantic.dev) which also provide helper methods for things like: @@ -115,38 +120,52 @@ Nested request parameters are [TypedDicts](https://docs.python.org/3/library/typ Typed requests and responses provide autocomplete and documentation within your editor. If you would like to see type errors in VS Code to help catch bugs earlier, set `python.analysis.typeCheckingMode` to `basic`. +## Nested params + +Nested parameters are dictionaries, typed using `TypedDict`, for example: + +```python +from llama_stack_cli import LlamaStackCli + +client = LlamaStackCli() + +pet = client.pet.create( + name="doggie", + photo_urls=["string"], + category={}, +) +print(pet.category) +``` + ## Handling errors -When the library is unable to connect to the API (for example, due to network connection problems or a timeout), a subclass of `llama_stack_client.APIConnectionError` is raised. +When the library is unable to connect to the API (for example, due to network connection problems or a timeout), a subclass of `llama_stack_cli.APIConnectionError` is raised. When the API returns a non-success status code (that is, 4xx or 5xx -response), a subclass of `llama_stack_client.APIStatusError` is raised, containing `status_code` and `response` properties. +response), a subclass of `llama_stack_cli.APIStatusError` is raised, containing `status_code` and `response` properties. -All errors inherit from `llama_stack_client.APIError`. +All errors inherit from `llama_stack_cli.APIError`. 
```python -import llama_stack_client -from llama_stack_client import LlamaStackClient +import llama_stack_cli +from llama_stack_cli import LlamaStackCli -client = LlamaStackClient() +client = LlamaStackCli() try: - client.agents.sessions.create( - agent_id="agent_id", - session_name="session_name", - ) -except llama_stack_client.APIConnectionError as e: + client.store.list_inventory() +except llama_stack_cli.APIConnectionError as e: print("The server could not be reached") print(e.__cause__) # an underlying Exception, likely raised within httpx. -except llama_stack_client.RateLimitError as e: +except llama_stack_cli.RateLimitError as e: print("A 429 status code was received; we should back off a bit.") -except llama_stack_client.APIStatusError as e: +except llama_stack_cli.APIStatusError as e: print("Another non-200-range status code was received") print(e.status_code) print(e.response) ``` -Error codes are as followed: +Error codes are as follows: | Status Code | Error Type | | ----------- | -------------------------- | @@ -168,45 +187,39 @@ Connection errors (for example, due to a network connectivity problem), 408 Requ You can use the `max_retries` option to configure or disable retry settings: ```python -from llama_stack_client import LlamaStackClient +from llama_stack_cli import LlamaStackCli # Configure the default for all requests: -client = LlamaStackClient( +client = LlamaStackCli( # default is 2 max_retries=0, ) # Or, configure per-request: -client.with_options(max_retries=5).agents.sessions.create( - agent_id="agent_id", - session_name="session_name", -) +client.with_options(max_retries=5).store.list_inventory() ``` ### Timeouts By default requests time out after 1 minute. You can configure this with a `timeout` option, -which accepts a float or an [`httpx.Timeout`](https://www.python-httpx.org/advanced/#fine-tuning-the-configuration) object: +which accepts a float or an [`httpx.Timeout`](https://www.python-httpx.org/advanced/timeouts/#fine-tuning-the-configuration) object: ```python -from llama_stack_client import LlamaStackClient +from llama_stack_cli import LlamaStackCli # Configure the default for all requests: -client = LlamaStackClient( +client = LlamaStackCli( # 20 seconds (default is 1 minute) timeout=20.0, ) # More granular control: -client = LlamaStackClient( +client = LlamaStackCli( timeout=httpx.Timeout(60.0, read=5.0, write=10.0, connect=2.0), ) # Override per-request: -client.with_options(timeout=5.0).agents.sessions.create( - agent_id="agent_id", - session_name="session_name", -) +client.with_options(timeout=5.0).store.list_inventory() ``` On timeout, an `APITimeoutError` is thrown. @@ -219,12 +232,14 @@ Note that requests that time out are [retried twice by default](#retries). We use the standard library [`logging`](https://docs.python.org/3/library/logging.html) module. -You can enable logging by setting the environment variable `LLAMA_STACK_CLIENT_LOG` to `debug`. +You can enable logging by setting the environment variable `LLAMA_STACK_CLI_LOG` to `info`. ```shell -$ export LLAMA_STACK_CLIENT_LOG=debug +$ export LLAMA_STACK_CLI_LOG=info ``` +Or to `debug` for more verbose logging. + ### How to tell whether `None` means `null` or missing In an API response, a field may be explicitly `null`, or missing entirely; in either case, its value is `None` in this library. 
You can differentiate the two cases with `.model_fields_set`: @@ -242,22 +257,19 @@ if response.my_field is None: The "raw" Response object can be accessed by prefixing `.with_raw_response.` to any HTTP method call, e.g., ```py -from llama_stack_client import LlamaStackClient +from llama_stack_cli import LlamaStackCli -client = LlamaStackClient() -response = client.agents.sessions.with_raw_response.create( - agent_id="agent_id", - session_name="session_name", -) +client = LlamaStackCli() +response = client.store.with_raw_response.list_inventory() print(response.headers.get('X-My-Header')) -session = response.parse() # get the object that `agents.sessions.create()` would have returned -print(session.session_id) +store = response.parse() # get the object that `store.list_inventory()` would have returned +print(store) ``` -These methods return an [`APIResponse`](https://github.com/meta-llama/llama-stack-python/tree/main/src/llama_stack_client/_response.py) object. +These methods return an [`APIResponse`](https://github.com/stainless-sdks/llama-stack-cli-python/tree/main/src/llama_stack_cli/_response.py) object. -The async client returns an [`AsyncAPIResponse`](https://github.com/meta-llama/llama-stack-python/tree/main/src/llama_stack_client/_response.py) with the same structure, the only difference being `await`able methods for reading the response content. +The async client returns an [`AsyncAPIResponse`](https://github.com/stainless-sdks/llama-stack-cli-python/tree/main/src/llama_stack_cli/_response.py) with the same structure, the only difference being `await`able methods for reading the response content. #### `.with_streaming_response` @@ -266,10 +278,7 @@ The above interface eagerly reads the full response body when you make the reque To stream the response body, use `.with_streaming_response` instead, which requires a context manager and only reads the response body once you call `.read()`, `.text()`, `.json()`, `.iter_bytes()`, `.iter_text()`, `.iter_lines()` or `.parse()`. In the async client, these are async methods. ```python -with client.agents.sessions.with_streaming_response.create( - agent_id="agent_id", - session_name="session_name", -) as response: +with client.store.with_streaming_response.list_inventory() as response: print(response.headers.get("X-My-Header")) for line in response.iter_lines(): @@ -287,8 +296,7 @@ If you need to access undocumented endpoints, params, or response properties, th #### Undocumented endpoints To make requests to undocumented endpoints, you can make requests using `client.get`, `client.post`, and other -http verbs. Options on the client will be respected (such as retries) will be respected when making this -request. +http verbs. Options on the client will be respected (such as retries) when making this request. 
```py import httpx @@ -317,18 +325,19 @@ can also get all the extra fields on the Pydantic model as a dict with You can directly override the [httpx client](https://www.python-httpx.org/api/#client) to customize it for your use case, including: -- Support for proxies -- Custom transports +- Support for [proxies](https://www.python-httpx.org/advanced/proxies/) +- Custom [transports](https://www.python-httpx.org/advanced/transports/) - Additional [advanced](https://www.python-httpx.org/advanced/clients/) functionality ```python -from llama_stack_client import LlamaStackClient, DefaultHttpxClient +import httpx +from llama_stack_cli import LlamaStackCli, DefaultHttpxClient -client = LlamaStackClient( - # Or use the `LLAMA_STACK_CLIENT_BASE_URL` env var +client = LlamaStackCli( + # Or use the `LLAMA_STACK_CLI_BASE_URL` env var base_url="http://my.test.server.example.com:8083", http_client=DefaultHttpxClient( - proxies="http://my.test.proxy.example.com", + proxy="http://my.test.proxy.example.com", transport=httpx.HTTPTransport(local_address="0.0.0.0"), ), ) @@ -344,17 +353,27 @@ client.with_options(http_client=DefaultHttpxClient(...)) By default the library closes underlying HTTP connections whenever the client is [garbage collected](https://docs.python.org/3/reference/datamodel.html#object.__del__). You can manually close the client using the `.close()` method if desired, or with a context manager that closes when exiting. +```py +from llama_stack_cli import LlamaStackCli + +with LlamaStackCli() as client: + # make requests here + ... + +# HTTP client is now closed +``` + ## Versioning This package generally follows [SemVer](https://semver.org/spec/v2.0.0.html) conventions, though certain backwards-incompatible changes may be released as minor versions: 1. Changes that only affect static types, without breaking runtime behavior. -2. Changes to library internals which are technically public but not intended or documented for external use. _(Please open a GitHub issue to let us know if you are relying on such internals)_. +2. Changes to library internals which are technically public but not intended or documented for external use. _(Please open a GitHub issue to let us know if you are relying on such internals.)_ 3. Changes that we do not expect to impact the vast majority of users in practice. We take backwards-compatibility seriously and work hard to ensure you can rely on a smooth upgrade experience. -We are keen for your feedback; please open an [issue](https://www.github.com/meta-llama/llama-stack-python/issues) with questions, bugs, or suggestions. +We are keen for your feedback; please open an [issue](https://www.github.com/stainless-sdks/llama-stack-cli-python/issues) with questions, bugs, or suggestions. ### Determining the installed version @@ -363,10 +382,14 @@ If you've upgraded to the latest version but aren't seeing any new features you You can determine the version that is being used at runtime with: ```py -import llama_stack_client -print(llama_stack_client.__version__) +import llama_stack_cli +print(llama_stack_cli.__version__) ``` ## Requirements -Python 3.10 or higher. +Python 3.8 or higher. + +## Contributing + +See [the contributing documentation](./CONTRIBUTING.md). diff --git a/SECURITY.md b/SECURITY.md index 0117165c..df3e40a2 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -2,9 +2,9 @@ ## Reporting Security Issues -This SDK is generated by [Stainless Software Inc](http://stainlessapi.com). 
Stainless takes security seriously, and encourages you to report any security vulnerability promptly so that appropriate action can be taken. +This SDK is generated by [Stainless Software Inc](http://stainless.com). Stainless takes security seriously, and encourages you to report any security vulnerability promptly so that appropriate action can be taken. -To report a security issue, please contact the Stainless team at security@stainlessapi.com. +To report a security issue, please contact the Stainless team at security@stainless.com. ## Responsible Disclosure @@ -16,11 +16,7 @@ before making any information public. ## Reporting Non-SDK Related Security Issues If you encounter security issues that are not directly related to SDKs but pertain to the services -or products provided by Llama Stack Client please follow the respective company's security reporting guidelines. - -### Llama Stack Client Terms and Policies - -Please contact dev-feedback@llama-stack-client.com for any questions or concerns regarding security of our services. +or products provided by Llama Stack Cli, please follow the respective company's security reporting guidelines. --- diff --git a/api.md b/api.md index b7a863dd..eb5300e7 100644 --- a/api.md +++ b/api.md @@ -1,346 +1,68 @@ # Shared Types ```python -from llama_stack_client.types import ( - Attachment, - BatchCompletion, - CompletionMessage, - SamplingParams, - SystemMessage, - ToolCall, - ToolResponseMessage, - UserMessage, -) -``` - -# Telemetry - -Types: - -```python -from llama_stack_client.types import TelemetryGetTraceResponse -``` - -Methods: - -- client.telemetry.get_trace(\*\*params) -> TelemetryGetTraceResponse -- client.telemetry.log(\*\*params) -> None - -# Agents - -Types: - -```python -from llama_stack_client.types import ( - InferenceStep, - MemoryRetrievalStep, - RestAPIExecutionConfig, - ShieldCallStep, - ToolExecutionStep, - ToolParamDefinition, - AgentCreateResponse, -) -``` - -Methods: - -- client.agents.create(\*\*params) -> AgentCreateResponse -- client.agents.delete(\*\*params) -> None - -## Sessions - -Types: - -```python -from llama_stack_client.types.agents import Session, SessionCreateResponse -``` - -Methods: - -- client.agents.sessions.create(\*\*params) -> SessionCreateResponse -- client.agents.sessions.retrieve(\*\*params) -> Session -- client.agents.sessions.delete(\*\*params) -> None - -## Steps - -Types: - -```python -from llama_stack_client.types.agents import AgentsStep -``` - -Methods: - -- client.agents.steps.retrieve(\*\*params) -> AgentsStep - -## Turns - -Types: - -```python -from llama_stack_client.types.agents import AgentsTurnStreamChunk, Turn, TurnStreamEvent -``` - -Methods: - -- client.agents.turns.create(\*\*params) -> AgentsTurnStreamChunk -- client.agents.turns.retrieve(\*\*params) -> Turn - -# Datasets - -Types: - -```python -from llama_stack_client.types import TrainEvalDataset -``` - -Methods: - -- client.datasets.create(\*\*params) -> None -- client.datasets.delete(\*\*params) -> None -- client.datasets.get(\*\*params) -> TrainEvalDataset - -# Evaluate - -Types: - -```python -from llama_stack_client.types import EvaluationJob -``` - -## Jobs - -Types: - -```python -from llama_stack_client.types.evaluate import ( - EvaluationJobArtifacts, - EvaluationJobLogStream, - EvaluationJobStatus, -) -``` - -Methods: - -- client.evaluate.jobs.list() -> EvaluationJob -- client.evaluate.jobs.cancel(\*\*params) -> None - -### Artifacts - -Methods: - -- client.evaluate.jobs.artifacts.list(\*\*params) -> EvaluationJobArtifacts - 
-### Logs - -Methods: - -- client.evaluate.jobs.logs.list(\*\*params) -> EvaluationJobLogStream - -### Status - -Methods: - -- client.evaluate.jobs.status.list(\*\*params) -> EvaluationJobStatus - -## QuestionAnswering - -Methods: - -- client.evaluate.question_answering.create(\*\*params) -> EvaluationJob - -# Evaluations - -Methods: - -- client.evaluations.summarization(\*\*params) -> EvaluationJob -- client.evaluations.text_generation(\*\*params) -> EvaluationJob - -# Inference - -Types: - -```python -from llama_stack_client.types import ( - ChatCompletionStreamChunk, - CompletionStreamChunk, - TokenLogProbs, - InferenceChatCompletionResponse, - InferenceCompletionResponse, -) -``` - -Methods: - -- client.inference.chat_completion(\*\*params) -> InferenceChatCompletionResponse -- client.inference.completion(\*\*params) -> InferenceCompletionResponse - -## Embeddings - -Types: - -```python -from llama_stack_client.types.inference import Embeddings -``` - -Methods: - -- client.inference.embeddings.create(\*\*params) -> Embeddings - -# Safety - -Types: - -```python -from llama_stack_client.types import RunSheidResponse -``` - -Methods: - -- client.safety.run_shield(\*\*params) -> RunSheidResponse - -# Memory - -Types: - -```python -from llama_stack_client.types import ( - QueryDocuments, - MemoryCreateResponse, - MemoryRetrieveResponse, - MemoryListResponse, - MemoryDropResponse, -) -``` - -Methods: - -- client.memory.create(\*\*params) -> object -- client.memory.retrieve(\*\*params) -> object -- client.memory.update(\*\*params) -> None -- client.memory.list() -> object -- client.memory.drop(\*\*params) -> str -- client.memory.insert(\*\*params) -> None -- client.memory.query(\*\*params) -> QueryDocuments - -## Documents - -Types: - -```python -from llama_stack_client.types.memory import DocumentRetrieveResponse +from llama_stack_cli.types import Order ``` -Methods: - -- client.memory.documents.retrieve(\*\*params) -> DocumentRetrieveResponse -- client.memory.documents.delete(\*\*params) -> None - -# PostTraining +# Pet Types: ```python -from llama_stack_client.types import PostTrainingJob -``` - -Methods: - -- client.post_training.preference_optimize(\*\*params) -> PostTrainingJob -- client.post_training.supervised_fine_tune(\*\*params) -> PostTrainingJob - -## Jobs - -Types: - -```python -from llama_stack_client.types.post_training import ( - PostTrainingJobArtifacts, - PostTrainingJobLogStream, - PostTrainingJobStatus, +from llama_stack_cli.types import ( + Category, + Pet, + PetFindByStatusResponse, + PetFindByTagsResponse, + PetUploadImageResponse, ) ``` Methods: -- client.post_training.jobs.list() -> PostTrainingJob -- client.post_training.jobs.artifacts(\*\*params) -> PostTrainingJobArtifacts -- client.post_training.jobs.cancel(\*\*params) -> None -- client.post_training.jobs.logs(\*\*params) -> PostTrainingJobLogStream -- client.post_training.jobs.status(\*\*params) -> PostTrainingJobStatus +- client.pet.create(\*\*params) -> Pet +- client.pet.retrieve(pet_id) -> Pet +- client.pet.update(\*\*params) -> Pet +- client.pet.delete(pet_id) -> None +- client.pet.find_by_status(\*\*params) -> PetFindByStatusResponse +- client.pet.find_by_tags(\*\*params) -> PetFindByTagsResponse +- client.pet.update_by_id(pet_id, \*\*params) -> None +- client.pet.upload_image(pet_id, \*\*params) -> PetUploadImageResponse -# RewardScoring +# Store Types: ```python -from llama_stack_client.types import RewardScoring, ScoredDialogGenerations +from llama_stack_cli.types import StoreListInventoryResponse ``` 
Methods: -- client.reward_scoring.score(\*\*params) -> RewardScoring - -# SyntheticDataGeneration - -Types: - -```python -from llama_stack_client.types import SyntheticDataGeneration -``` - -Methods: - -- client.synthetic_data_generation.generate(\*\*params) -> SyntheticDataGeneration - -# BatchInference +- client.store.list_inventory() -> StoreListInventoryResponse -Types: - -```python -from llama_stack_client.types import BatchChatCompletion -``` - -Methods: - -- client.batch_inference.chat_completion(\*\*params) -> BatchChatCompletion -- client.batch_inference.completion(\*\*params) -> BatchCompletion - -# Models - -Types: - -```python -from llama_stack_client.types import ModelServingSpec -``` - -Methods: - -- client.models.list() -> ModelServingSpec -- client.models.get(\*\*params) -> Optional - -# MemoryBanks - -Types: - -```python -from llama_stack_client.types import MemoryBankSpec -``` +## Order Methods: -- client.memory_banks.list() -> MemoryBankSpec -- client.memory_banks.get(\*\*params) -> Optional +- client.store.order.create(\*\*params) -> Order +- client.store.order.retrieve(order_id) -> Order +- client.store.order.delete(order_id) -> None -# Shields +# User Types: ```python -from llama_stack_client.types import ShieldSpec +from llama_stack_cli.types import User, UserLoginResponse ``` Methods: -- client.shields.list() -> ShieldSpec -- client.shields.get(\*\*params) -> Optional +- client.user.create(\*\*params) -> User +- client.user.retrieve(username) -> User +- client.user.update(existing_username, \*\*params) -> None +- client.user.delete(username) -> None +- client.user.create_with_list(\*\*params) -> User +- client.user.login(\*\*params) -> str +- client.user.logout() -> None diff --git a/bin/publish-pypi b/bin/publish-pypi index 05bfccbb..826054e9 100644 --- a/bin/publish-pypi +++ b/bin/publish-pypi @@ -3,7 +3,4 @@ set -eux mkdir -p dist rye build --clean -# Patching importlib-metadata version until upstream library version is updated -# https://github.com/pypa/twine/issues/977#issuecomment-2189800841 -"$HOME/.rye/self/bin/python3" -m pip install 'importlib-metadata==7.2.1' rye publish --yes --token=$PYPI_TOKEN diff --git a/docs/cli_reference.md b/docs/cli_reference.md deleted file mode 100644 index 48f36323..00000000 --- a/docs/cli_reference.md +++ /dev/null @@ -1,918 +0,0 @@ -# CLI Reference - -Welcome to the llama-stack-client CLI - a command-line interface for interacting with Llama Stack - -``` -Usage: llama-stack-client [OPTIONS] COMMAND [ARGS]... -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - -* **--version**: Show the version and exit. [default: False] - -* **--endpoint**: Llama Stack distribution endpoint [default: ] - -* **--api-key**: Llama Stack distribution API key [default: ] - -* **--config**: Path to config file - -**Commands** - -* **configure**: Configure Llama Stack Client CLI. - -* **datasets**: Manage datasets. - -* **eval**: Run evaluation tasks. - -* **eval_tasks**: Manage evaluation tasks. - -* **inference**: Inference (chat). - -* **inspect**: Inspect server configuration. - -* **models**: Manage GenAI models. - -* **post_training**: Post-training. - -* **providers**: Manage API providers. - -* **scoring_functions**: Manage scoring functions. - -* **shields**: Manage safety shield services. - -* **toolgroups**: Manage available tool groups. - -* **vector_dbs**: Manage vector databases. - - - -## configure - -Configure Llama Stack Client CLI. 
- -``` -Usage: llama-stack-client configure [OPTIONS] -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - -* **--endpoint**: Llama Stack distribution endpoint [default: ] - -* **--api-key**: Llama Stack distribution API key [default: ] - - - -## datasets - -Manage datasets. - -``` -Usage: llama-stack-client datasets [OPTIONS] COMMAND [ARGS]... -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - -**Commands** - -* **list**: Show available datasets on distribution endpoint - -* **register**: Create a new dataset - -* **unregister**: Remove a dataset - - - -### list - -Show available datasets on distribution endpoint - -``` -Usage: llama-stack-client datasets list [OPTIONS] -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - - - -### register - -Create a new dataset - -``` -Usage: llama-stack-client datasets register [OPTIONS] -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - -* **--dataset-id**: Id of the dataset - -* **--purpose**: Purpose of the dataset - -* **--metadata**: Metadata of the dataset - -* **--url**: URL of the dataset - -* **--dataset-path**: Local file path to the dataset. If specified, upload dataset via URL - - - -### unregister - -Remove a dataset - -``` -Usage: llama-stack-client datasets unregister [OPTIONS] DATASET_ID -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - -**Arguments** - -* **DATASET_ID** - - - -## eval - -Run evaluation tasks. - -``` -Usage: llama-stack-client eval [OPTIONS] COMMAND [ARGS]... -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - -**Commands** - -* **run-benchmark**: Run a evaluation benchmark task - -* **run-scoring**: Run scoring from application datasets - - - -### run-benchmark - -Run a evaluation benchmark task - -``` -Usage: llama-stack-client eval run-benchmark [OPTIONS] BENCHMARK_IDS... -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - -* **--model-id**: model id to run the benchmark eval on - -* **--output-dir**: Path to the dump eval results output directory - -* **--num-examples**: Number of examples to evaluate on, useful for debugging - -* **--temperature**: temperature in the sampling params to run generation [default: 0.0] - -* **--max-tokens**: max-tokens in the sampling params to run generation [default: 4096] - -* **--top-p**: top-p in the sampling params to run generation [default: 0.9] - -* **--repeat-penalty**: repeat-penalty in the sampling params to run generation [default: 1.0] - -* **--visualize**: Visualize evaluation results after completion [default: False] - -**Arguments** - -* **BENCHMARK_IDS** - - - -### run-scoring - -Run scoring from application datasets - -``` -Usage: llama-stack-client eval run-scoring [OPTIONS] SCORING_FUNCTION_IDS... -``` - -**Options** - -* **-h, --help**: Show this message and exit. 
[default: False] - -* **--dataset-id**: Pre-registered dataset_id to score (from llama-stack-client datasets list) - -* **--dataset-path**: Path to the dataset file to score - -* **--scoring-params-config**: Path to the scoring params config file in JSON format - -* **--num-examples**: Number of examples to evaluate on, useful for debugging - -* **--output-dir**: Path to the dump eval results output directory - -* **--visualize**: Visualize evaluation results after completion [default: False] - -**Arguments** - -* **SCORING_FUNCTION_IDS** - - - -## eval-tasks - -Manage evaluation tasks. - -``` -Usage: llama-stack-client eval-tasks [OPTIONS] COMMAND [ARGS]... -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - -**Commands** - -* **list**: Show available eval tasks on distribution endpoint - -* **register**: Register a new eval task - - - -### list - -Show available eval tasks on distribution endpoint - -``` -Usage: llama-stack-client eval-tasks list [OPTIONS] -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - - - -### register - -Register a new eval task - -``` -Usage: llama-stack-client eval-tasks register [OPTIONS] -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - -* **--eval-task-id**: ID of the eval task - -* **--dataset-id**: ID of the dataset to evaluate - -* **--scoring-functions**: Scoring functions to use for evaluation - -* **--provider-id**: Provider ID for the eval task - -* **--provider-eval-task-id**: Provider's eval task ID - -* **--metadata**: Metadata for the eval task in JSON format - - - -## inference - -Inference (chat). - -``` -Usage: llama-stack-client inference [OPTIONS] COMMAND [ARGS]... -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - -**Commands** - -* **chat-completion**: Show available inference chat completion endpoints on distribution endpoint - - - -### chat-completion - -Show available inference chat completion endpoints on distribution endpoint - -``` -Usage: llama-stack-client inference chat-completion [OPTIONS] -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - -* **--message**: Message - -* **--stream**: Streaming [default: False] - -* **--session**: Start a Chat Session [default: False] - -* **--model-id**: Model ID - - - -## inspect - -Inspect server configuration. - -``` -Usage: llama-stack-client inspect [OPTIONS] COMMAND [ARGS]... -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - -**Commands** - -* **version**: Show available providers on distribution endpoint - - - -### version - -Show available providers on distribution endpoint - -``` -Usage: llama-stack-client inspect version [OPTIONS] -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - - - -## models - -Manage GenAI models. - -``` -Usage: llama-stack-client models [OPTIONS] COMMAND [ARGS]... -``` - -**Options** - -* **-h, --help**: Show this message and exit. 
[default: False] - -**Commands** - -* **get**: Show details of a specific model at the distribution endpoint - -* **list**: Show available llama models at distribution endpoint - -* **register**: Register a new model at distribution endpoint - -* **unregister**: Unregister a model from distribution endpoint - - - -### get - -Show details of a specific model at the distribution endpoint - -``` -Usage: llama-stack-client models get [OPTIONS] MODEL_ID -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - -**Arguments** - -* **MODEL_ID** - - - -### list - -Show available llama models at distribution endpoint - -``` -Usage: llama-stack-client models list [OPTIONS] -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - - - -### register - -Register a new model at distribution endpoint - -``` -Usage: llama-stack-client models register [OPTIONS] MODEL_ID -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - -* **--provider-id**: Provider ID for the model - -* **--provider-model-id**: Provider's model ID - -* **--metadata**: JSON metadata for the model - -**Arguments** - -* **MODEL_ID** - - - -### unregister - -Unregister a model from distribution endpoint - -``` -Usage: llama-stack-client models unregister [OPTIONS] MODEL_ID -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - -**Arguments** - -* **MODEL_ID** - - - -## post-training - -Post-training. - -``` -Usage: llama-stack-client post-training [OPTIONS] COMMAND [ARGS]... -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - -**Commands** - -* **artifacts**: Get the training artifacts of a specific post training job - -* **cancel**: Cancel the training job - -* **list**: Show the list of available post training jobs - -* **status**: Show the status of a specific post training job - -* **supervised_fine_tune**: Kick off a supervised fine tune job - - - -### artifacts - -Get the training artifacts of a specific post training job - -``` -Usage: llama-stack-client post-training artifacts [OPTIONS] -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - -* **--job-uuid**: Job UUID - - - -### cancel - -Cancel the training job - -``` -Usage: llama-stack-client post-training cancel [OPTIONS] -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - -* **--job-uuid**: Job UUID - - - -### list - -Show the list of available post training jobs - -``` -Usage: llama-stack-client post-training list [OPTIONS] -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - - - -### status - -Show the status of a specific post training job - -``` -Usage: llama-stack-client post-training status [OPTIONS] -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - -* **--job-uuid**: Job UUID - - - -### supervised_fine_tune - -Kick off a supervised fine tune job - -``` -Usage: llama-stack-client post-training supervised_fine_tune - [OPTIONS] -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - -* **--job-uuid**: Job UUID - -* **--model**: Model ID - -* **--algorithm-config**: Algorithm Config - -* **--training-config**: Training Config - -* **--checkpoint-dir**: Checkpoint Config - - - -## providers - -Manage API providers. - -``` -Usage: llama-stack-client providers [OPTIONS] COMMAND [ARGS]... 
-``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - -**Commands** - -* **inspect**: Show available providers on distribution endpoint - -* **list**: Show available providers on distribution endpoint - - - -### inspect - -Show available providers on distribution endpoint - -``` -Usage: llama-stack-client providers inspect [OPTIONS] PROVIDER_ID -``` - -**Options** - -* **--help**: Show this message and exit. [default: False] - -**Arguments** - -* **PROVIDER_ID** - - - -### list - -Show available providers on distribution endpoint - -``` -Usage: llama-stack-client providers list [OPTIONS] -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - - - -## scoring-functions - -Manage scoring functions. - -``` -Usage: llama-stack-client scoring-functions [OPTIONS] COMMAND [ARGS]... -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - -**Commands** - -* **list**: Show available scoring functions on distribution endpoint - -* **register**: Register a new scoring function - - - -### list - -Show available scoring functions on distribution endpoint - -``` -Usage: llama-stack-client scoring-functions list [OPTIONS] -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - - - -### register - -Register a new scoring function - -``` -Usage: llama-stack-client scoring-functions register [OPTIONS] -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - -* **--scoring-fn-id**: Id of the scoring function - -* **--description**: Description of the scoring function - -* **--return-type**: Return type of the scoring function - -* **--provider-id**: Provider ID for the scoring function - -* **--provider-scoring-fn-id**: Provider's scoring function ID - -* **--params**: Parameters for the scoring function in JSON format - - - -## shields - -Manage safety shield services. - -``` -Usage: llama-stack-client shields [OPTIONS] COMMAND [ARGS]... -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - -**Commands** - -* **list**: Show available safety shields on distribution endpoint - -* **register**: Register a new safety shield - - - -### list - -Show available safety shields on distribution endpoint - -``` -Usage: llama-stack-client shields list [OPTIONS] -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - - - -### register - -Register a new safety shield - -``` -Usage: llama-stack-client shields register [OPTIONS] -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - -* **--shield-id**: Id of the shield - -* **--provider-id**: Provider ID for the shield - -* **--provider-shield-id**: Provider's shield ID - -* **--params**: JSON configuration parameters for the shield - - - -## toolgroups - -Manage available tool groups. - -``` -Usage: llama-stack-client toolgroups [OPTIONS] COMMAND [ARGS]... -``` - -**Options** - -* **-h, --help**: Show this message and exit. 
[default: False] - -**Commands** - -* **get**: Show available llama toolgroups at distribution endpoint - -* **list**: Show available llama toolgroups at distribution endpoint - -* **register**: Register a new toolgroup at distribution endpoint - -* **unregister**: Unregister a toolgroup from distribution endpoint - - - -### get - -Show available llama toolgroups at distribution endpoint - -``` -Usage: llama-stack-client toolgroups get [OPTIONS] TOOLGROUP_ID -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - -**Arguments** - -* **TOOLGROUP_ID** - - - -### list - -Show available llama toolgroups at distribution endpoint - -``` -Usage: llama-stack-client toolgroups list [OPTIONS] -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - - - -### register - -Register a new toolgroup at distribution endpoint - -``` -Usage: llama-stack-client toolgroups register [OPTIONS] TOOLGROUP_ID -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - -* **--provider-id**: Provider ID for the toolgroup - -* **--provider-toolgroup-id**: Provider's toolgroup ID - -* **--mcp-config**: JSON mcp_config for the toolgroup - -* **--args**: JSON args for the toolgroup - -**Arguments** - -* **TOOLGROUP_ID** - - - -### unregister - -Unregister a toolgroup from distribution endpoint - -``` -Usage: llama-stack-client toolgroups unregister [OPTIONS] TOOLGROUP_ID -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - -**Arguments** - -* **TOOLGROUP_ID** - - - -## vector-dbs - -Manage vector databases. - -``` -Usage: llama-stack-client vector-dbs [OPTIONS] COMMAND [ARGS]... -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - -**Commands** - -* **list**: Show available vector dbs on distribution endpoint - -* **register**: Create a new vector db - -* **unregister**: Delete a vector db - - - -### list - -Show available vector dbs on distribution endpoint - -``` -Usage: llama-stack-client vector-dbs list [OPTIONS] -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - - - -### register - -Create a new vector db - -``` -Usage: llama-stack-client vector-dbs register [OPTIONS] VECTOR_DB_ID -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - -* **--provider-id**: Provider ID for the vector db - -* **--provider-vector-db-id**: Provider's vector db ID - -* **--embedding-model**: Embedding model (for vector type) [default: all-MiniLM-L6-v2] - -* **--embedding-dimension**: Embedding dimension (for vector type) [default: 384] - -**Arguments** - -* **VECTOR_DB_ID** - - - -### unregister - -Delete a vector db - -``` -Usage: llama-stack-client vector-dbs unregister [OPTIONS] VECTOR_DB_ID -``` - -**Options** - -* **-h, --help**: Show this message and exit. [default: False] - -**Arguments** - -* **VECTOR_DB_ID** diff --git a/examples/.keep b/examples/.keep index 0651c89c..d8c73e93 100644 --- a/examples/.keep +++ b/examples/.keep @@ -1,4 +1,4 @@ File generated from our OpenAPI spec by Stainless. This directory can be used to store example files demonstrating usage of this SDK. -It is ignored by Stainless code generation and its content (other than this keep file) won't be touched. +It is ignored by Stainless code generation and its content (other than this keep file) won't be touched. 
\ No newline at end of file diff --git a/examples/mcp_agent.py b/examples/mcp_agent.py deleted file mode 100644 index 8dfd2f69..00000000 --- a/examples/mcp_agent.py +++ /dev/null @@ -1,136 +0,0 @@ -import json -import logging -from urllib.parse import urlparse - -import fire -import httpx -from llama_stack_client import Agent, AgentEventLogger, LlamaStackClient -from llama_stack_client.lib import get_oauth_token_for_mcp_server -from rich import print as rprint - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -import tempfile -from pathlib import Path - -TMP_DIR = Path(tempfile.gettempdir()) / "llama-stack" -TMP_DIR.mkdir(parents=True, exist_ok=True) - -CACHE_FILE = TMP_DIR / "mcp_tokens.json" - - -def main(model_id: str, mcp_servers: str = "https://mcp.asana.com/sse", llama_stack_url: str = "http://localhost:8321"): - """Run an MCP agent with the specified model and servers. - - Args: - model_id: The model to use for the agent. - mcp_servers: Comma-separated list of MCP servers to use for the agent. - llama_stack_url: The URL of the Llama Stack server to use. - - Examples: - python mcp_agent.py "meta-llama/Llama-4-Scout-17B-16E-Instruct" \ - -m "https://mcp.asana.com/sse" \ - -l "http://localhost:8321" - """ - client = LlamaStackClient(base_url=llama_stack_url) - if not check_model_exists(client, model_id): - return - - servers = [s.strip() for s in mcp_servers.split(",")] - mcp_headers = get_and_cache_mcp_headers(servers) - - toolgroup_ids = [] - for server in servers: - # we cannot use "/" in the toolgroup_id because we have some tech debt from earlier which uses - # "/" as a separator for toolgroup_id and tool_name. We should fix this in the future. - group_id = urlparse(server).netloc - toolgroup_ids.append(group_id) - client.toolgroups.register( - toolgroup_id=group_id, mcp_endpoint=dict(uri=server), provider_id="model-context-protocol" - ) - - agent = Agent( - client=client, - model=model_id, - instructions="You are a helpful assistant who can use tools when necessary to answer questions.", - tools=toolgroup_ids, - extra_headers={ - "X-LlamaStack-Provider-Data": json.dumps( - { - "mcp_headers": mcp_headers, - } - ), - }, - ) - - session_id = agent.create_session("test-session") - - while True: - user_input = input("Enter a question: ") - if user_input.lower() in ("q", "quit", "exit", "bye", ""): - print("Exiting...") - break - response = agent.create_turn( - session_id=session_id, - messages=[{"role": "user", "content": user_input}], - stream=True, - ) - for log in AgentEventLogger().log(response): - log.print() - - -def check_model_exists(client: LlamaStackClient, model_id: str) -> bool: - models = [m for m in client.models.list() if m.model_type == "llm"] - if model_id not in [m.identifier for m in models]: - rprint(f"[red]Model {model_id} not found[/red]") - rprint("[yellow]Available models:[/yellow]") - for model in models: - rprint(f" - {model.identifier}") - return False - return True - - -def get_and_cache_mcp_headers(servers: list[str]) -> dict[str, dict[str, str]]: - mcp_headers = {} - - logger.info(f"Using cache file: {CACHE_FILE} for MCP tokens") - tokens = {} - if CACHE_FILE.exists(): - with open(CACHE_FILE, "r") as f: - tokens = json.load(f) - for server, token in tokens.items(): - mcp_headers[server] = { - "Authorization": f"Bearer {token}", - } - - for server in servers: - with httpx.Client() as http_client: - headers = mcp_headers.get(server, {}) - try: - response = http_client.get(server, headers=headers, timeout=1.0) - except 
httpx.TimeoutException: - # timeout means success since we did not get an immediate 40X - continue - - if response.status_code in (401, 403): - logger.info(f"Server {server} requires authentication, getting token") - token = get_oauth_token_for_mcp_server(server) - if not token: - logger.error(f"No token obtained for {server}") - return - - tokens[server] = token - mcp_headers[server] = { - "Authorization": f"Bearer {token}", - } - - with open(CACHE_FILE, "w") as f: - json.dump(tokens, f, indent=2) - - return mcp_headers - - -if __name__ == "__main__": - fire.Fire(main) diff --git a/examples/post_training/supervised_fine_tune_client.py b/examples/post_training/supervised_fine_tune_client.py deleted file mode 100644 index 1aca6ee1..00000000 --- a/examples/post_training/supervised_fine_tune_client.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. -# Copyright (c) Meta Platforms, Inc. and affiliates. -# This software may be used and distributed in accordance with the terms of the Llama 3 Community License Agreement. - -import asyncio -from typing import Optional - -import fire -from llama_stack_client import LlamaStackClient - -from llama_stack_client.types.post_training_supervised_fine_tune_params import ( - AlgorithmConfigLoraFinetuningConfig, - TrainingConfig, - TrainingConfigDataConfig, - TrainingConfigEfficiencyConfig, - TrainingConfigOptimizerConfig, -) - - -async def run_main( - host: str, - port: int, - job_uuid: str, - model: str, - use_https: bool = False, - checkpoint_dir: Optional[str] = None, - cert_path: Optional[str] = None, -): - # Construct the base URL with the appropriate protocol - protocol = "https" if use_https else "http" - base_url = f"{protocol}://{host}:{port}" - - # Configure client with SSL certificate if provided - client_kwargs = {"base_url": base_url} - if use_https and cert_path: - client_kwargs["verify"] = cert_path - - client = LlamaStackClient(**client_kwargs) - - algorithm_config = AlgorithmConfigLoraFinetuningConfig( - type="LoRA", - lora_attn_modules=["q_proj", "v_proj", "output_proj"], - apply_lora_to_mlp=True, - apply_lora_to_output=False, - rank=8, - alpha=16, - ) - - data_config = TrainingConfigDataConfig( - dataset_id="alpaca", - validation_dataset_id="alpaca", - batch_size=1, - shuffle=False, - ) - - optimizer_config = TrainingConfigOptimizerConfig( - optimizer_type="adamw", - lr=3e-4, - weight_decay=0.1, - num_warmup_steps=100, - ) - - effiency_config = TrainingConfigEfficiencyConfig( - enable_activation_checkpointing=True, - ) - - training_config = TrainingConfig( - n_epochs=1, - data_config=data_config, - efficiency_config=effiency_config, - optimizer_config=optimizer_config, - max_steps_per_epoch=30, - gradient_accumulation_steps=1, - ) - - training_job = client.post_training.supervised_fine_tune( - job_uuid=job_uuid, - model=model, - algorithm_config=algorithm_config, - training_config=training_config, - checkpoint_dir=checkpoint_dir, - # logger_config and hyperparam_search_config haven't been used yet - logger_config={}, - hyperparam_search_config={}, - ) - - print(f"finished the training job: {training_job.job_uuid}") - - -def main( - host: str, - port: int, - job_uuid: str, - model: str, - use_https: bool = False, - checkpoint_dir: Optional[str] = "null", - cert_path: Optional[str] = None, -): - job_uuid = str(job_uuid) - asyncio.run(run_main(host, port, 
job_uuid, model, use_https, checkpoint_dir, cert_path)) - - -if __name__ == "__main__": - fire.Fire(main) diff --git a/mypy.ini b/mypy.ini index 50e57de6..71407d9d 100644 --- a/mypy.ini +++ b/mypy.ini @@ -5,7 +5,10 @@ show_error_codes = True # Exclude _files.py because mypy isn't smart enough to apply # the correct type narrowing and as this is an internal module # it's fine to just use Pyright. -exclude = ^(src/llama_stack_client/_files\.py|_dev/.*\.py)$ +# +# We also exclude our `tests` as mypy doesn't always infer +# types correctly and Pyright will still catch any type errors. +exclude = ^(src/llama_stack_cli/_files\.py|_dev/.*\.py|tests/.*)$ strict_equality = True implicit_reexport = True @@ -38,7 +41,7 @@ cache_fine_grained = True # ``` # Changing this codegen to make mypy happy would increase complexity # and would not be worth it. -disable_error_code = func-returns-value +disable_error_code = func-returns-value,overload-cannot-match # https://github.com/python/mypy/issues/12162 [mypy.overrides] diff --git a/pyproject.toml b/pyproject.toml index e97ef0b4..4d033ff2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,32 +1,30 @@ [project] -name = "llama_stack_client" -version = "0.2.13" -description = "The official Python library for the llama-stack-client API" +name = "llama_stack_cli" +version = "0.0.1-alpha.0" +description = "The official Python library for the llama-stack-cli API" dynamic = ["readme"] license = "Apache-2.0" -authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }] +authors = [ +{ name = "Llama Stack Cli", email = "" }, +] dependencies = [ "httpx>=0.23.0, <1", "pydantic>=1.9.0, <3", - "typing-extensions>=4.7, <5", + "typing-extensions>=4.10, <5", "anyio>=3.5.0, <5", "distro>=1.7.0, <2", "sniffio", - "tqdm", - "rich", - "click", - "pyaml", - "prompt_toolkit", - "pandas", - "termcolor", - "fire", - "requests", ] -requires-python = ">= 3.12" +requires-python = ">= 3.8" classifiers = [ "Typing :: Typed", "Intended Audience :: Developers", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Operating System :: OS Independent", "Operating System :: POSIX", "Operating System :: MacOS", @@ -36,26 +34,63 @@ classifiers = [ "License :: OSI Approved :: Apache Software License" ] -[dependency-groups] -dev = [ - "pytest>=7.1.1", - "pytest-asyncio", - "pre-commit", - "black", - "ruff", +[project.urls] +Homepage = "https://github.com/stainless-sdks/llama-stack-cli-python" +Repository = "https://github.com/stainless-sdks/llama-stack-cli-python" + +[project.optional-dependencies] +aiohttp = ["aiohttp", "httpx_aiohttp>=0.1.8"] + +[tool.rye] +managed = true +# version pins are in requirements-dev.lock +dev-dependencies = [ + "pyright==1.1.399", "mypy", "respx", - "dirty-equals" + "pytest", + "pytest-asyncio", + "ruff", + "time-machine", + "nox", + "dirty-equals>=0.6.0", + "importlib-metadata>=6.7.0", + "rich>=13.7.1", + "nest_asyncio==1.6.0", + "pytest-xdist>=3.6.1", ] -[project.urls] -Homepage = "https://github.com/meta-llama/llama-stack-client-python" -Repository = "https://github.com/meta-llama/llama-stack-client-python" - - +[tool.rye.scripts] +format = { chain = [ + "format:ruff", + "format:docs", + "fix:ruff", + # run formatting again to fix any inconsistencies when imports are stripped + "format:ruff", +]} +"format:docs" = "python scripts/utils/ruffen-docs.py 
README.md api.md" +"format:ruff" = "ruff format" + +"lint" = { chain = [ + "check:ruff", + "typecheck", + "check:importable", +]} +"check:ruff" = "ruff check ." +"fix:ruff" = "ruff check --fix ." + +"check:importable" = "python -c 'import llama_stack_cli'" + +typecheck = { chain = [ + "typecheck:pyright", + "typecheck:mypy" +]} +"typecheck:pyright" = "pyright" +"typecheck:verify-types" = "pyright --verifytypes llama_stack_cli --ignoreexternal" +"typecheck:mypy" = "mypy ." [build-system] -requires = ["hatchling", "hatch-fancy-pypi-readme"] +requires = ["hatchling==1.26.3", "hatch-fancy-pypi-readme"] build-backend = "hatchling.build" [tool.hatch.build] @@ -64,7 +99,7 @@ include = [ ] [tool.hatch.build.targets.wheel] -packages = ["src/llama_stack_client"] +packages = ["src/llama_stack_cli"] [tool.hatch.build.targets.sdist] # Basically everything except hidden files/directories (such as .github, .devcontainers, .python-version, etc) @@ -90,11 +125,41 @@ path = "README.md" [[tool.hatch.metadata.hooks.fancy-pypi-readme.substitutions]] # replace relative links with absolute links pattern = '\[(.+?)\]\(((?!https?://)\S+?)\)' -replacement = '[\1](https://github.com/meta-llama/llama-stack-client-python/tree/main/\g<2>)' +replacement = '[\1](https://github.com/stainless-sdks/llama-stack-cli-python/tree/main/\g<2>)' + +[tool.pytest.ini_options] +testpaths = ["tests"] +addopts = "--tb=short -n auto" +xfail_strict = true +asyncio_mode = "auto" +asyncio_default_fixture_loop_scope = "session" +filterwarnings = [ + "error" +] + +[tool.pyright] +# this enables practically every flag given by pyright. +# there are a couple of flags that are still disabled by +# default in strict mode as they are experimental and niche. +typeCheckingMode = "strict" +pythonVersion = "3.8" + +exclude = [ + "_dev", + ".venv", + ".nox", +] + +reportImplicitOverride = true +reportOverlappingOverload = false + +reportImportCycles = false +reportPrivateUsage = false [tool.ruff] line-length = 120 output-format = "grouped" +target-version = "py37" [tool.ruff.format] docstring-code-format = true @@ -115,7 +180,7 @@ select = [ "T201", "T203", # misuse of typing.TYPE_CHECKING - "TCH004", + "TC004", # import rules "TID251", ] @@ -137,14 +202,10 @@ length-sort = true length-sort-straight = true combine-as-imports = true extra-standard-library = ["typing_extensions"] -known-first-party = ["llama_stack_client", "tests"] +known-first-party = ["llama_stack_cli", "tests"] [tool.ruff.lint.per-file-ignores] "bin/**.py" = ["T201", "T203"] "scripts/**.py" = ["T201", "T203"] "tests/**.py" = ["T201", "T203"] -"examples/**.py" = ["T201", "T203", "TCH004", "I", "B"] -"src/llama_stack_client/lib/**.py" = ["T201", "T203", "TCH004", "I", "B"] - -[project.scripts] -llama-stack-client = "llama_stack_client.lib.cli.llama_stack_client:main" +"examples/**.py" = ["T201", "T203"] diff --git a/requirements-dev.lock b/requirements-dev.lock new file mode 100644 index 00000000..35b1a5fe --- /dev/null +++ b/requirements-dev.lock @@ -0,0 +1,135 @@ +# generated by rye +# use `rye lock` or `rye sync` to update this lockfile +# +# last locked with the following flags: +# pre: false +# features: [] +# all-features: true +# with-sources: false +# generate-hashes: false +# universal: false + +-e file:. 
+aiohappyeyeballs==2.6.1 + # via aiohttp +aiohttp==3.12.8 + # via httpx-aiohttp + # via llama-stack-cli +aiosignal==1.3.2 + # via aiohttp +annotated-types==0.6.0 + # via pydantic +anyio==4.4.0 + # via httpx + # via llama-stack-cli +argcomplete==3.1.2 + # via nox +async-timeout==5.0.1 + # via aiohttp +attrs==25.3.0 + # via aiohttp +certifi==2023.7.22 + # via httpcore + # via httpx +colorlog==6.7.0 + # via nox +dirty-equals==0.6.0 +distlib==0.3.7 + # via virtualenv +distro==1.8.0 + # via llama-stack-cli +exceptiongroup==1.2.2 + # via anyio + # via pytest +execnet==2.1.1 + # via pytest-xdist +filelock==3.12.4 + # via virtualenv +frozenlist==1.6.2 + # via aiohttp + # via aiosignal +h11==0.16.0 + # via httpcore +httpcore==1.0.9 + # via httpx +httpx==0.28.1 + # via httpx-aiohttp + # via llama-stack-cli + # via respx +httpx-aiohttp==0.1.8 + # via llama-stack-cli +idna==3.4 + # via anyio + # via httpx + # via yarl +importlib-metadata==7.0.0 +iniconfig==2.0.0 + # via pytest +markdown-it-py==3.0.0 + # via rich +mdurl==0.1.2 + # via markdown-it-py +multidict==6.4.4 + # via aiohttp + # via yarl +mypy==1.14.1 +mypy-extensions==1.0.0 + # via mypy +nest-asyncio==1.6.0 +nodeenv==1.8.0 + # via pyright +nox==2023.4.22 +packaging==23.2 + # via nox + # via pytest +platformdirs==3.11.0 + # via virtualenv +pluggy==1.5.0 + # via pytest +propcache==0.3.1 + # via aiohttp + # via yarl +pydantic==2.10.3 + # via llama-stack-cli +pydantic-core==2.27.1 + # via pydantic +pygments==2.18.0 + # via rich +pyright==1.1.399 +pytest==8.3.3 + # via pytest-asyncio + # via pytest-xdist +pytest-asyncio==0.24.0 +pytest-xdist==3.7.0 +python-dateutil==2.8.2 + # via time-machine +pytz==2023.3.post1 + # via dirty-equals +respx==0.22.0 +rich==13.7.1 +ruff==0.9.4 +setuptools==68.2.2 + # via nodeenv +six==1.16.0 + # via python-dateutil +sniffio==1.3.0 + # via anyio + # via llama-stack-cli +time-machine==2.9.0 +tomli==2.0.2 + # via mypy + # via pytest +typing-extensions==4.12.2 + # via anyio + # via llama-stack-cli + # via multidict + # via mypy + # via pydantic + # via pydantic-core + # via pyright +virtualenv==20.24.5 + # via nox +yarl==1.20.0 + # via aiohttp +zipp==3.17.0 + # via importlib-metadata diff --git a/requirements.lock b/requirements.lock new file mode 100644 index 00000000..b74bed05 --- /dev/null +++ b/requirements.lock @@ -0,0 +1,72 @@ +# generated by rye +# use `rye lock` or `rye sync` to update this lockfile +# +# last locked with the following flags: +# pre: false +# features: [] +# all-features: true +# with-sources: false +# generate-hashes: false +# universal: false + +-e file:. 
+aiohappyeyeballs==2.6.1 + # via aiohttp +aiohttp==3.12.8 + # via httpx-aiohttp + # via llama-stack-cli +aiosignal==1.3.2 + # via aiohttp +annotated-types==0.6.0 + # via pydantic +anyio==4.4.0 + # via httpx + # via llama-stack-cli +async-timeout==5.0.1 + # via aiohttp +attrs==25.3.0 + # via aiohttp +certifi==2023.7.22 + # via httpcore + # via httpx +distro==1.8.0 + # via llama-stack-cli +exceptiongroup==1.2.2 + # via anyio +frozenlist==1.6.2 + # via aiohttp + # via aiosignal +h11==0.16.0 + # via httpcore +httpcore==1.0.9 + # via httpx +httpx==0.28.1 + # via httpx-aiohttp + # via llama-stack-cli +httpx-aiohttp==0.1.8 + # via llama-stack-cli +idna==3.4 + # via anyio + # via httpx + # via yarl +multidict==6.4.4 + # via aiohttp + # via yarl +propcache==0.3.1 + # via aiohttp + # via yarl +pydantic==2.10.3 + # via llama-stack-cli +pydantic-core==2.27.1 + # via pydantic +sniffio==1.3.0 + # via anyio + # via llama-stack-cli +typing-extensions==4.12.2 + # via anyio + # via llama-stack-cli + # via multidict + # via pydantic + # via pydantic-core +yarl==1.20.0 + # via aiohttp diff --git a/scripts/bootstrap b/scripts/bootstrap index 8c5c60eb..e84fe62c 100755 --- a/scripts/bootstrap +++ b/scripts/bootstrap @@ -4,7 +4,7 @@ set -e cd "$(dirname "$0")/.." -if [ -f "Brewfile" ] && [ "$(uname -s)" = "Darwin" ]; then +if ! command -v rye >/dev/null 2>&1 && [ -f "Brewfile" ] && [ "$(uname -s)" = "Darwin" ]; then brew bundle check >/dev/null 2>&1 || { echo "==> Installing Homebrew dependencies…" brew bundle diff --git a/scripts/gen_cli_doc.py b/scripts/gen_cli_doc.py deleted file mode 100644 index 53aea424..00000000 --- a/scripts/gen_cli_doc.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -import os -from pathlib import Path - -import click -from llama_stack_client.lib.cli.llama_stack_client import llama_stack_client - - -def generate_markdown_docs(command, parent=None, level=1): - """Generate markdown documentation for a click command.""" - ctx = click.Context(command, info_name=command.name, parent=parent) - - # Start with the command name as a header - prefix = "#" * level - if level == 1: - doc = [f"{prefix} CLI Reference\n"] - else: - doc = [f"{prefix} {command.name}\n"] - - # Add command help docstring - if command.help: - doc.append(f"{command.help}\n") - - # Add usage - doc.append(f"```\n{command.get_usage(ctx)}\n```\n") - - # Add options if present - has_options = False - for param in command.get_params(ctx): - if isinstance(param, click.Option): - if not has_options: - doc.append("**Options**\n") - has_options = True - opts = ", ".join(param.opts) - help_text = param.help or "" - default = f" [default: {param.default}]" if param.default is not None else "" - doc.append(f"* **{opts}**: {help_text}{default}\n") - - # Add arguments if present - has_arguments = False - for param in command.get_params(ctx): - if isinstance(param, click.Argument): - if not has_arguments: - doc.append("**Arguments**\n") - has_arguments = True - name = param.name.upper() - doc.append(f"* **{name}**\n") - - # If this is a group with commands, add subcommands - if isinstance(command, click.Group): - doc.append("**Commands**\n") - for cmd_name in command.list_commands(ctx): - cmd = command.get_command(ctx, cmd_name) - cmd_help = cmd.get_short_help_str(limit=80) if cmd else "" - doc.append(f"* **{cmd_name}**: {cmd_help}\n") - - # Add detailed subcommand documentation - for cmd_name in command.list_commands(ctx): - cmd = command.get_command(ctx, cmd_name) - if cmd: - doc.append("\n") - doc.extend(generate_markdown_docs(cmd, ctx, level + 1)) - - return doc - - -if __name__ == "__main__": - # Generate the docs - markdown_lines = generate_markdown_docs(llama_stack_client) - markdown = "\n".join(markdown_lines) - - # Write to file - file_path = Path(__file__).parent.parent / "docs" / "cli_reference.md" - with open(file_path, "w") as f: - f.write(markdown) - - print(f"Documentation generated in {file_path}") diff --git a/scripts/lint b/scripts/lint index 9a7fc869..ba9286be 100755 --- a/scripts/lint +++ b/scripts/lint @@ -1,11 +1,11 @@ #!/usr/bin/env bash -# set -e +set -e -# cd "$(dirname "$0")/.." +cd "$(dirname "$0")/.." -# echo "==> Running lints" -# rye run lint +echo "==> Running lints" +rye run lint -# echo "==> Making sure it imports" -# rye run python -c 'import llama_stack_client' +echo "==> Making sure it imports" +rye run python -c 'import llama_stack_cli' diff --git a/scripts/test b/scripts/test index e9e543c7..2b878456 100755 --- a/scripts/test +++ b/scripts/test @@ -1,59 +1,61 @@ #!/usr/bin/env bash -# set -e - -# cd "$(dirname "$0")/.." - -# RED='\033[0;31m' -# GREEN='\033[0;32m' -# YELLOW='\033[0;33m' -# NC='\033[0m' # No Color - -# function prism_is_running() { -# curl --silent "http://localhost:4010" >/dev/null 2>&1 -# } - -# kill_server_on_port() { -# pids=$(lsof -t -i tcp:"$1" || echo "") -# if [ "$pids" != "" ]; then -# kill "$pids" -# echo "Stopped $pids." -# fi -# } - -# function is_overriding_api_base_url() { -# [ -n "$TEST_API_BASE_URL" ] -# } - -# if ! is_overriding_api_base_url && ! 
prism_is_running ; then -# # When we exit this script, make sure to kill the background mock server process -# trap 'kill_server_on_port 4010' EXIT - -# # Start the dev server -# ./scripts/mock --daemon -# fi - -# if is_overriding_api_base_url ; then -# echo -e "${GREEN}✔ Running tests against ${TEST_API_BASE_URL}${NC}" -# echo -# elif ! prism_is_running ; then -# echo -e "${RED}ERROR:${NC} The test suite will not run without a mock Prism server" -# echo -e "running against your OpenAPI spec." -# echo -# echo -e "To run the server, pass in the path or url of your OpenAPI" -# echo -e "spec to the prism command:" -# echo -# echo -e " \$ ${YELLOW}npm exec --package=@stoplight/prism-cli@~5.3.2 -- prism mock path/to/your.openapi.yml${NC}" -# echo - -# exit 1 -# else -# echo -e "${GREEN}✔ Mock prism server is running with your OpenAPI spec${NC}" -# echo -# fi - -# echo "==> Running tests" -# rye run pytest "$@" - -# echo "==> Running Pydantic v1 tests" -# rye run nox -s test-pydantic-v1 -- "$@" +set -e + +cd "$(dirname "$0")/.." + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[0;33m' +NC='\033[0m' # No Color + +function prism_is_running() { + curl --silent "http://localhost:4010" >/dev/null 2>&1 +} + +kill_server_on_port() { + pids=$(lsof -t -i tcp:"$1" || echo "") + if [ "$pids" != "" ]; then + kill "$pids" + echo "Stopped $pids." + fi +} + +function is_overriding_api_base_url() { + [ -n "$TEST_API_BASE_URL" ] +} + +if ! is_overriding_api_base_url && ! prism_is_running ; then + # When we exit this script, make sure to kill the background mock server process + trap 'kill_server_on_port 4010' EXIT + + # Start the dev server + ./scripts/mock --daemon +fi + +if is_overriding_api_base_url ; then + echo -e "${GREEN}✔ Running tests against ${TEST_API_BASE_URL}${NC}" + echo +elif ! prism_is_running ; then + echo -e "${RED}ERROR:${NC} The test suite will not run without a mock Prism server" + echo -e "running against your OpenAPI spec." 
+ echo + echo -e "To run the server, pass in the path or url of your OpenAPI" + echo -e "spec to the prism command:" + echo + echo -e " \$ ${YELLOW}npm exec --package=@stoplight/prism-cli@~5.3.2 -- prism mock path/to/your.openapi.yml${NC}" + echo + + exit 1 +else + echo -e "${GREEN}✔ Mock prism server is running with your OpenAPI spec${NC}" + echo +fi + +export DEFER_PYDANTIC_BUILD=false + +echo "==> Running tests" +rye run pytest "$@" + +echo "==> Running Pydantic v1 tests" +rye run nox -s test-pydantic-v1 -- "$@" diff --git a/scripts/utils/ruffen-docs.py b/scripts/utils/ruffen-docs.py index 37b3d94f..0cf2bd2f 100644 --- a/scripts/utils/ruffen-docs.py +++ b/scripts/utils/ruffen-docs.py @@ -47,7 +47,7 @@ def _md_match(match: Match[str]) -> str: with _collect_error(match): code = format_code_block(code) code = textwrap.indent(code, match["indent"]) - return f'{match["before"]}{code}{match["after"]}' + return f"{match['before']}{code}{match['after']}" def _pycon_match(match: Match[str]) -> str: code = "" @@ -97,7 +97,7 @@ def finish_fragment() -> None: def _md_pycon_match(match: Match[str]) -> str: code = _pycon_match(match) code = textwrap.indent(code, match["indent"]) - return f'{match["before"]}{code}{match["after"]}' + return f"{match['before']}{code}{match['after']}" src = MD_RE.sub(_md_match, src) src = MD_PYCON_RE.sub(_md_pycon_match, src) diff --git a/scripts/utils/upload-artifact.sh b/scripts/utils/upload-artifact.sh new file mode 100755 index 00000000..22cb93d2 --- /dev/null +++ b/scripts/utils/upload-artifact.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash +set -exuo pipefail + +FILENAME=$(basename dist/*.whl) + +RESPONSE=$(curl -X POST "$URL?filename=$FILENAME" \ + -H "Authorization: Bearer $AUTH" \ + -H "Content-Type: application/json") + +SIGNED_URL=$(echo "$RESPONSE" | jq -r '.url') + +if [[ "$SIGNED_URL" == "null" ]]; then + echo -e "\033[31mFailed to get signed URL.\033[0m" + exit 1 +fi + +UPLOAD_RESPONSE=$(curl -v -X PUT \ + -H "Content-Type: binary/octet-stream" \ + --data-binary "@dist/$FILENAME" "$SIGNED_URL" 2>&1) + +if echo "$UPLOAD_RESPONSE" | grep -q "HTTP/[0-9.]* 200"; then + echo -e "\033[32mUploaded build to Stainless storage.\033[0m" + echo -e "\033[32mInstallation: pip install 'https://pkg.stainless.com/s/llama-stack-cli-python/$SHA/$FILENAME'\033[0m" +else + echo -e "\033[31mFailed to upload artifact.\033[0m" + exit 1 +fi diff --git a/src/llama_stack_client/__init__.py b/src/llama_stack_cli/__init__.py similarity index 79% rename from src/llama_stack_client/__init__.py rename to src/llama_stack_cli/__init__.py index 4fdd36f2..de52e0fa 100644 --- a/src/llama_stack_client/__init__.py +++ b/src/llama_stack_cli/__init__.py @@ -12,9 +12,9 @@ Transport, AsyncClient, AsyncStream, + LlamaStackCli, RequestOptions, - LlamaStackClient, - AsyncLlamaStackClient, + AsyncLlamaStackCli, ) from ._models import BaseModel from ._version import __title__, __version__ @@ -29,9 +29,9 @@ APITimeoutError, BadRequestError, APIConnectionError, + LlamaStackCliError, AuthenticationError, InternalServerError, - LlamaStackClientError, PermissionDeniedError, UnprocessableEntityError, APIResponseValidationError, @@ -39,12 +39,6 @@ from ._base_client import DefaultHttpxClient, DefaultAioHttpClient, DefaultAsyncHttpxClient from ._utils._logs import setup_logging as _setup_logging -from .lib.agents.agent import Agent -from .lib.agents.event_logger import EventLogger as AgentEventLogger -from .lib.inference.event_logger import EventLogger as InferenceEventLogger -from .types.agents.turn_create_params 
import Document -from .types.shared_params.document import Document as RAGDocument - __all__ = [ "types", "__version__", @@ -55,7 +49,7 @@ "NotGiven", "NOT_GIVEN", "Omit", - "LlamaStackClientError", + "LlamaStackCliError", "APIError", "APIStatusError", "APITimeoutError", @@ -75,8 +69,8 @@ "AsyncClient", "Stream", "AsyncStream", - "LlamaStackClient", - "AsyncLlamaStackClient", + "LlamaStackCli", + "AsyncLlamaStackCli", "file_from_path", "BaseModel", "DEFAULT_TIMEOUT", @@ -95,12 +89,12 @@ # Update the __module__ attribute for exported symbols so that # error messages point to this module instead of the module # it was originally defined in, e.g. -# llama_stack_client._exceptions.NotFoundError -> llama_stack_client.NotFoundError +# llama_stack_cli._exceptions.NotFoundError -> llama_stack_cli.NotFoundError __locals = locals() for __name in __all__: if not __name.startswith("__"): try: - __locals[__name].__module__ = "llama_stack_client" + __locals[__name].__module__ = "llama_stack_cli" except (TypeError, AttributeError): # Some of our exported symbols are builtins which we can't set attributes for. pass diff --git a/src/llama_stack_client/_base_client.py b/src/llama_stack_cli/_base_client.py similarity index 99% rename from src/llama_stack_client/_base_client.py rename to src/llama_stack_cli/_base_client.py index a0bbc468..03d97a29 100644 --- a/src/llama_stack_client/_base_client.py +++ b/src/llama_stack_cli/_base_client.py @@ -389,7 +389,7 @@ def __init__( if max_retries is None: # pyright: ignore[reportUnnecessaryComparison] raise TypeError( - "max_retries cannot be None. If you want to disable retries, pass `0`; if you want unlimited retries, pass `math.inf` or a very high number; if you want the default behavior, pass `llama_stack_client.DEFAULT_MAX_RETRIES`" + "max_retries cannot be None. If you want to disable retries, pass `0`; if you want unlimited retries, pass `math.inf` or a very high number; if you want the default behavior, pass `llama_stack_cli.DEFAULT_MAX_RETRIES`" ) def _enforce_trailing_slash(self, url: URL) -> URL: @@ -529,6 +529,15 @@ def _build_request( # work around https://github.com/encode/httpx/discussions/2880 kwargs["extensions"] = {"sni_hostname": prepared_url.host.replace("_", "-")} + is_body_allowed = options.method.lower() != "get" + + if is_body_allowed: + kwargs["json"] = json_data if is_given(json_data) else None + kwargs["files"] = files + else: + headers.pop("Content-Type", None) + kwargs.pop("data", None) + # TODO: report this error to httpx return self._client.build_request( # pyright: ignore[reportUnknownMemberType] headers=headers, @@ -540,8 +549,6 @@ def _build_request( # so that passing a `TypedDict` doesn't cause an error. # https://github.com/microsoft/pyright/issues/3526#event-6715453066 params=self.qs.stringify(cast(Mapping[str, Any], params)) if params else None, - json=json_data if is_given(json_data) else None, - files=files, **kwargs, ) diff --git a/src/llama_stack_cli/_client.py b/src/llama_stack_cli/_client.py new file mode 100644 index 00000000..fe48a12b --- /dev/null +++ b/src/llama_stack_cli/_client.py @@ -0,0 +1,420 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, Union, Mapping +from typing_extensions import Self, override + +import httpx + +from . 
import _exceptions +from ._qs import Querystring +from ._types import ( + NOT_GIVEN, + Omit, + Timeout, + NotGiven, + Transport, + ProxiesTypes, + RequestOptions, +) +from ._utils import is_given, get_async_library +from ._version import __version__ +from .resources import pet, user +from ._streaming import Stream as Stream, AsyncStream as AsyncStream +from ._exceptions import APIStatusError, LlamaStackCliError +from ._base_client import ( + DEFAULT_MAX_RETRIES, + SyncAPIClient, + AsyncAPIClient, +) +from .resources.store import store + +__all__ = [ + "Timeout", + "Transport", + "ProxiesTypes", + "RequestOptions", + "LlamaStackCli", + "AsyncLlamaStackCli", + "Client", + "AsyncClient", +] + + +class LlamaStackCli(SyncAPIClient): + pet: pet.PetResource + store: store.StoreResource + user: user.UserResource + with_raw_response: LlamaStackCliWithRawResponse + with_streaming_response: LlamaStackCliWithStreamedResponse + + # client options + api_key: str + + def __init__( + self, + *, + api_key: str | None = None, + base_url: str | httpx.URL | None = None, + timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN, + max_retries: int = DEFAULT_MAX_RETRIES, + default_headers: Mapping[str, str] | None = None, + default_query: Mapping[str, object] | None = None, + # Configure a custom httpx client. + # We provide a `DefaultHttpxClient` class that you can pass to retain the default values we use for `limits`, `timeout` & `follow_redirects`. + # See the [httpx documentation](https://www.python-httpx.org/api/#client) for more details. + http_client: httpx.Client | None = None, + # Enable or disable schema validation for data returned by the API. + # When enabled an error APIResponseValidationError is raised + # if the API responds with invalid data for the expected schema. + # + # This parameter may be removed or changed in the future. + # If you rely on this feature, please open a GitHub issue + # outlining your use-case to help us decide if it should be + # part of our public interface in the future. + _strict_response_validation: bool = False, + ) -> None: + """Construct a new synchronous LlamaStackCli client instance. + + This automatically infers the `api_key` argument from the `PETSTORE_API_KEY` environment variable if it is not provided. 
+ """ + if api_key is None: + api_key = os.environ.get("PETSTORE_API_KEY") + if api_key is None: + raise LlamaStackCliError( + "The api_key client option must be set either by passing api_key to the client or by setting the PETSTORE_API_KEY environment variable" + ) + self.api_key = api_key + + if base_url is None: + base_url = os.environ.get("LLAMA_STACK_CLI_BASE_URL") + if base_url is None: + base_url = f"https://petstore3.swagger.io/api/v3" + + super().__init__( + version=__version__, + base_url=base_url, + max_retries=max_retries, + timeout=timeout, + http_client=http_client, + custom_headers=default_headers, + custom_query=default_query, + _strict_response_validation=_strict_response_validation, + ) + + self.pet = pet.PetResource(self) + self.store = store.StoreResource(self) + self.user = user.UserResource(self) + self.with_raw_response = LlamaStackCliWithRawResponse(self) + self.with_streaming_response = LlamaStackCliWithStreamedResponse(self) + + @property + @override + def qs(self) -> Querystring: + return Querystring(array_format="comma") + + @property + @override + def auth_headers(self) -> dict[str, str]: + api_key = self.api_key + return {"api_key": api_key} + + @property + @override + def default_headers(self) -> dict[str, str | Omit]: + return { + **super().default_headers, + "X-Stainless-Async": "false", + **self._custom_headers, + } + + def copy( + self, + *, + api_key: str | None = None, + base_url: str | httpx.URL | None = None, + timeout: float | Timeout | None | NotGiven = NOT_GIVEN, + http_client: httpx.Client | None = None, + max_retries: int | NotGiven = NOT_GIVEN, + default_headers: Mapping[str, str] | None = None, + set_default_headers: Mapping[str, str] | None = None, + default_query: Mapping[str, object] | None = None, + set_default_query: Mapping[str, object] | None = None, + _extra_kwargs: Mapping[str, Any] = {}, + ) -> Self: + """ + Create a new client instance re-using the same options given to the current client with optional overriding. + """ + if default_headers is not None and set_default_headers is not None: + raise ValueError("The `default_headers` and `set_default_headers` arguments are mutually exclusive") + + if default_query is not None and set_default_query is not None: + raise ValueError("The `default_query` and `set_default_query` arguments are mutually exclusive") + + headers = self._custom_headers + if default_headers is not None: + headers = {**headers, **default_headers} + elif set_default_headers is not None: + headers = set_default_headers + + params = self._custom_query + if default_query is not None: + params = {**params, **default_query} + elif set_default_query is not None: + params = set_default_query + + http_client = http_client or self._client + return self.__class__( + api_key=api_key or self.api_key, + base_url=base_url or self.base_url, + timeout=self.timeout if isinstance(timeout, NotGiven) else timeout, + http_client=http_client, + max_retries=max_retries if is_given(max_retries) else self.max_retries, + default_headers=headers, + default_query=params, + **_extra_kwargs, + ) + + # Alias for `copy` for nicer inline usage, e.g. + # client.with_options(timeout=10).foo.create(...) 
+ with_options = copy + + @override + def _make_status_error( + self, + err_msg: str, + *, + body: object, + response: httpx.Response, + ) -> APIStatusError: + if response.status_code == 400: + return _exceptions.BadRequestError(err_msg, response=response, body=body) + + if response.status_code == 401: + return _exceptions.AuthenticationError(err_msg, response=response, body=body) + + if response.status_code == 403: + return _exceptions.PermissionDeniedError(err_msg, response=response, body=body) + + if response.status_code == 404: + return _exceptions.NotFoundError(err_msg, response=response, body=body) + + if response.status_code == 409: + return _exceptions.ConflictError(err_msg, response=response, body=body) + + if response.status_code == 422: + return _exceptions.UnprocessableEntityError(err_msg, response=response, body=body) + + if response.status_code == 429: + return _exceptions.RateLimitError(err_msg, response=response, body=body) + + if response.status_code >= 500: + return _exceptions.InternalServerError(err_msg, response=response, body=body) + return APIStatusError(err_msg, response=response, body=body) + + +class AsyncLlamaStackCli(AsyncAPIClient): + pet: pet.AsyncPetResource + store: store.AsyncStoreResource + user: user.AsyncUserResource + with_raw_response: AsyncLlamaStackCliWithRawResponse + with_streaming_response: AsyncLlamaStackCliWithStreamedResponse + + # client options + api_key: str + + def __init__( + self, + *, + api_key: str | None = None, + base_url: str | httpx.URL | None = None, + timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN, + max_retries: int = DEFAULT_MAX_RETRIES, + default_headers: Mapping[str, str] | None = None, + default_query: Mapping[str, object] | None = None, + # Configure a custom httpx client. + # We provide a `DefaultAsyncHttpxClient` class that you can pass to retain the default values we use for `limits`, `timeout` & `follow_redirects`. + # See the [httpx documentation](https://www.python-httpx.org/api/#asyncclient) for more details. + http_client: httpx.AsyncClient | None = None, + # Enable or disable schema validation for data returned by the API. + # When enabled an error APIResponseValidationError is raised + # if the API responds with invalid data for the expected schema. + # + # This parameter may be removed or changed in the future. + # If you rely on this feature, please open a GitHub issue + # outlining your use-case to help us decide if it should be + # part of our public interface in the future. + _strict_response_validation: bool = False, + ) -> None: + """Construct a new async AsyncLlamaStackCli client instance. + + This automatically infers the `api_key` argument from the `PETSTORE_API_KEY` environment variable if it is not provided. 
+ """ + if api_key is None: + api_key = os.environ.get("PETSTORE_API_KEY") + if api_key is None: + raise LlamaStackCliError( + "The api_key client option must be set either by passing api_key to the client or by setting the PETSTORE_API_KEY environment variable" + ) + self.api_key = api_key + + if base_url is None: + base_url = os.environ.get("LLAMA_STACK_CLI_BASE_URL") + if base_url is None: + base_url = f"https://petstore3.swagger.io/api/v3" + + super().__init__( + version=__version__, + base_url=base_url, + max_retries=max_retries, + timeout=timeout, + http_client=http_client, + custom_headers=default_headers, + custom_query=default_query, + _strict_response_validation=_strict_response_validation, + ) + + self.pet = pet.AsyncPetResource(self) + self.store = store.AsyncStoreResource(self) + self.user = user.AsyncUserResource(self) + self.with_raw_response = AsyncLlamaStackCliWithRawResponse(self) + self.with_streaming_response = AsyncLlamaStackCliWithStreamedResponse(self) + + @property + @override + def qs(self) -> Querystring: + return Querystring(array_format="comma") + + @property + @override + def auth_headers(self) -> dict[str, str]: + api_key = self.api_key + return {"api_key": api_key} + + @property + @override + def default_headers(self) -> dict[str, str | Omit]: + return { + **super().default_headers, + "X-Stainless-Async": f"async:{get_async_library()}", + **self._custom_headers, + } + + def copy( + self, + *, + api_key: str | None = None, + base_url: str | httpx.URL | None = None, + timeout: float | Timeout | None | NotGiven = NOT_GIVEN, + http_client: httpx.AsyncClient | None = None, + max_retries: int | NotGiven = NOT_GIVEN, + default_headers: Mapping[str, str] | None = None, + set_default_headers: Mapping[str, str] | None = None, + default_query: Mapping[str, object] | None = None, + set_default_query: Mapping[str, object] | None = None, + _extra_kwargs: Mapping[str, Any] = {}, + ) -> Self: + """ + Create a new client instance re-using the same options given to the current client with optional overriding. + """ + if default_headers is not None and set_default_headers is not None: + raise ValueError("The `default_headers` and `set_default_headers` arguments are mutually exclusive") + + if default_query is not None and set_default_query is not None: + raise ValueError("The `default_query` and `set_default_query` arguments are mutually exclusive") + + headers = self._custom_headers + if default_headers is not None: + headers = {**headers, **default_headers} + elif set_default_headers is not None: + headers = set_default_headers + + params = self._custom_query + if default_query is not None: + params = {**params, **default_query} + elif set_default_query is not None: + params = set_default_query + + http_client = http_client or self._client + return self.__class__( + api_key=api_key or self.api_key, + base_url=base_url or self.base_url, + timeout=self.timeout if isinstance(timeout, NotGiven) else timeout, + http_client=http_client, + max_retries=max_retries if is_given(max_retries) else self.max_retries, + default_headers=headers, + default_query=params, + **_extra_kwargs, + ) + + # Alias for `copy` for nicer inline usage, e.g. + # client.with_options(timeout=10).foo.create(...) 
+ with_options = copy + + @override + def _make_status_error( + self, + err_msg: str, + *, + body: object, + response: httpx.Response, + ) -> APIStatusError: + if response.status_code == 400: + return _exceptions.BadRequestError(err_msg, response=response, body=body) + + if response.status_code == 401: + return _exceptions.AuthenticationError(err_msg, response=response, body=body) + + if response.status_code == 403: + return _exceptions.PermissionDeniedError(err_msg, response=response, body=body) + + if response.status_code == 404: + return _exceptions.NotFoundError(err_msg, response=response, body=body) + + if response.status_code == 409: + return _exceptions.ConflictError(err_msg, response=response, body=body) + + if response.status_code == 422: + return _exceptions.UnprocessableEntityError(err_msg, response=response, body=body) + + if response.status_code == 429: + return _exceptions.RateLimitError(err_msg, response=response, body=body) + + if response.status_code >= 500: + return _exceptions.InternalServerError(err_msg, response=response, body=body) + return APIStatusError(err_msg, response=response, body=body) + + +class LlamaStackCliWithRawResponse: + def __init__(self, client: LlamaStackCli) -> None: + self.pet = pet.PetResourceWithRawResponse(client.pet) + self.store = store.StoreResourceWithRawResponse(client.store) + self.user = user.UserResourceWithRawResponse(client.user) + + +class AsyncLlamaStackCliWithRawResponse: + def __init__(self, client: AsyncLlamaStackCli) -> None: + self.pet = pet.AsyncPetResourceWithRawResponse(client.pet) + self.store = store.AsyncStoreResourceWithRawResponse(client.store) + self.user = user.AsyncUserResourceWithRawResponse(client.user) + + +class LlamaStackCliWithStreamedResponse: + def __init__(self, client: LlamaStackCli) -> None: + self.pet = pet.PetResourceWithStreamingResponse(client.pet) + self.store = store.StoreResourceWithStreamingResponse(client.store) + self.user = user.UserResourceWithStreamingResponse(client.user) + + +class AsyncLlamaStackCliWithStreamedResponse: + def __init__(self, client: AsyncLlamaStackCli) -> None: + self.pet = pet.AsyncPetResourceWithStreamingResponse(client.pet) + self.store = store.AsyncStoreResourceWithStreamingResponse(client.store) + self.user = user.AsyncUserResourceWithStreamingResponse(client.user) + + +Client = LlamaStackCli + +AsyncClient = AsyncLlamaStackCli diff --git a/src/llama_stack_client/_compat.py b/src/llama_stack_cli/_compat.py similarity index 100% rename from src/llama_stack_client/_compat.py rename to src/llama_stack_cli/_compat.py diff --git a/src/llama_stack_client/_constants.py b/src/llama_stack_cli/_constants.py similarity index 100% rename from src/llama_stack_client/_constants.py rename to src/llama_stack_cli/_constants.py diff --git a/src/llama_stack_client/_exceptions.py b/src/llama_stack_cli/_exceptions.py similarity index 97% rename from src/llama_stack_client/_exceptions.py rename to src/llama_stack_cli/_exceptions.py index 54cb1cde..b0b10ed4 100644 --- a/src/llama_stack_client/_exceptions.py +++ b/src/llama_stack_cli/_exceptions.py @@ -18,11 +18,11 @@ ] -class LlamaStackClientError(Exception): +class LlamaStackCliError(Exception): pass -class APIError(LlamaStackClientError): +class APIError(LlamaStackCliError): message: str request: httpx.Request diff --git a/src/llama_stack_client/_files.py b/src/llama_stack_cli/_files.py similarity index 96% rename from src/llama_stack_client/_files.py rename to src/llama_stack_cli/_files.py index 45f57c0a..715cc207 100644 --- 
a/src/llama_stack_client/_files.py +++ b/src/llama_stack_cli/_files.py @@ -34,7 +34,7 @@ def assert_is_file_content(obj: object, *, key: str | None = None) -> None: if not is_file_content(obj): prefix = f"Expected entry at `{key}`" if key is not None else f"Expected file input `{obj!r}`" raise RuntimeError( - f"{prefix} to be bytes, an io.IOBase instance, PathLike or a tuple but received {type(obj)} instead. See https://github.com/llamastack/llama-stack-client-python/tree/main#file-uploads" + f"{prefix} to be bytes, an io.IOBase instance, PathLike or a tuple but received {type(obj)} instead." ) from None diff --git a/src/llama_stack_client/_models.py b/src/llama_stack_cli/_models.py similarity index 98% rename from src/llama_stack_client/_models.py rename to src/llama_stack_cli/_models.py index 4f214980..528d5680 100644 --- a/src/llama_stack_client/_models.py +++ b/src/llama_stack_cli/_models.py @@ -2,9 +2,10 @@ import os import inspect -from typing import TYPE_CHECKING, Any, Type, Union, Generic, TypeVar, Callable, cast +from typing import TYPE_CHECKING, Any, Type, Union, Generic, TypeVar, Callable, Optional, cast from datetime import date, datetime from typing_extensions import ( + List, Unpack, Literal, ClassVar, @@ -366,7 +367,7 @@ def _construct_field(value: object, field: FieldInfo, key: str) -> object: if type_ is None: raise RuntimeError(f"Unexpected field type is None for {key}") - return construct_type(value=value, type_=type_) + return construct_type(value=value, type_=type_, metadata=getattr(field, "metadata", None)) def is_basemodel(type_: type) -> bool: @@ -420,7 +421,7 @@ def construct_type_unchecked(*, value: object, type_: type[_T]) -> _T: return cast(_T, construct_type(value=value, type_=type_)) -def construct_type(*, value: object, type_: object) -> object: +def construct_type(*, value: object, type_: object, metadata: Optional[List[Any]] = None) -> object: """Loose coercion to the expected type with construction of nested values. If the given value does not match the expected type then it is returned as-is. @@ -438,8 +439,10 @@ def construct_type(*, value: object, type_: object) -> object: type_ = type_.__value__ # type: ignore[unreachable] # unwrap `Annotated[T, ...]` -> `T` - if is_annotated_type(type_): - meta: tuple[Any, ...] = get_args(type_)[1:] + if metadata is not None: + meta: tuple[Any, ...] 
= tuple(metadata) + elif is_annotated_type(type_): + meta = get_args(type_)[1:] type_ = extract_type_arg(type_, 0) else: meta = tuple() diff --git a/src/llama_stack_client/_qs.py b/src/llama_stack_cli/_qs.py similarity index 100% rename from src/llama_stack_client/_qs.py rename to src/llama_stack_cli/_qs.py diff --git a/src/llama_stack_client/_resource.py b/src/llama_stack_cli/_resource.py similarity index 78% rename from src/llama_stack_client/_resource.py rename to src/llama_stack_cli/_resource.py index 8a6f4ec6..70340a40 100644 --- a/src/llama_stack_client/_resource.py +++ b/src/llama_stack_cli/_resource.py @@ -8,13 +8,13 @@ import anyio if TYPE_CHECKING: - from ._client import LlamaStackClient, AsyncLlamaStackClient + from ._client import LlamaStackCli, AsyncLlamaStackCli class SyncAPIResource: - _client: LlamaStackClient + _client: LlamaStackCli - def __init__(self, client: LlamaStackClient) -> None: + def __init__(self, client: LlamaStackCli) -> None: self._client = client self._get = client.get self._post = client.post @@ -28,9 +28,9 @@ def _sleep(self, seconds: float) -> None: class AsyncAPIResource: - _client: AsyncLlamaStackClient + _client: AsyncLlamaStackCli - def __init__(self, client: AsyncLlamaStackClient) -> None: + def __init__(self, client: AsyncLlamaStackCli) -> None: self._client = client self._get = client.get self._post = client.post diff --git a/src/llama_stack_client/_response.py b/src/llama_stack_cli/_response.py similarity index 98% rename from src/llama_stack_client/_response.py rename to src/llama_stack_cli/_response.py index 8486ab8e..a5772d04 100644 --- a/src/llama_stack_client/_response.py +++ b/src/llama_stack_cli/_response.py @@ -29,7 +29,7 @@ from ._models import BaseModel, is_basemodel from ._constants import RAW_RESPONSE_HEADER, OVERRIDE_CAST_TO_HEADER from ._streaming import Stream, AsyncStream, is_stream_class_type, extract_stream_chunk_type -from ._exceptions import LlamaStackClientError, APIResponseValidationError +from ._exceptions import LlamaStackCliError, APIResponseValidationError if TYPE_CHECKING: from ._models import FinalRequestOptions @@ -218,7 +218,7 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T: and issubclass(origin, pydantic.BaseModel) ): raise TypeError( - "Pydantic models must subclass our base model type, e.g. `from llama_stack_client import BaseModel`" + "Pydantic models must subclass our base model type, e.g. `from llama_stack_cli import BaseModel`" ) if ( @@ -285,7 +285,7 @@ def parse(self, *, to: type[_T] | None = None) -> R | _T: the `to` argument, e.g. ```py - from llama_stack_client import BaseModel + from llama_stack_cli import BaseModel class MyModel(BaseModel): @@ -387,7 +387,7 @@ async def parse(self, *, to: type[_T] | None = None) -> R | _T: the `to` argument, e.g. ```py - from llama_stack_client import BaseModel + from llama_stack_cli import BaseModel class MyModel(BaseModel): @@ -558,11 +558,11 @@ async def stream_to_file( class MissingStreamClassError(TypeError): def __init__(self) -> None: super().__init__( - "The `stream` argument was set to `True` but the `stream_cls` argument was not given. See `llama_stack_client._streaming` for reference", + "The `stream` argument was set to `True` but the `stream_cls` argument was not given. See `llama_stack_cli._streaming` for reference", ) -class StreamAlreadyConsumed(LlamaStackClientError): +class StreamAlreadyConsumed(LlamaStackCliError): """ Attempted to read or stream content, but the content has already been streamed. 
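
The client and exception changes above (the `LlamaStackCli` / `AsyncLlamaStackCli` constructors, the `copy`/`with_options` alias, and `_make_status_error`) are easiest to read in use. The sketch below is illustrative only and is not part of the patch; it assumes the package root re-exports the client and exception classes, as the previous `llama_stack_client` package did.

```py
# Minimal usage sketch (assumption: llama_stack_cli re-exports these names at the top level).
from llama_stack_cli import LlamaStackCli, NotFoundError, RateLimitError

# With no explicit api_key/base_url, the constructor falls back to the
# PETSTORE_API_KEY and LLAMA_STACK_CLI_BASE_URL environment variables.
client = LlamaStackCli()

# `with_options` is an alias for `copy`, so per-call overrides leave the
# original client untouched.
quick_client = client.with_options(timeout=10, max_retries=0)

try:
    pet = quick_client.pet.retrieve(1)
except NotFoundError:
    # _make_status_error maps 404 -> NotFoundError, 429 -> RateLimitError, and so on.
    print("no such pet")
except RateLimitError:
    print("rate limited, backing off")
```
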
diff --git a/src/llama_stack_client/_streaming.py b/src/llama_stack_cli/_streaming.py similarity index 98% rename from src/llama_stack_client/_streaming.py rename to src/llama_stack_cli/_streaming.py index 8c436e93..3da9e6be 100644 --- a/src/llama_stack_client/_streaming.py +++ b/src/llama_stack_cli/_streaming.py @@ -12,7 +12,7 @@ from ._utils import extract_type_var_from_base if TYPE_CHECKING: - from ._client import LlamaStackClient, AsyncLlamaStackClient + from ._client import LlamaStackCli, AsyncLlamaStackCli _T = TypeVar("_T") @@ -30,7 +30,7 @@ def __init__( *, cast_to: type[_T], response: httpx.Response, - client: LlamaStackClient, + client: LlamaStackCli, ) -> None: self.response = response self._cast_to = cast_to @@ -93,7 +93,7 @@ def __init__( *, cast_to: type[_T], response: httpx.Response, - client: AsyncLlamaStackClient, + client: AsyncLlamaStackCli, ) -> None: self.response = response self._cast_to = cast_to diff --git a/src/llama_stack_client/_types.py b/src/llama_stack_cli/_types.py similarity index 99% rename from src/llama_stack_client/_types.py rename to src/llama_stack_cli/_types.py index 63631322..60e5ea6f 100644 --- a/src/llama_stack_client/_types.py +++ b/src/llama_stack_cli/_types.py @@ -81,7 +81,7 @@ # This unfortunately means that you will either have # to import this type and pass it explicitly: # -# from llama_stack_client import NoneType +# from llama_stack_cli import NoneType # client.get('/foo', cast_to=NoneType) # # or build it yourself: diff --git a/src/llama_stack_client/_utils/__init__.py b/src/llama_stack_cli/_utils/__init__.py similarity index 100% rename from src/llama_stack_client/_utils/__init__.py rename to src/llama_stack_cli/_utils/__init__.py diff --git a/src/llama_stack_client/_utils/_logs.py b/src/llama_stack_cli/_utils/_logs.py similarity index 62% rename from src/llama_stack_client/_utils/_logs.py rename to src/llama_stack_cli/_utils/_logs.py index 77e8dc24..46a7b713 100644 --- a/src/llama_stack_client/_utils/_logs.py +++ b/src/llama_stack_cli/_utils/_logs.py @@ -1,22 +1,20 @@ import os import logging -from rich.logging import RichHandler -logger: logging.Logger = logging.getLogger("llama_stack_client") +logger: logging.Logger = logging.getLogger("llama_stack_cli") httpx_logger: logging.Logger = logging.getLogger("httpx") def _basic_config() -> None: - # e.g. [2023-10-05 14:12:26 - llama_stack_client._base_client:818 - DEBUG] HTTP Request: POST http://127.0.0.1:4010/foo/bar "200 OK" + # e.g. 
[2023-10-05 14:12:26 - llama_stack_cli._base_client:818 - DEBUG] HTTP Request: POST http://127.0.0.1:4010/foo/bar "200 OK" logging.basicConfig( format="[%(asctime)s - %(name)s:%(lineno)d - %(levelname)s] %(message)s", datefmt="%Y-%m-%d %H:%M:%S", - handlers=[RichHandler(rich_tracebacks=True)], ) def setup_logging() -> None: - env = os.environ.get("LLAMA_STACK_LOG") + env = os.environ.get("LLAMA_STACK_CLI_LOG") if env == "debug": _basic_config() logger.setLevel(logging.DEBUG) diff --git a/src/llama_stack_client/_utils/_proxy.py b/src/llama_stack_cli/_utils/_proxy.py similarity index 100% rename from src/llama_stack_client/_utils/_proxy.py rename to src/llama_stack_cli/_utils/_proxy.py diff --git a/src/llama_stack_client/_utils/_reflection.py b/src/llama_stack_cli/_utils/_reflection.py similarity index 100% rename from src/llama_stack_client/_utils/_reflection.py rename to src/llama_stack_cli/_utils/_reflection.py diff --git a/src/llama_stack_cli/_utils/_resources_proxy.py b/src/llama_stack_cli/_utils/_resources_proxy.py new file mode 100644 index 00000000..951ebdb1 --- /dev/null +++ b/src/llama_stack_cli/_utils/_resources_proxy.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +from typing import Any +from typing_extensions import override + +from ._proxy import LazyProxy + + +class ResourcesProxy(LazyProxy[Any]): + """A proxy for the `llama_stack_cli.resources` module. + + This is used so that we can lazily import `llama_stack_cli.resources` only when + needed *and* so that users can just import `llama_stack_cli` and reference `llama_stack_cli.resources` + """ + + @override + def __load__(self) -> Any: + import importlib + + mod = importlib.import_module("llama_stack_cli.resources") + return mod + + +resources = ResourcesProxy().__as_proxied__() diff --git a/src/llama_stack_client/_utils/_streams.py b/src/llama_stack_cli/_utils/_streams.py similarity index 100% rename from src/llama_stack_client/_utils/_streams.py rename to src/llama_stack_cli/_utils/_streams.py diff --git a/src/llama_stack_client/_utils/_sync.py b/src/llama_stack_cli/_utils/_sync.py similarity index 100% rename from src/llama_stack_client/_utils/_sync.py rename to src/llama_stack_cli/_utils/_sync.py diff --git a/src/llama_stack_client/_utils/_transform.py b/src/llama_stack_cli/_utils/_transform.py similarity index 100% rename from src/llama_stack_client/_utils/_transform.py rename to src/llama_stack_cli/_utils/_transform.py diff --git a/src/llama_stack_client/_utils/_typing.py b/src/llama_stack_cli/_utils/_typing.py similarity index 100% rename from src/llama_stack_client/_utils/_typing.py rename to src/llama_stack_cli/_utils/_typing.py diff --git a/src/llama_stack_client/_utils/_utils.py b/src/llama_stack_cli/_utils/_utils.py similarity index 100% rename from src/llama_stack_client/_utils/_utils.py rename to src/llama_stack_cli/_utils/_utils.py diff --git a/src/llama_stack_client/_version.py b/src/llama_stack_cli/_version.py similarity index 59% rename from src/llama_stack_client/_version.py rename to src/llama_stack_cli/_version.py index c320dee5..de2b23de 100644 --- a/src/llama_stack_client/_version.py +++ b/src/llama_stack_cli/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
-__title__ = "llama_stack_client" -__version__ = "0.2.13" +__title__ = "llama_stack_cli" +__version__ = "0.0.1-alpha.0" diff --git a/src/llama_stack_client/lib/.keep b/src/llama_stack_cli/lib/.keep similarity index 81% rename from src/llama_stack_client/lib/.keep rename to src/llama_stack_cli/lib/.keep index 7554f8b2..5e2c99fd 100644 --- a/src/llama_stack_client/lib/.keep +++ b/src/llama_stack_cli/lib/.keep @@ -1,4 +1,4 @@ File generated from our OpenAPI spec by Stainless. This directory can be used to store custom files to expand the SDK. -It is ignored by Stainless code generation and its content (other than this keep file) won't be touched. +It is ignored by Stainless code generation and its content (other than this keep file) won't be touched. \ No newline at end of file diff --git a/src/llama_stack_client/py.typed b/src/llama_stack_cli/py.typed similarity index 100% rename from src/llama_stack_client/py.typed rename to src/llama_stack_cli/py.typed diff --git a/src/llama_stack_cli/resources/__init__.py b/src/llama_stack_cli/resources/__init__.py new file mode 100644 index 00000000..49424c56 --- /dev/null +++ b/src/llama_stack_cli/resources/__init__.py @@ -0,0 +1,47 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .pet import ( + PetResource, + AsyncPetResource, + PetResourceWithRawResponse, + AsyncPetResourceWithRawResponse, + PetResourceWithStreamingResponse, + AsyncPetResourceWithStreamingResponse, +) +from .user import ( + UserResource, + AsyncUserResource, + UserResourceWithRawResponse, + AsyncUserResourceWithRawResponse, + UserResourceWithStreamingResponse, + AsyncUserResourceWithStreamingResponse, +) +from .store import ( + StoreResource, + AsyncStoreResource, + StoreResourceWithRawResponse, + AsyncStoreResourceWithRawResponse, + StoreResourceWithStreamingResponse, + AsyncStoreResourceWithStreamingResponse, +) + +__all__ = [ + "PetResource", + "AsyncPetResource", + "PetResourceWithRawResponse", + "AsyncPetResourceWithRawResponse", + "PetResourceWithStreamingResponse", + "AsyncPetResourceWithStreamingResponse", + "StoreResource", + "AsyncStoreResource", + "StoreResourceWithRawResponse", + "AsyncStoreResourceWithRawResponse", + "StoreResourceWithStreamingResponse", + "AsyncStoreResourceWithStreamingResponse", + "UserResource", + "AsyncUserResource", + "UserResourceWithRawResponse", + "AsyncUserResourceWithRawResponse", + "UserResourceWithStreamingResponse", + "AsyncUserResourceWithStreamingResponse", +] diff --git a/src/llama_stack_client/resources/telemetry.py b/src/llama_stack_cli/resources/pet.py similarity index 51% rename from src/llama_stack_client/resources/telemetry.py rename to src/llama_stack_cli/resources/pet.py index 1cec537e..49d949d5 100644 --- a/src/llama_stack_client/resources/telemetry.py +++ b/src/llama_stack_cli/resources/pet.py @@ -2,18 +2,20 @@ from __future__ import annotations -from typing import List, Type, Iterable, cast +from typing import List, Iterable +from typing_extensions import Literal import httpx from ..types import ( - telemetry_log_event_params, - telemetry_query_spans_params, - telemetry_query_traces_params, - telemetry_get_span_tree_params, - telemetry_save_spans_to_dataset_params, + pet_create_params, + pet_update_params, + pet_find_by_tags_params, + pet_update_by_id_params, + pet_upload_image_params, + pet_find_by_status_params, ) -from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven +from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven, FileTypes from 
.._utils import maybe_transform, async_maybe_transform from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource @@ -23,55 +25,58 @@ async_to_raw_response_wrapper, async_to_streamed_response_wrapper, ) -from .._wrappers import DataWrapper -from ..types.trace import Trace +from ..types.pet import Pet from .._base_client import make_request_options -from ..types.event_param import EventParam -from ..types.query_condition_param import QueryConditionParam -from ..types.telemetry_get_span_response import TelemetryGetSpanResponse -from ..types.telemetry_query_spans_response import TelemetryQuerySpansResponse -from ..types.telemetry_query_traces_response import TelemetryQueryTracesResponse -from ..types.telemetry_get_span_tree_response import TelemetryGetSpanTreeResponse +from ..types.category_param import CategoryParam +from ..types.pet_find_by_tags_response import PetFindByTagsResponse +from ..types.pet_upload_image_response import PetUploadImageResponse +from ..types.pet_find_by_status_response import PetFindByStatusResponse -__all__ = ["TelemetryResource", "AsyncTelemetryResource"] +__all__ = ["PetResource", "AsyncPetResource"] -class TelemetryResource(SyncAPIResource): +class PetResource(SyncAPIResource): @cached_property - def with_raw_response(self) -> TelemetryResourceWithRawResponse: + def with_raw_response(self) -> PetResourceWithRawResponse: """ This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/stainless-sdks/llama-stack-cli-python#accessing-raw-response-data-eg-headers """ - return TelemetryResourceWithRawResponse(self) + return PetResourceWithRawResponse(self) @cached_property - def with_streaming_response(self) -> TelemetryResourceWithStreamingResponse: + def with_streaming_response(self) -> PetResourceWithStreamingResponse: """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response + For more information, see https://www.github.com/stainless-sdks/llama-stack-cli-python#with_streaming_response """ - return TelemetryResourceWithStreamingResponse(self) + return PetResourceWithStreamingResponse(self) - def get_span( + def create( self, - span_id: str, *, - trace_id: str, + name: str, + photo_urls: List[str], + id: int | NotGiven = NOT_GIVEN, + category: CategoryParam | NotGiven = NOT_GIVEN, + status: Literal["available", "pending", "sold"] | NotGiven = NOT_GIVEN, + tags: Iterable[pet_create_params.Tag] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> TelemetryGetSpanResponse: + ) -> Pet: """ - Get a span by its ID. 
+ Add a new pet to the store Args: + status: pet status in the store + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -80,39 +85,40 @@ def get_span( timeout: Override the client-level default timeout for this request, in seconds """ - if not trace_id: - raise ValueError(f"Expected a non-empty value for `trace_id` but received {trace_id!r}") - if not span_id: - raise ValueError(f"Expected a non-empty value for `span_id` but received {span_id!r}") - return self._get( - f"/v1/telemetry/traces/{trace_id}/spans/{span_id}", + return self._post( + "/pet", + body=maybe_transform( + { + "name": name, + "photo_urls": photo_urls, + "id": id, + "category": category, + "status": status, + "tags": tags, + }, + pet_create_params.PetCreateParams, + ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=TelemetryGetSpanResponse, + cast_to=Pet, ) - def get_span_tree( + def retrieve( self, - span_id: str, + pet_id: int, *, - attributes_to_return: List[str] | NotGiven = NOT_GIVEN, - max_depth: int | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> TelemetryGetSpanTreeResponse: + ) -> Pet: """ - Get a span tree by its ID. + Returns a single pet Args: - attributes_to_return: The attributes to return in the tree. - - max_depth: The maximum depth of the tree. - extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -121,42 +127,36 @@ def get_span_tree( timeout: Override the client-level default timeout for this request, in seconds """ - if not span_id: - raise ValueError(f"Expected a non-empty value for `span_id` but received {span_id!r}") - return self._post( - f"/v1/telemetry/spans/{span_id}/tree", - body=maybe_transform( - { - "attributes_to_return": attributes_to_return, - "max_depth": max_depth, - }, - telemetry_get_span_tree_params.TelemetryGetSpanTreeParams, - ), + return self._get( + f"/pet/{pet_id}", options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - post_parser=DataWrapper[TelemetryGetSpanTreeResponse]._unwrapper, + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=cast(Type[TelemetryGetSpanTreeResponse], DataWrapper[TelemetryGetSpanTreeResponse]), + cast_to=Pet, ) - def get_trace( + def update( self, - trace_id: str, *, + name: str, + photo_urls: List[str], + id: int | NotGiven = NOT_GIVEN, + category: CategoryParam | NotGiven = NOT_GIVEN, + status: Literal["available", "pending", "sold"] | NotGiven = NOT_GIVEN, + tags: Iterable[pet_update_params.Tag] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Trace: + ) -> Pet: """ - Get a trace by its ID. 
+ Update an existing pet by Id Args: + status: pet status in the store + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -165,21 +165,29 @@ def get_trace( timeout: Override the client-level default timeout for this request, in seconds """ - if not trace_id: - raise ValueError(f"Expected a non-empty value for `trace_id` but received {trace_id!r}") - return self._get( - f"/v1/telemetry/traces/{trace_id}", + return self._put( + "/pet", + body=maybe_transform( + { + "name": name, + "photo_urls": photo_urls, + "id": id, + "category": category, + "status": status, + "tags": tags, + }, + pet_update_params.PetUpdateParams, + ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=Trace, + cast_to=Pet, ) - def log_event( + def delete( self, + pet_id: int, *, - event: EventParam, - ttl_seconds: int, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -188,13 +196,9 @@ def log_event( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ - Log an event. + delete a pet Args: - event: The event to log. - - ttl_seconds: The time to live of the event. - extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -204,43 +208,69 @@ def log_event( timeout: Override the client-level default timeout for this request, in seconds """ extra_headers = {"Accept": "*/*", **(extra_headers or {})} - return self._post( - "/v1/telemetry/events", - body=maybe_transform( - { - "event": event, - "ttl_seconds": ttl_seconds, - }, - telemetry_log_event_params.TelemetryLogEventParams, - ), + return self._delete( + f"/pet/{pet_id}", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=NoneType, ) - def query_spans( + def find_by_status( self, *, - attribute_filters: Iterable[QueryConditionParam], - attributes_to_return: List[str], - max_depth: int | NotGiven = NOT_GIVEN, + status: Literal["available", "pending", "sold"] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> TelemetryQuerySpansResponse: + ) -> PetFindByStatusResponse: """ - Query spans. + Multiple status values can be provided with comma separated strings Args: - attribute_filters: The attribute filters to apply to the spans. 
+ status: Status values that need to be considered for filter + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get( + "/pet/findByStatus", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform({"status": status}, pet_find_by_status_params.PetFindByStatusParams), + ), + cast_to=PetFindByStatusResponse, + ) - attributes_to_return: The attributes to return in the spans. + def find_by_tags( + self, + *, + tags: List[str] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> PetFindByTagsResponse: + """Multiple tags can be provided with comma separated strings. - max_depth: The maximum depth of the tree. + Use tag1, tag2, tag3 + for testing. + + Args: + tags: Tags to filter by extra_headers: Send extra headers @@ -250,51 +280,38 @@ def query_spans( timeout: Override the client-level default timeout for this request, in seconds """ - return self._post( - "/v1/telemetry/spans", - body=maybe_transform( - { - "attribute_filters": attribute_filters, - "attributes_to_return": attributes_to_return, - "max_depth": max_depth, - }, - telemetry_query_spans_params.TelemetryQuerySpansParams, - ), + return self._get( + "/pet/findByTags", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout, - post_parser=DataWrapper[TelemetryQuerySpansResponse]._unwrapper, + query=maybe_transform({"tags": tags}, pet_find_by_tags_params.PetFindByTagsParams), ), - cast_to=cast(Type[TelemetryQuerySpansResponse], DataWrapper[TelemetryQuerySpansResponse]), + cast_to=PetFindByTagsResponse, ) - def query_traces( + def update_by_id( self, + pet_id: int, *, - attribute_filters: Iterable[QueryConditionParam] | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - offset: int | NotGiven = NOT_GIVEN, - order_by: List[str] | NotGiven = NOT_GIVEN, + name: str | NotGiven = NOT_GIVEN, + status: str | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> TelemetryQueryTracesResponse: + ) -> None: """ - Query traces. + Updates a pet in the store with form data Args: - attribute_filters: The attribute filters to apply to the traces. - - limit: The limit of traces to return. - - offset: The offset of the traces to return. + name: Name of pet that needs to be updated - order_by: The order by of the traces to return. 
+ status: Status of pet that needs to be updated extra_headers: Send extra headers @@ -304,52 +321,43 @@ def query_traces( timeout: Override the client-level default timeout for this request, in seconds """ + extra_headers = {"Accept": "*/*", **(extra_headers or {})} return self._post( - "/v1/telemetry/traces", - body=maybe_transform( - { - "attribute_filters": attribute_filters, - "limit": limit, - "offset": offset, - "order_by": order_by, - }, - telemetry_query_traces_params.TelemetryQueryTracesParams, - ), + f"/pet/{pet_id}", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout, - post_parser=DataWrapper[TelemetryQueryTracesResponse]._unwrapper, + query=maybe_transform( + { + "name": name, + "status": status, + }, + pet_update_by_id_params.PetUpdateByIDParams, + ), ), - cast_to=cast(Type[TelemetryQueryTracesResponse], DataWrapper[TelemetryQueryTracesResponse]), + cast_to=NoneType, ) - def save_spans_to_dataset( + def upload_image( self, + pet_id: int, *, - attribute_filters: Iterable[QueryConditionParam], - attributes_to_save: List[str], - dataset_id: str, - max_depth: int | NotGiven = NOT_GIVEN, + additional_metadata: str | NotGiven = NOT_GIVEN, + image: FileTypes | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> None: + ) -> PetUploadImageResponse: """ - Save spans to a dataset. + uploads an image Args: - attribute_filters: The attribute filters to apply to the spans. - - attributes_to_save: The attributes to save to the dataset. - - dataset_id: The ID of the dataset to save the spans to. - - max_depth: The maximum depth of the tree. + additional_metadata: Additional Metadata extra_headers: Send extra headers @@ -359,61 +367,64 @@ def save_spans_to_dataset( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"Accept": "*/*", **(extra_headers or {})} return self._post( - "/v1/telemetry/spans/export", - body=maybe_transform( - { - "attribute_filters": attribute_filters, - "attributes_to_save": attributes_to_save, - "dataset_id": dataset_id, - "max_depth": max_depth, - }, - telemetry_save_spans_to_dataset_params.TelemetrySaveSpansToDatasetParams, - ), + f"/pet/{pet_id}/uploadImage", + body=maybe_transform(image, pet_upload_image_params.PetUploadImageParams), options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + {"additional_metadata": additional_metadata}, pet_upload_image_params.PetUploadImageParams + ), ), - cast_to=NoneType, + cast_to=PetUploadImageResponse, ) -class AsyncTelemetryResource(AsyncAPIResource): +class AsyncPetResource(AsyncAPIResource): @cached_property - def with_raw_response(self) -> AsyncTelemetryResourceWithRawResponse: + def with_raw_response(self) -> AsyncPetResourceWithRawResponse: """ This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. 
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/stainless-sdks/llama-stack-cli-python#accessing-raw-response-data-eg-headers """ - return AsyncTelemetryResourceWithRawResponse(self) + return AsyncPetResourceWithRawResponse(self) @cached_property - def with_streaming_response(self) -> AsyncTelemetryResourceWithStreamingResponse: + def with_streaming_response(self) -> AsyncPetResourceWithStreamingResponse: """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response + For more information, see https://www.github.com/stainless-sdks/llama-stack-cli-python#with_streaming_response """ - return AsyncTelemetryResourceWithStreamingResponse(self) + return AsyncPetResourceWithStreamingResponse(self) - async def get_span( + async def create( self, - span_id: str, *, - trace_id: str, + name: str, + photo_urls: List[str], + id: int | NotGiven = NOT_GIVEN, + category: CategoryParam | NotGiven = NOT_GIVEN, + status: Literal["available", "pending", "sold"] | NotGiven = NOT_GIVEN, + tags: Iterable[pet_create_params.Tag] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> TelemetryGetSpanResponse: + ) -> Pet: """ - Get a span by its ID. + Add a new pet to the store Args: + status: pet status in the store + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -422,39 +433,40 @@ async def get_span( timeout: Override the client-level default timeout for this request, in seconds """ - if not trace_id: - raise ValueError(f"Expected a non-empty value for `trace_id` but received {trace_id!r}") - if not span_id: - raise ValueError(f"Expected a non-empty value for `span_id` but received {span_id!r}") - return await self._get( - f"/v1/telemetry/traces/{trace_id}/spans/{span_id}", + return await self._post( + "/pet", + body=await async_maybe_transform( + { + "name": name, + "photo_urls": photo_urls, + "id": id, + "category": category, + "status": status, + "tags": tags, + }, + pet_create_params.PetCreateParams, + ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=TelemetryGetSpanResponse, + cast_to=Pet, ) - async def get_span_tree( + async def retrieve( self, - span_id: str, + pet_id: int, *, - attributes_to_return: List[str] | NotGiven = NOT_GIVEN, - max_depth: int | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> TelemetryGetSpanTreeResponse: + ) -> Pet: """ - Get a span tree by its ID. + Returns a single pet Args: - attributes_to_return: The attributes to return in the tree. 
- - max_depth: The maximum depth of the tree. - extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -463,42 +475,36 @@ async def get_span_tree( timeout: Override the client-level default timeout for this request, in seconds """ - if not span_id: - raise ValueError(f"Expected a non-empty value for `span_id` but received {span_id!r}") - return await self._post( - f"/v1/telemetry/spans/{span_id}/tree", - body=await async_maybe_transform( - { - "attributes_to_return": attributes_to_return, - "max_depth": max_depth, - }, - telemetry_get_span_tree_params.TelemetryGetSpanTreeParams, - ), + return await self._get( + f"/pet/{pet_id}", options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - post_parser=DataWrapper[TelemetryGetSpanTreeResponse]._unwrapper, + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=cast(Type[TelemetryGetSpanTreeResponse], DataWrapper[TelemetryGetSpanTreeResponse]), + cast_to=Pet, ) - async def get_trace( + async def update( self, - trace_id: str, *, + name: str, + photo_urls: List[str], + id: int | NotGiven = NOT_GIVEN, + category: CategoryParam | NotGiven = NOT_GIVEN, + status: Literal["available", "pending", "sold"] | NotGiven = NOT_GIVEN, + tags: Iterable[pet_update_params.Tag] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Trace: + ) -> Pet: """ - Get a trace by its ID. + Update an existing pet by Id Args: + status: pet status in the store + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -507,21 +513,29 @@ async def get_trace( timeout: Override the client-level default timeout for this request, in seconds """ - if not trace_id: - raise ValueError(f"Expected a non-empty value for `trace_id` but received {trace_id!r}") - return await self._get( - f"/v1/telemetry/traces/{trace_id}", + return await self._put( + "/pet", + body=await async_maybe_transform( + { + "name": name, + "photo_urls": photo_urls, + "id": id, + "category": category, + "status": status, + "tags": tags, + }, + pet_update_params.PetUpdateParams, + ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=Trace, + cast_to=Pet, ) - async def log_event( + async def delete( self, + pet_id: int, *, - event: EventParam, - ttl_seconds: int, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -530,13 +544,9 @@ async def log_event( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> None: """ - Log an event. + delete a pet Args: - event: The event to log. - - ttl_seconds: The time to live of the event. 
- extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -546,43 +556,69 @@ async def log_event( timeout: Override the client-level default timeout for this request, in seconds """ extra_headers = {"Accept": "*/*", **(extra_headers or {})} - return await self._post( - "/v1/telemetry/events", - body=await async_maybe_transform( - { - "event": event, - "ttl_seconds": ttl_seconds, - }, - telemetry_log_event_params.TelemetryLogEventParams, - ), + return await self._delete( + f"/pet/{pet_id}", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=NoneType, ) - async def query_spans( + async def find_by_status( self, *, - attribute_filters: Iterable[QueryConditionParam], - attributes_to_return: List[str], - max_depth: int | NotGiven = NOT_GIVEN, + status: Literal["available", "pending", "sold"] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> TelemetryQuerySpansResponse: + ) -> PetFindByStatusResponse: """ - Query spans. + Multiple status values can be provided with comma separated strings Args: - attribute_filters: The attribute filters to apply to the spans. + status: Status values that need to be considered for filter + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._get( + "/pet/findByStatus", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=await async_maybe_transform({"status": status}, pet_find_by_status_params.PetFindByStatusParams), + ), + cast_to=PetFindByStatusResponse, + ) + + async def find_by_tags( + self, + *, + tags: List[str] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> PetFindByTagsResponse: + """Multiple tags can be provided with comma separated strings. - attributes_to_return: The attributes to return in the spans. + Use tag1, tag2, tag3 + for testing. - max_depth: The maximum depth of the tree. 
+ Args: + tags: Tags to filter by extra_headers: Send extra headers @@ -592,51 +628,38 @@ async def query_spans( timeout: Override the client-level default timeout for this request, in seconds """ - return await self._post( - "/v1/telemetry/spans", - body=await async_maybe_transform( - { - "attribute_filters": attribute_filters, - "attributes_to_return": attributes_to_return, - "max_depth": max_depth, - }, - telemetry_query_spans_params.TelemetryQuerySpansParams, - ), + return await self._get( + "/pet/findByTags", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout, - post_parser=DataWrapper[TelemetryQuerySpansResponse]._unwrapper, + query=await async_maybe_transform({"tags": tags}, pet_find_by_tags_params.PetFindByTagsParams), ), - cast_to=cast(Type[TelemetryQuerySpansResponse], DataWrapper[TelemetryQuerySpansResponse]), + cast_to=PetFindByTagsResponse, ) - async def query_traces( + async def update_by_id( self, + pet_id: int, *, - attribute_filters: Iterable[QueryConditionParam] | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - offset: int | NotGiven = NOT_GIVEN, - order_by: List[str] | NotGiven = NOT_GIVEN, + name: str | NotGiven = NOT_GIVEN, + status: str | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> TelemetryQueryTracesResponse: + ) -> None: """ - Query traces. + Updates a pet in the store with form data Args: - attribute_filters: The attribute filters to apply to the traces. - - limit: The limit of traces to return. - - offset: The offset of the traces to return. + name: Name of pet that needs to be updated - order_by: The order by of the traces to return. + status: Status of pet that needs to be updated extra_headers: Send extra headers @@ -646,52 +669,43 @@ async def query_traces( timeout: Override the client-level default timeout for this request, in seconds """ + extra_headers = {"Accept": "*/*", **(extra_headers or {})} return await self._post( - "/v1/telemetry/traces", - body=await async_maybe_transform( - { - "attribute_filters": attribute_filters, - "limit": limit, - "offset": offset, - "order_by": order_by, - }, - telemetry_query_traces_params.TelemetryQueryTracesParams, - ), + f"/pet/{pet_id}", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout, - post_parser=DataWrapper[TelemetryQueryTracesResponse]._unwrapper, + query=await async_maybe_transform( + { + "name": name, + "status": status, + }, + pet_update_by_id_params.PetUpdateByIDParams, + ), ), - cast_to=cast(Type[TelemetryQueryTracesResponse], DataWrapper[TelemetryQueryTracesResponse]), + cast_to=NoneType, ) - async def save_spans_to_dataset( + async def upload_image( self, + pet_id: int, *, - attribute_filters: Iterable[QueryConditionParam], - attributes_to_save: List[str], - dataset_id: str, - max_depth: int | NotGiven = NOT_GIVEN, + additional_metadata: str | NotGiven = NOT_GIVEN, + image: FileTypes | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
# The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> None: + ) -> PetUploadImageResponse: """ - Save spans to a dataset. + uploads an image Args: - attribute_filters: The attribute filters to apply to the spans. - - attributes_to_save: The attributes to save to the dataset. - - dataset_id: The ID of the dataset to save the spans to. - - max_depth: The maximum depth of the tree. + additional_metadata: Additional Metadata extra_headers: Send extra headers @@ -701,128 +715,137 @@ async def save_spans_to_dataset( timeout: Override the client-level default timeout for this request, in seconds """ - extra_headers = {"Accept": "*/*", **(extra_headers or {})} return await self._post( - "/v1/telemetry/spans/export", - body=await async_maybe_transform( - { - "attribute_filters": attribute_filters, - "attributes_to_save": attributes_to_save, - "dataset_id": dataset_id, - "max_depth": max_depth, - }, - telemetry_save_spans_to_dataset_params.TelemetrySaveSpansToDatasetParams, - ), + f"/pet/{pet_id}/uploadImage", + body=await async_maybe_transform(image, pet_upload_image_params.PetUploadImageParams), options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=await async_maybe_transform( + {"additional_metadata": additional_metadata}, pet_upload_image_params.PetUploadImageParams + ), ), - cast_to=NoneType, + cast_to=PetUploadImageResponse, ) -class TelemetryResourceWithRawResponse: - def __init__(self, telemetry: TelemetryResource) -> None: - self._telemetry = telemetry +class PetResourceWithRawResponse: + def __init__(self, pet: PetResource) -> None: + self._pet = pet - self.get_span = to_raw_response_wrapper( - telemetry.get_span, + self.create = to_raw_response_wrapper( + pet.create, + ) + self.retrieve = to_raw_response_wrapper( + pet.retrieve, ) - self.get_span_tree = to_raw_response_wrapper( - telemetry.get_span_tree, + self.update = to_raw_response_wrapper( + pet.update, ) - self.get_trace = to_raw_response_wrapper( - telemetry.get_trace, + self.delete = to_raw_response_wrapper( + pet.delete, ) - self.log_event = to_raw_response_wrapper( - telemetry.log_event, + self.find_by_status = to_raw_response_wrapper( + pet.find_by_status, ) - self.query_spans = to_raw_response_wrapper( - telemetry.query_spans, + self.find_by_tags = to_raw_response_wrapper( + pet.find_by_tags, ) - self.query_traces = to_raw_response_wrapper( - telemetry.query_traces, + self.update_by_id = to_raw_response_wrapper( + pet.update_by_id, ) - self.save_spans_to_dataset = to_raw_response_wrapper( - telemetry.save_spans_to_dataset, + self.upload_image = to_raw_response_wrapper( + pet.upload_image, ) -class AsyncTelemetryResourceWithRawResponse: - def __init__(self, telemetry: AsyncTelemetryResource) -> None: - self._telemetry = telemetry +class AsyncPetResourceWithRawResponse: + def __init__(self, pet: AsyncPetResource) -> None: + self._pet = pet - self.get_span = async_to_raw_response_wrapper( - telemetry.get_span, + self.create = async_to_raw_response_wrapper( + pet.create, + ) + self.retrieve = async_to_raw_response_wrapper( + pet.retrieve, ) - self.get_span_tree = async_to_raw_response_wrapper( - telemetry.get_span_tree, + 
self.update = async_to_raw_response_wrapper( + pet.update, ) - self.get_trace = async_to_raw_response_wrapper( - telemetry.get_trace, + self.delete = async_to_raw_response_wrapper( + pet.delete, ) - self.log_event = async_to_raw_response_wrapper( - telemetry.log_event, + self.find_by_status = async_to_raw_response_wrapper( + pet.find_by_status, ) - self.query_spans = async_to_raw_response_wrapper( - telemetry.query_spans, + self.find_by_tags = async_to_raw_response_wrapper( + pet.find_by_tags, ) - self.query_traces = async_to_raw_response_wrapper( - telemetry.query_traces, + self.update_by_id = async_to_raw_response_wrapper( + pet.update_by_id, ) - self.save_spans_to_dataset = async_to_raw_response_wrapper( - telemetry.save_spans_to_dataset, + self.upload_image = async_to_raw_response_wrapper( + pet.upload_image, ) -class TelemetryResourceWithStreamingResponse: - def __init__(self, telemetry: TelemetryResource) -> None: - self._telemetry = telemetry +class PetResourceWithStreamingResponse: + def __init__(self, pet: PetResource) -> None: + self._pet = pet - self.get_span = to_streamed_response_wrapper( - telemetry.get_span, + self.create = to_streamed_response_wrapper( + pet.create, ) - self.get_span_tree = to_streamed_response_wrapper( - telemetry.get_span_tree, + self.retrieve = to_streamed_response_wrapper( + pet.retrieve, ) - self.get_trace = to_streamed_response_wrapper( - telemetry.get_trace, + self.update = to_streamed_response_wrapper( + pet.update, ) - self.log_event = to_streamed_response_wrapper( - telemetry.log_event, + self.delete = to_streamed_response_wrapper( + pet.delete, ) - self.query_spans = to_streamed_response_wrapper( - telemetry.query_spans, + self.find_by_status = to_streamed_response_wrapper( + pet.find_by_status, ) - self.query_traces = to_streamed_response_wrapper( - telemetry.query_traces, + self.find_by_tags = to_streamed_response_wrapper( + pet.find_by_tags, ) - self.save_spans_to_dataset = to_streamed_response_wrapper( - telemetry.save_spans_to_dataset, + self.update_by_id = to_streamed_response_wrapper( + pet.update_by_id, + ) + self.upload_image = to_streamed_response_wrapper( + pet.upload_image, ) -class AsyncTelemetryResourceWithStreamingResponse: - def __init__(self, telemetry: AsyncTelemetryResource) -> None: - self._telemetry = telemetry +class AsyncPetResourceWithStreamingResponse: + def __init__(self, pet: AsyncPetResource) -> None: + self._pet = pet - self.get_span = async_to_streamed_response_wrapper( - telemetry.get_span, + self.create = async_to_streamed_response_wrapper( + pet.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + pet.retrieve, ) - self.get_span_tree = async_to_streamed_response_wrapper( - telemetry.get_span_tree, + self.update = async_to_streamed_response_wrapper( + pet.update, ) - self.get_trace = async_to_streamed_response_wrapper( - telemetry.get_trace, + self.delete = async_to_streamed_response_wrapper( + pet.delete, ) - self.log_event = async_to_streamed_response_wrapper( - telemetry.log_event, + self.find_by_status = async_to_streamed_response_wrapper( + pet.find_by_status, ) - self.query_spans = async_to_streamed_response_wrapper( - telemetry.query_spans, + self.find_by_tags = async_to_streamed_response_wrapper( + pet.find_by_tags, ) - self.query_traces = async_to_streamed_response_wrapper( - telemetry.query_traces, + self.update_by_id = async_to_streamed_response_wrapper( + pet.update_by_id, ) - self.save_spans_to_dataset = async_to_streamed_response_wrapper( - telemetry.save_spans_to_dataset, + 
self.upload_image = async_to_streamed_response_wrapper( + pet.upload_image, ) diff --git a/src/llama_stack_cli/resources/store/__init__.py b/src/llama_stack_cli/resources/store/__init__.py new file mode 100644 index 00000000..0f819768 --- /dev/null +++ b/src/llama_stack_cli/resources/store/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .order import ( + OrderResource, + AsyncOrderResource, + OrderResourceWithRawResponse, + AsyncOrderResourceWithRawResponse, + OrderResourceWithStreamingResponse, + AsyncOrderResourceWithStreamingResponse, +) +from .store import ( + StoreResource, + AsyncStoreResource, + StoreResourceWithRawResponse, + AsyncStoreResourceWithRawResponse, + StoreResourceWithStreamingResponse, + AsyncStoreResourceWithStreamingResponse, +) + +__all__ = [ + "OrderResource", + "AsyncOrderResource", + "OrderResourceWithRawResponse", + "AsyncOrderResourceWithRawResponse", + "OrderResourceWithStreamingResponse", + "AsyncOrderResourceWithStreamingResponse", + "StoreResource", + "AsyncStoreResource", + "StoreResourceWithRawResponse", + "AsyncStoreResourceWithRawResponse", + "StoreResourceWithStreamingResponse", + "AsyncStoreResourceWithStreamingResponse", +] diff --git a/src/llama_stack_client/resources/eval/jobs.py b/src/llama_stack_cli/resources/store/order.py similarity index 58% rename from src/llama_stack_client/resources/eval/jobs.py rename to src/llama_stack_cli/resources/store/order.py index 16fa337f..4d9489c3 100644 --- a/src/llama_stack_client/resources/eval/jobs.py +++ b/src/llama_stack_cli/resources/store/order.py @@ -2,9 +2,14 @@ from __future__ import annotations +from typing import Union +from datetime import datetime +from typing_extensions import Literal + import httpx from ..._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven +from ..._utils import maybe_transform, async_maybe_transform from ..._compat import cached_property from ..._resource import SyncAPIResource, AsyncAPIResource from ..._response import ( @@ -13,49 +18,55 @@ async_to_raw_response_wrapper, async_to_streamed_response_wrapper, ) -from ...types.job import Job +from ...types.store import order_create_params from ..._base_client import make_request_options -from ...types.evaluate_response import EvaluateResponse +from ...types.shared.order import Order -__all__ = ["JobsResource", "AsyncJobsResource"] +__all__ = ["OrderResource", "AsyncOrderResource"] -class JobsResource(SyncAPIResource): +class OrderResource(SyncAPIResource): @cached_property - def with_raw_response(self) -> JobsResourceWithRawResponse: + def with_raw_response(self) -> OrderResourceWithRawResponse: """ This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/stainless-sdks/llama-stack-cli-python#accessing-raw-response-data-eg-headers """ - return JobsResourceWithRawResponse(self) + return OrderResourceWithRawResponse(self) @cached_property - def with_streaming_response(self) -> JobsResourceWithStreamingResponse: + def with_streaming_response(self) -> OrderResourceWithStreamingResponse: """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response + For more information, see https://www.github.com/stainless-sdks/llama-stack-cli-python#with_streaming_response """ - return JobsResourceWithStreamingResponse(self) + return OrderResourceWithStreamingResponse(self) - def retrieve( + def create( self, - job_id: str, *, - benchmark_id: str, + id: int | NotGiven = NOT_GIVEN, + complete: bool | NotGiven = NOT_GIVEN, + pet_id: int | NotGiven = NOT_GIVEN, + quantity: int | NotGiven = NOT_GIVEN, + ship_date: Union[str, datetime] | NotGiven = NOT_GIVEN, + status: Literal["placed", "approved", "delivered"] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> EvaluateResponse: + ) -> Order: """ - Get the result of a job. + Place a new order in the store Args: + status: Order Status + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -64,32 +75,40 @@ def retrieve( timeout: Override the client-level default timeout for this request, in seconds """ - if not benchmark_id: - raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") - if not job_id: - raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}") - return self._get( - f"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", + return self._post( + "/store/order", + body=maybe_transform( + { + "id": id, + "complete": complete, + "pet_id": pet_id, + "quantity": quantity, + "ship_date": ship_date, + "status": status, + }, + order_create_params.OrderCreateParams, + ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=EvaluateResponse, + cast_to=Order, ) - def cancel( + def retrieve( self, - job_id: str, + order_id: int, *, - benchmark_id: str, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> None: - """ - Cancel a job. + ) -> Order: + """For valid response try integer IDs with value <= 5 or > 10. + + Other values will + generate exceptions. 
Args: extra_headers: Send extra headers @@ -100,33 +119,29 @@ def cancel( timeout: Override the client-level default timeout for this request, in seconds """ - if not benchmark_id: - raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") - if not job_id: - raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}") - extra_headers = {"Accept": "*/*", **(extra_headers or {})} - return self._delete( - f"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}", + return self._get( + f"/store/order/{order_id}", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=NoneType, + cast_to=Order, ) - def status( + def delete( self, - job_id: str, + order_id: int, *, - benchmark_id: str, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Job: - """ - Get the status of a job. + ) -> None: + """For valid response try integer IDs with value < 1000. + + Anything above 1000 or + nonintegers will generate API errors Args: extra_headers: Send extra headers @@ -137,55 +152,58 @@ def status( timeout: Override the client-level default timeout for this request, in seconds """ - if not benchmark_id: - raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") - if not job_id: - raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}") - return self._get( - f"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}", + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return self._delete( + f"/store/order/{order_id}", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=Job, + cast_to=NoneType, ) -class AsyncJobsResource(AsyncAPIResource): +class AsyncOrderResource(AsyncAPIResource): @cached_property - def with_raw_response(self) -> AsyncJobsResourceWithRawResponse: + def with_raw_response(self) -> AsyncOrderResourceWithRawResponse: """ This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/stainless-sdks/llama-stack-cli-python#accessing-raw-response-data-eg-headers """ - return AsyncJobsResourceWithRawResponse(self) + return AsyncOrderResourceWithRawResponse(self) @cached_property - def with_streaming_response(self) -> AsyncJobsResourceWithStreamingResponse: + def with_streaming_response(self) -> AsyncOrderResourceWithStreamingResponse: """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
- For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response + For more information, see https://www.github.com/stainless-sdks/llama-stack-cli-python#with_streaming_response """ - return AsyncJobsResourceWithStreamingResponse(self) + return AsyncOrderResourceWithStreamingResponse(self) - async def retrieve( + async def create( self, - job_id: str, *, - benchmark_id: str, + id: int | NotGiven = NOT_GIVEN, + complete: bool | NotGiven = NOT_GIVEN, + pet_id: int | NotGiven = NOT_GIVEN, + quantity: int | NotGiven = NOT_GIVEN, + ship_date: Union[str, datetime] | NotGiven = NOT_GIVEN, + status: Literal["placed", "approved", "delivered"] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> EvaluateResponse: + ) -> Order: """ - Get the result of a job. + Place a new order in the store Args: + status: Order Status + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -194,32 +212,40 @@ async def retrieve( timeout: Override the client-level default timeout for this request, in seconds """ - if not benchmark_id: - raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") - if not job_id: - raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}") - return await self._get( - f"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", + return await self._post( + "/store/order", + body=await async_maybe_transform( + { + "id": id, + "complete": complete, + "pet_id": pet_id, + "quantity": quantity, + "ship_date": ship_date, + "status": status, + }, + order_create_params.OrderCreateParams, + ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=EvaluateResponse, + cast_to=Order, ) - async def cancel( + async def retrieve( self, - job_id: str, + order_id: int, *, - benchmark_id: str, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> None: - """ - Cancel a job. + ) -> Order: + """For valid response try integer IDs with value <= 5 or > 10. + + Other values will + generate exceptions. 
Args: extra_headers: Send extra headers @@ -230,33 +256,29 @@ async def cancel( timeout: Override the client-level default timeout for this request, in seconds """ - if not benchmark_id: - raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") - if not job_id: - raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}") - extra_headers = {"Accept": "*/*", **(extra_headers or {})} - return await self._delete( - f"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}", + return await self._get( + f"/store/order/{order_id}", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=NoneType, + cast_to=Order, ) - async def status( + async def delete( self, - job_id: str, + order_id: int, *, - benchmark_id: str, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Job: - """ - Get the status of a job. + ) -> None: + """For valid response try integer IDs with value < 1000. + + Anything above 1000 or + nonintegers will generate API errors Args: extra_headers: Send extra headers @@ -267,74 +289,71 @@ async def status( timeout: Override the client-level default timeout for this request, in seconds """ - if not benchmark_id: - raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") - if not job_id: - raise ValueError(f"Expected a non-empty value for `job_id` but received {job_id!r}") - return await self._get( - f"/v1/eval/benchmarks/{benchmark_id}/jobs/{job_id}", + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return await self._delete( + f"/store/order/{order_id}", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), - cast_to=Job, + cast_to=NoneType, ) -class JobsResourceWithRawResponse: - def __init__(self, jobs: JobsResource) -> None: - self._jobs = jobs +class OrderResourceWithRawResponse: + def __init__(self, order: OrderResource) -> None: + self._order = order - self.retrieve = to_raw_response_wrapper( - jobs.retrieve, + self.create = to_raw_response_wrapper( + order.create, ) - self.cancel = to_raw_response_wrapper( - jobs.cancel, + self.retrieve = to_raw_response_wrapper( + order.retrieve, ) - self.status = to_raw_response_wrapper( - jobs.status, + self.delete = to_raw_response_wrapper( + order.delete, ) -class AsyncJobsResourceWithRawResponse: - def __init__(self, jobs: AsyncJobsResource) -> None: - self._jobs = jobs +class AsyncOrderResourceWithRawResponse: + def __init__(self, order: AsyncOrderResource) -> None: + self._order = order - self.retrieve = async_to_raw_response_wrapper( - jobs.retrieve, + self.create = async_to_raw_response_wrapper( + order.create, ) - self.cancel = async_to_raw_response_wrapper( - jobs.cancel, + self.retrieve = async_to_raw_response_wrapper( + order.retrieve, ) - self.status = async_to_raw_response_wrapper( - jobs.status, + self.delete = async_to_raw_response_wrapper( + order.delete, ) -class JobsResourceWithStreamingResponse: - def __init__(self, jobs: JobsResource) -> None: - self._jobs = jobs +class OrderResourceWithStreamingResponse: + def 
__init__(self, order: OrderResource) -> None: + self._order = order - self.retrieve = to_streamed_response_wrapper( - jobs.retrieve, + self.create = to_streamed_response_wrapper( + order.create, ) - self.cancel = to_streamed_response_wrapper( - jobs.cancel, + self.retrieve = to_streamed_response_wrapper( + order.retrieve, ) - self.status = to_streamed_response_wrapper( - jobs.status, + self.delete = to_streamed_response_wrapper( + order.delete, ) -class AsyncJobsResourceWithStreamingResponse: - def __init__(self, jobs: AsyncJobsResource) -> None: - self._jobs = jobs +class AsyncOrderResourceWithStreamingResponse: + def __init__(self, order: AsyncOrderResource) -> None: + self._order = order - self.retrieve = async_to_streamed_response_wrapper( - jobs.retrieve, + self.create = async_to_streamed_response_wrapper( + order.create, ) - self.cancel = async_to_streamed_response_wrapper( - jobs.cancel, + self.retrieve = async_to_streamed_response_wrapper( + order.retrieve, ) - self.status = async_to_streamed_response_wrapper( - jobs.status, + self.delete = async_to_streamed_response_wrapper( + order.delete, ) diff --git a/src/llama_stack_cli/resources/store/store.py b/src/llama_stack_cli/resources/store/store.py new file mode 100644 index 00000000..1494e998 --- /dev/null +++ b/src/llama_stack_cli/resources/store/store.py @@ -0,0 +1,167 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import httpx + +from .order import ( + OrderResource, + AsyncOrderResource, + OrderResourceWithRawResponse, + AsyncOrderResourceWithRawResponse, + OrderResourceWithStreamingResponse, + AsyncOrderResourceWithStreamingResponse, +) +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import ( + to_raw_response_wrapper, + to_streamed_response_wrapper, + async_to_raw_response_wrapper, + async_to_streamed_response_wrapper, +) +from ..._base_client import make_request_options +from ...types.store_list_inventory_response import StoreListInventoryResponse + +__all__ = ["StoreResource", "AsyncStoreResource"] + + +class StoreResource(SyncAPIResource): + @cached_property + def order(self) -> OrderResource: + return OrderResource(self._client) + + @cached_property + def with_raw_response(self) -> StoreResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-cli-python#accessing-raw-response-data-eg-headers + """ + return StoreResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> StoreResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-cli-python#with_streaming_response + """ + return StoreResourceWithStreamingResponse(self) + + def list_inventory( + self, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> StoreListInventoryResponse: + """Returns a map of status codes to quantities""" + return self._get( + "/store/inventory", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=StoreListInventoryResponse, + ) + + +class AsyncStoreResource(AsyncAPIResource): + @cached_property + def order(self) -> AsyncOrderResource: + return AsyncOrderResource(self._client) + + @cached_property + def with_raw_response(self) -> AsyncStoreResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-cli-python#accessing-raw-response-data-eg-headers + """ + return AsyncStoreResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncStoreResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-cli-python#with_streaming_response + """ + return AsyncStoreResourceWithStreamingResponse(self) + + async def list_inventory( + self, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> StoreListInventoryResponse: + """Returns a map of status codes to quantities""" + return await self._get( + "/store/inventory", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=StoreListInventoryResponse, + ) + + +class StoreResourceWithRawResponse: + def __init__(self, store: StoreResource) -> None: + self._store = store + + self.list_inventory = to_raw_response_wrapper( + store.list_inventory, + ) + + @cached_property + def order(self) -> OrderResourceWithRawResponse: + return OrderResourceWithRawResponse(self._store.order) + + +class AsyncStoreResourceWithRawResponse: + def __init__(self, store: AsyncStoreResource) -> None: + self._store = store + + self.list_inventory = async_to_raw_response_wrapper( + store.list_inventory, + ) + + @cached_property + def order(self) -> AsyncOrderResourceWithRawResponse: + return AsyncOrderResourceWithRawResponse(self._store.order) + + +class StoreResourceWithStreamingResponse: + def __init__(self, store: StoreResource) -> None: + self._store = store + + self.list_inventory = to_streamed_response_wrapper( + store.list_inventory, + ) + + @cached_property + def order(self) -> OrderResourceWithStreamingResponse: + return OrderResourceWithStreamingResponse(self._store.order) + + +class AsyncStoreResourceWithStreamingResponse: + def __init__(self, store: AsyncStoreResource) -> None: + self._store = store + + self.list_inventory = async_to_streamed_response_wrapper( + store.list_inventory, + ) + + @cached_property + def order(self) -> AsyncOrderResourceWithStreamingResponse: + return AsyncOrderResourceWithStreamingResponse(self._store.order) 
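
Not part of the patch itself, but as a minimal usage sketch of the new store/order resources, assuming the generated synchronous client is exported from `llama_stack_cli` under a name like `LlamaStackCli` (the real export name lives in the new `_client.py`, which this hunk does not show):

    # Usage sketch only -- "LlamaStackCli" is an assumed export name, not confirmed by this hunk.
    from llama_stack_cli import LlamaStackCli

    client = LlamaStackCli()

    # Orders hang off the nested store.order resource.
    order = client.store.order.create(pet_id=7, quantity=1, status="placed")
    fetched = client.store.order.retrieve(3)   # integer order id
    client.store.order.delete(3)               # returns None

    # Inventory lives directly on the store resource and returns Dict[str, int].
    inventory = client.store.list_inventory()

    # Raw-response and streaming wrappers mirror every method.
    raw = client.store.with_raw_response.list_inventory()
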
diff --git a/src/llama_stack_cli/resources/user.py b/src/llama_stack_cli/resources/user.py new file mode 100644 index 00000000..e9cf3e53 --- /dev/null +++ b/src/llama_stack_cli/resources/user.py @@ -0,0 +1,724 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Iterable + +import httpx + +from ..types import user_login_params, user_create_params, user_update_params +from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven +from .._utils import maybe_transform, async_maybe_transform +from .._compat import cached_property +from .._resource import SyncAPIResource, AsyncAPIResource +from .._response import ( + to_raw_response_wrapper, + to_streamed_response_wrapper, + async_to_raw_response_wrapper, + async_to_streamed_response_wrapper, +) +from ..types.user import User +from .._base_client import make_request_options +from ..types.user_param import UserParam + +__all__ = ["UserResource", "AsyncUserResource"] + + +class UserResource(SyncAPIResource): + @cached_property + def with_raw_response(self) -> UserResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-cli-python#accessing-raw-response-data-eg-headers + """ + return UserResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> UserResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-cli-python#with_streaming_response + """ + return UserResourceWithStreamingResponse(self) + + def create( + self, + *, + id: int | NotGiven = NOT_GIVEN, + email: str | NotGiven = NOT_GIVEN, + first_name: str | NotGiven = NOT_GIVEN, + last_name: str | NotGiven = NOT_GIVEN, + password: str | NotGiven = NOT_GIVEN, + phone: str | NotGiven = NOT_GIVEN, + username: str | NotGiven = NOT_GIVEN, + user_status: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> User: + """ + This can only be done by the logged in user. + + Args: + user_status: User Status + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/user", + body=maybe_transform( + { + "id": id, + "email": email, + "first_name": first_name, + "last_name": last_name, + "password": password, + "phone": phone, + "username": username, + "user_status": user_status, + }, + user_create_params.UserCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=User, + ) + + def retrieve( + self, + username: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
+ # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> User: + """ + Get user by user name + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not username: + raise ValueError(f"Expected a non-empty value for `username` but received {username!r}") + return self._get( + f"/user/{username}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=User, + ) + + def update( + self, + existing_username: str, + *, + id: int | NotGiven = NOT_GIVEN, + email: str | NotGiven = NOT_GIVEN, + first_name: str | NotGiven = NOT_GIVEN, + last_name: str | NotGiven = NOT_GIVEN, + password: str | NotGiven = NOT_GIVEN, + phone: str | NotGiven = NOT_GIVEN, + username: str | NotGiven = NOT_GIVEN, + user_status: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> None: + """ + This can only be done by the logged in user. + + Args: + user_status: User Status + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not existing_username: + raise ValueError(f"Expected a non-empty value for `existing_username` but received {existing_username!r}") + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return self._put( + f"/user/{existing_username}", + body=maybe_transform( + { + "id": id, + "email": email, + "first_name": first_name, + "last_name": last_name, + "password": password, + "phone": phone, + "username": username, + "user_status": user_status, + }, + user_update_params.UserUpdateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + def delete( + self, + username: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> None: + """ + This can only be done by the logged in user. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not username: + raise ValueError(f"Expected a non-empty value for `username` but received {username!r}") + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return self._delete( + f"/user/{username}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + def create_with_list( + self, + *, + items: Iterable[UserParam] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> User: + """ + Creates list of users with given input array + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/user/createWithList", + body=maybe_transform(items, Iterable[UserParam]), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=User, + ) + + def login( + self, + *, + password: str | NotGiven = NOT_GIVEN, + username: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> str: + """ + Logs user into the system + + Args: + password: The password for login in clear text + + username: The user name for login + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get( + "/user/login", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "password": password, + "username": username, + }, + user_login_params.UserLoginParams, + ), + ), + cast_to=str, + ) + + def logout( + self, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> None: + """Logs out current logged in user session""" + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return self._get( + "/user/logout", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + +class AsyncUserResource(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncUserResourceWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-cli-python#accessing-raw-response-data-eg-headers + """ + return AsyncUserResourceWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncUserResourceWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/stainless-sdks/llama-stack-cli-python#with_streaming_response + """ + return AsyncUserResourceWithStreamingResponse(self) + + async def create( + self, + *, + id: int | NotGiven = NOT_GIVEN, + email: str | NotGiven = NOT_GIVEN, + first_name: str | NotGiven = NOT_GIVEN, + last_name: str | NotGiven = NOT_GIVEN, + password: str | NotGiven = NOT_GIVEN, + phone: str | NotGiven = NOT_GIVEN, + username: str | NotGiven = NOT_GIVEN, + user_status: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> User: + """ + This can only be done by the logged in user. + + Args: + user_status: User Status + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/user", + body=await async_maybe_transform( + { + "id": id, + "email": email, + "first_name": first_name, + "last_name": last_name, + "password": password, + "phone": phone, + "username": username, + "user_status": user_status, + }, + user_create_params.UserCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=User, + ) + + async def retrieve( + self, + username: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> User: + """ + Get user by user name + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not username: + raise ValueError(f"Expected a non-empty value for `username` but received {username!r}") + return await self._get( + f"/user/{username}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=User, + ) + + async def update( + self, + existing_username: str, + *, + id: int | NotGiven = NOT_GIVEN, + email: str | NotGiven = NOT_GIVEN, + first_name: str | NotGiven = NOT_GIVEN, + last_name: str | NotGiven = NOT_GIVEN, + password: str | NotGiven = NOT_GIVEN, + phone: str | NotGiven = NOT_GIVEN, + username: str | NotGiven = NOT_GIVEN, + user_status: int | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> None: + """ + This can only be done by the logged in user. + + Args: + user_status: User Status + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not existing_username: + raise ValueError(f"Expected a non-empty value for `existing_username` but received {existing_username!r}") + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return await self._put( + f"/user/{existing_username}", + body=await async_maybe_transform( + { + "id": id, + "email": email, + "first_name": first_name, + "last_name": last_name, + "password": password, + "phone": phone, + "username": username, + "user_status": user_status, + }, + user_update_params.UserUpdateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + async def delete( + self, + username: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> None: + """ + This can only be done by the logged in user. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not username: + raise ValueError(f"Expected a non-empty value for `username` but received {username!r}") + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return await self._delete( + f"/user/{username}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + async def create_with_list( + self, + *, + items: Iterable[UserParam] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> User: + """ + Creates list of users with given input array + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/user/createWithList", + body=await async_maybe_transform(items, Iterable[UserParam]), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=User, + ) + + async def login( + self, + *, + password: str | NotGiven = NOT_GIVEN, + username: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> str: + """ + Logs user into the system + + Args: + password: The password for login in clear text + + username: The user name for login + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._get( + "/user/login", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=await async_maybe_transform( + { + "password": password, + "username": username, + }, + user_login_params.UserLoginParams, + ), + ), + cast_to=str, + ) + + async def logout( + self, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> None: + """Logs out current logged in user session""" + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return await self._get( + "/user/logout", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + +class UserResourceWithRawResponse: + def __init__(self, user: UserResource) -> None: + self._user = user + + self.create = to_raw_response_wrapper( + user.create, + ) + self.retrieve = to_raw_response_wrapper( + user.retrieve, + ) + self.update = to_raw_response_wrapper( + user.update, + ) + self.delete = to_raw_response_wrapper( + user.delete, + ) + self.create_with_list = to_raw_response_wrapper( + user.create_with_list, + ) + self.login = to_raw_response_wrapper( + user.login, + ) + self.logout = to_raw_response_wrapper( + user.logout, + ) + + +class AsyncUserResourceWithRawResponse: + def __init__(self, user: AsyncUserResource) -> None: + self._user = user + + self.create = async_to_raw_response_wrapper( + user.create, + ) + self.retrieve = async_to_raw_response_wrapper( + user.retrieve, + ) + self.update = async_to_raw_response_wrapper( + user.update, + ) + self.delete = async_to_raw_response_wrapper( + user.delete, + ) + self.create_with_list = async_to_raw_response_wrapper( + user.create_with_list, + ) + self.login = async_to_raw_response_wrapper( + user.login, + ) + self.logout = async_to_raw_response_wrapper( + user.logout, + ) + + +class UserResourceWithStreamingResponse: + def __init__(self, user: UserResource) -> None: + self._user = user + + self.create = to_streamed_response_wrapper( + user.create, + ) + self.retrieve = to_streamed_response_wrapper( + user.retrieve, + ) + self.update = to_streamed_response_wrapper( + user.update, + ) + self.delete = to_streamed_response_wrapper( + user.delete, + ) + self.create_with_list = to_streamed_response_wrapper( + user.create_with_list, + ) + self.login = to_streamed_response_wrapper( + user.login, + ) + self.logout = to_streamed_response_wrapper( + user.logout, + ) + + +class AsyncUserResourceWithStreamingResponse: + def __init__(self, user: AsyncUserResource) -> None: + self._user = user + + self.create = async_to_streamed_response_wrapper( + user.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + user.retrieve, + ) + self.update = async_to_streamed_response_wrapper( + user.update, + ) + self.delete = async_to_streamed_response_wrapper( + user.delete, + ) + self.create_with_list = async_to_streamed_response_wrapper( + user.create_with_list, + ) + self.login = async_to_streamed_response_wrapper( + user.login, + ) + self.logout = async_to_streamed_response_wrapper( + user.logout, + ) diff --git a/src/llama_stack_cli/types/__init__.py b/src/llama_stack_cli/types/__init__.py new file mode 100644 index 00000000..6b7a8396 --- /dev/null +++ b/src/llama_stack_cli/types/__init__.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
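
Similarly, a hedged sketch of the flat user resource added above (same assumed `LlamaStackCli` export; per the signatures in this hunk, `login` returns the raw token string, while `update`, `delete`, and `logout` return `None`):

    # Usage sketch only -- same assumed client export as above.
    from llama_stack_cli import LlamaStackCli

    client = LlamaStackCli()

    client.user.create(username="jdoe", email="jdoe@example.com", password="secret")
    token = client.user.login(username="jdoe", password="secret")  # returns the session token as str
    profile = client.user.retrieve("jdoe")
    client.user.update("jdoe", first_name="Jane")  # first argument is existing_username; returns None
    client.user.logout()
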
+ +from __future__ import annotations + +from .pet import Pet as Pet +from .user import User as User +from .shared import Order as Order +from .category import Category as Category +from .user_param import UserParam as UserParam +from .category_param import CategoryParam as CategoryParam +from .pet_create_params import PetCreateParams as PetCreateParams +from .pet_update_params import PetUpdateParams as PetUpdateParams +from .user_login_params import UserLoginParams as UserLoginParams +from .user_create_params import UserCreateParams as UserCreateParams +from .user_update_params import UserUpdateParams as UserUpdateParams +from .user_login_response import UserLoginResponse as UserLoginResponse +from .pet_find_by_tags_params import PetFindByTagsParams as PetFindByTagsParams +from .pet_update_by_id_params import PetUpdateByIDParams as PetUpdateByIDParams +from .pet_upload_image_params import PetUploadImageParams as PetUploadImageParams +from .pet_find_by_status_params import PetFindByStatusParams as PetFindByStatusParams +from .pet_find_by_tags_response import PetFindByTagsResponse as PetFindByTagsResponse +from .pet_upload_image_response import PetUploadImageResponse as PetUploadImageResponse +from .pet_find_by_status_response import PetFindByStatusResponse as PetFindByStatusResponse +from .user_create_with_list_params import UserCreateWithListParams as UserCreateWithListParams +from .store_list_inventory_response import StoreListInventoryResponse as StoreListInventoryResponse diff --git a/src/llama_stack_cli/types/category.py b/src/llama_stack_cli/types/category.py new file mode 100644 index 00000000..2ee29992 --- /dev/null +++ b/src/llama_stack_cli/types/category.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from .._models import BaseModel + +__all__ = ["Category"] + + +class Category(BaseModel): + id: Optional[int] = None + + name: Optional[str] = None diff --git a/src/llama_stack_client/types/tool_list_params.py b/src/llama_stack_cli/types/category_param.py similarity index 52% rename from src/llama_stack_client/types/tool_list_params.py rename to src/llama_stack_cli/types/category_param.py index 38f4bf73..2cdf2642 100644 --- a/src/llama_stack_client/types/tool_list_params.py +++ b/src/llama_stack_cli/types/category_param.py @@ -4,9 +4,10 @@ from typing_extensions import TypedDict -__all__ = ["ToolListParams"] +__all__ = ["CategoryParam"] -class ToolListParams(TypedDict, total=False): - toolgroup_id: str - """The ID of the tool group to list tools for.""" +class CategoryParam(TypedDict, total=False): + id: int + + name: str diff --git a/src/llama_stack_cli/types/pet.py b/src/llama_stack_cli/types/pet.py new file mode 100644 index 00000000..b9aebf65 --- /dev/null +++ b/src/llama_stack_cli/types/pet.py @@ -0,0 +1,32 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import List, Optional +from typing_extensions import Literal + +from pydantic import Field as FieldInfo + +from .._models import BaseModel +from .category import Category + +__all__ = ["Pet", "Tag"] + + +class Tag(BaseModel): + id: Optional[int] = None + + name: Optional[str] = None + + +class Pet(BaseModel): + name: str + + photo_urls: List[str] = FieldInfo(alias="photoUrls") + + id: Optional[int] = None + + category: Optional[Category] = None + + status: Optional[Literal["available", "pending", "sold"]] = None + """pet status in the store""" + + tags: Optional[List[Tag]] = None diff --git a/src/llama_stack_cli/types/pet_create_params.py b/src/llama_stack_cli/types/pet_create_params.py new file mode 100644 index 00000000..a547445a --- /dev/null +++ b/src/llama_stack_cli/types/pet_create_params.py @@ -0,0 +1,32 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Iterable +from typing_extensions import Literal, Required, Annotated, TypedDict + +from .._utils import PropertyInfo +from .category_param import CategoryParam + +__all__ = ["PetCreateParams", "Tag"] + + +class PetCreateParams(TypedDict, total=False): + name: Required[str] + + photo_urls: Required[Annotated[List[str], PropertyInfo(alias="photoUrls")]] + + id: int + + category: CategoryParam + + status: Literal["available", "pending", "sold"] + """pet status in the store""" + + tags: Iterable[Tag] + + +class Tag(TypedDict, total=False): + id: int + + name: str diff --git a/src/llama_stack_cli/types/pet_find_by_status_params.py b/src/llama_stack_cli/types/pet_find_by_status_params.py new file mode 100644 index 00000000..a9e4cc8b --- /dev/null +++ b/src/llama_stack_cli/types/pet_find_by_status_params.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["PetFindByStatusParams"] + + +class PetFindByStatusParams(TypedDict, total=False): + status: Literal["available", "pending", "sold"] + """Status values that need to be considered for filter""" diff --git a/src/llama_stack_client/types/shield_list_response.py b/src/llama_stack_cli/types/pet_find_by_status_response.py similarity index 59% rename from src/llama_stack_client/types/shield_list_response.py rename to src/llama_stack_cli/types/pet_find_by_status_response.py index 0cba0500..95eed041 100644 --- a/src/llama_stack_client/types/shield_list_response.py +++ b/src/llama_stack_cli/types/pet_find_by_status_response.py @@ -3,8 +3,8 @@ from typing import List from typing_extensions import TypeAlias -from .shield import Shield +from .pet import Pet -__all__ = ["ShieldListResponse"] +__all__ = ["PetFindByStatusResponse"] -ShieldListResponse: TypeAlias = List[Shield] +PetFindByStatusResponse: TypeAlias = List[Pet] diff --git a/src/llama_stack_cli/types/pet_find_by_tags_params.py b/src/llama_stack_cli/types/pet_find_by_tags_params.py new file mode 100644 index 00000000..91e98c65 --- /dev/null +++ b/src/llama_stack_cli/types/pet_find_by_tags_params.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import List +from typing_extensions import TypedDict + +__all__ = ["PetFindByTagsParams"] + + +class PetFindByTagsParams(TypedDict, total=False): + tags: List[str] + """Tags to filter by""" diff --git a/src/llama_stack_client/types/model_list_response.py b/src/llama_stack_cli/types/pet_find_by_tags_response.py similarity index 60% rename from src/llama_stack_client/types/model_list_response.py rename to src/llama_stack_cli/types/pet_find_by_tags_response.py index 905cdb0f..32314ba8 100644 --- a/src/llama_stack_client/types/model_list_response.py +++ b/src/llama_stack_cli/types/pet_find_by_tags_response.py @@ -3,8 +3,8 @@ from typing import List from typing_extensions import TypeAlias -from .model import Model +from .pet import Pet -__all__ = ["ModelListResponse"] +__all__ = ["PetFindByTagsResponse"] -ModelListResponse: TypeAlias = List[Model] +PetFindByTagsResponse: TypeAlias = List[Pet] diff --git a/src/llama_stack_cli/types/pet_update_by_id_params.py b/src/llama_stack_cli/types/pet_update_by_id_params.py new file mode 100644 index 00000000..96b42307 --- /dev/null +++ b/src/llama_stack_cli/types/pet_update_by_id_params.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import TypedDict + +__all__ = ["PetUpdateByIDParams"] + + +class PetUpdateByIDParams(TypedDict, total=False): + name: str + """Name of pet that needs to be updated""" + + status: str + """Status of pet that needs to be updated""" diff --git a/src/llama_stack_cli/types/pet_update_params.py b/src/llama_stack_cli/types/pet_update_params.py new file mode 100644 index 00000000..78a4d5bf --- /dev/null +++ b/src/llama_stack_cli/types/pet_update_params.py @@ -0,0 +1,32 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Iterable +from typing_extensions import Literal, Required, Annotated, TypedDict + +from .._utils import PropertyInfo +from .category_param import CategoryParam + +__all__ = ["PetUpdateParams", "Tag"] + + +class PetUpdateParams(TypedDict, total=False): + name: Required[str] + + photo_urls: Required[Annotated[List[str], PropertyInfo(alias="photoUrls")]] + + id: int + + category: CategoryParam + + status: Literal["available", "pending", "sold"] + """pet status in the store""" + + tags: Iterable[Tag] + + +class Tag(TypedDict, total=False): + id: int + + name: str diff --git a/src/llama_stack_cli/types/pet_upload_image_params.py b/src/llama_stack_cli/types/pet_upload_image_params.py new file mode 100644 index 00000000..53a6aca1 --- /dev/null +++ b/src/llama_stack_cli/types/pet_upload_image_params.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing_extensions import Annotated, TypedDict + +from .._types import FileTypes +from .._utils import PropertyInfo + +__all__ = ["PetUploadImageParams"] + + +class PetUploadImageParams(TypedDict, total=False): + additional_metadata: Annotated[str, PropertyInfo(alias="additionalMetadata")] + """Additional Metadata""" + + image: FileTypes diff --git a/src/llama_stack_cli/types/pet_upload_image_response.py b/src/llama_stack_cli/types/pet_upload_image_response.py new file mode 100644 index 00000000..6b39d71e --- /dev/null +++ b/src/llama_stack_cli/types/pet_upload_image_response.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from .._models import BaseModel + +__all__ = ["PetUploadImageResponse"] + + +class PetUploadImageResponse(BaseModel): + code: Optional[int] = None + + message: Optional[str] = None + + type: Optional[str] = None diff --git a/src/llama_stack_client/types/eval/__init__.py b/src/llama_stack_cli/types/shared/__init__.py similarity index 71% rename from src/llama_stack_client/types/eval/__init__.py rename to src/llama_stack_cli/types/shared/__init__.py index f8ee8b14..3d5c73d1 100644 --- a/src/llama_stack_client/types/eval/__init__.py +++ b/src/llama_stack_cli/types/shared/__init__.py @@ -1,3 +1,3 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from __future__ import annotations +from .order import Order as Order diff --git a/src/llama_stack_cli/types/shared/order.py b/src/llama_stack_cli/types/shared/order.py new file mode 100644 index 00000000..cf3b571c --- /dev/null +++ b/src/llama_stack_cli/types/shared/order.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from datetime import datetime +from typing_extensions import Literal + +from pydantic import Field as FieldInfo + +from ..._models import BaseModel + +__all__ = ["Order"] + + +class Order(BaseModel): + id: Optional[int] = None + + complete: Optional[bool] = None + + pet_id: Optional[int] = FieldInfo(alias="petId", default=None) + + quantity: Optional[int] = None + + ship_date: Optional[datetime] = FieldInfo(alias="shipDate", default=None) + + status: Optional[Literal["placed", "approved", "delivered"]] = None + """Order Status""" diff --git a/src/llama_stack_cli/types/store/__init__.py b/src/llama_stack_cli/types/store/__init__.py new file mode 100644 index 00000000..f7425318 --- /dev/null +++ b/src/llama_stack_cli/types/store/__init__.py @@ -0,0 +1,5 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .order_create_params import OrderCreateParams as OrderCreateParams diff --git a/src/llama_stack_cli/types/store/order_create_params.py b/src/llama_stack_cli/types/store/order_create_params.py new file mode 100644 index 00000000..2eadb108 --- /dev/null +++ b/src/llama_stack_cli/types/store/order_create_params.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Union +from datetime import datetime +from typing_extensions import Literal, Annotated, TypedDict + +from ..._utils import PropertyInfo + +__all__ = ["OrderCreateParams"] + + +class OrderCreateParams(TypedDict, total=False): + id: int + + complete: bool + + pet_id: Annotated[int, PropertyInfo(alias="petId")] + + quantity: int + + ship_date: Annotated[Union[str, datetime], PropertyInfo(alias="shipDate", format="iso8601")] + + status: Literal["placed", "approved", "delivered"] + """Order Status""" diff --git a/src/llama_stack_client/types/tool_list_response.py b/src/llama_stack_cli/types/store_list_inventory_response.py similarity index 51% rename from src/llama_stack_client/types/tool_list_response.py rename to src/llama_stack_cli/types/store_list_inventory_response.py index 11750ace..0a25d963 100644 --- a/src/llama_stack_client/types/tool_list_response.py +++ b/src/llama_stack_cli/types/store_list_inventory_response.py @@ -1,10 +1,8 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import List +from typing import Dict from typing_extensions import TypeAlias -from .tool import Tool +__all__ = ["StoreListInventoryResponse"] -__all__ = ["ToolListResponse"] - -ToolListResponse: TypeAlias = List[Tool] +StoreListInventoryResponse: TypeAlias = Dict[str, int] diff --git a/src/llama_stack_cli/types/user.py b/src/llama_stack_cli/types/user.py new file mode 100644 index 00000000..db3d2516 --- /dev/null +++ b/src/llama_stack_cli/types/user.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from pydantic import Field as FieldInfo + +from .._models import BaseModel + +__all__ = ["User"] + + +class User(BaseModel): + id: Optional[int] = None + + email: Optional[str] = None + + first_name: Optional[str] = FieldInfo(alias="firstName", default=None) + + last_name: Optional[str] = FieldInfo(alias="lastName", default=None) + + password: Optional[str] = None + + phone: Optional[str] = None + + username: Optional[str] = None + + user_status: Optional[int] = FieldInfo(alias="userStatus", default=None) + """User Status""" diff --git a/src/llama_stack_cli/types/user_create_params.py b/src/llama_stack_cli/types/user_create_params.py new file mode 100644 index 00000000..cf829df5 --- /dev/null +++ b/src/llama_stack_cli/types/user_create_params.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Annotated, TypedDict + +from .._utils import PropertyInfo + +__all__ = ["UserCreateParams"] + + +class UserCreateParams(TypedDict, total=False): + id: int + + email: str + + first_name: Annotated[str, PropertyInfo(alias="firstName")] + + last_name: Annotated[str, PropertyInfo(alias="lastName")] + + password: str + + phone: str + + username: str + + user_status: Annotated[int, PropertyInfo(alias="userStatus")] + """User Status""" diff --git a/src/llama_stack_cli/types/user_create_with_list_params.py b/src/llama_stack_cli/types/user_create_with_list_params.py new file mode 100644 index 00000000..32b51db8 --- /dev/null +++ b/src/llama_stack_cli/types/user_create_with_list_params.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
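
The `*_params` TypedDicts above declare snake_case keys with `PropertyInfo` aliases, so callers pass Python-style names and the transform step in the resource methods rewrites them to the wire format (for example `pet_id` becomes `petId`, and `ship_date` becomes `shipDate` serialized as ISO 8601); the same pattern applies to `photo_urls` / `photoUrls` on the pet params. A small sketch using the exported `OrderCreateParams`:

    # Sketch only: building a params dict with snake_case keys; the TypedDict's
    # aliases map them to "petId" / "shipDate" (ISO 8601) when the body is built.
    from datetime import datetime, timezone

    from llama_stack_cli.types.store import OrderCreateParams

    params: OrderCreateParams = {
        "pet_id": 7,
        "quantity": 1,
        "ship_date": datetime.now(timezone.utc),
        "status": "placed",
    }
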
+ +from __future__ import annotations + +from typing import Iterable +from typing_extensions import TypedDict + +from .user_param import UserParam + +__all__ = ["UserCreateWithListParams"] + + +class UserCreateWithListParams(TypedDict, total=False): + items: Iterable[UserParam] diff --git a/src/llama_stack_cli/types/user_login_params.py b/src/llama_stack_cli/types/user_login_params.py new file mode 100644 index 00000000..3128ccd6 --- /dev/null +++ b/src/llama_stack_cli/types/user_login_params.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import TypedDict + +__all__ = ["UserLoginParams"] + + +class UserLoginParams(TypedDict, total=False): + password: str + """The password for login in clear text""" + + username: str + """The user name for login""" diff --git a/src/llama_stack_cli/types/user_login_response.py b/src/llama_stack_cli/types/user_login_response.py new file mode 100644 index 00000000..30deea7e --- /dev/null +++ b/src/llama_stack_cli/types/user_login_response.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import TypeAlias + +__all__ = ["UserLoginResponse"] + +UserLoginResponse: TypeAlias = str diff --git a/src/llama_stack_cli/types/user_param.py b/src/llama_stack_cli/types/user_param.py new file mode 100644 index 00000000..cdf6047f --- /dev/null +++ b/src/llama_stack_cli/types/user_param.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Annotated, TypedDict + +from .._utils import PropertyInfo + +__all__ = ["UserParam"] + + +class UserParam(TypedDict, total=False): + id: int + + email: str + + first_name: Annotated[str, PropertyInfo(alias="firstName")] + + last_name: Annotated[str, PropertyInfo(alias="lastName")] + + password: str + + phone: str + + username: str + + user_status: Annotated[int, PropertyInfo(alias="userStatus")] + """User Status""" diff --git a/src/llama_stack_cli/types/user_update_params.py b/src/llama_stack_cli/types/user_update_params.py new file mode 100644 index 00000000..723b8fc6 --- /dev/null +++ b/src/llama_stack_cli/types/user_update_params.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Annotated, TypedDict + +from .._utils import PropertyInfo + +__all__ = ["UserUpdateParams"] + + +class UserUpdateParams(TypedDict, total=False): + id: int + + email: str + + first_name: Annotated[str, PropertyInfo(alias="firstName")] + + last_name: Annotated[str, PropertyInfo(alias="lastName")] + + password: str + + phone: str + + username: str + + user_status: Annotated[int, PropertyInfo(alias="userStatus")] + """User Status""" diff --git a/src/llama_stack_client/_client.py b/src/llama_stack_client/_client.py deleted file mode 100644 index 409d8f5c..00000000 --- a/src/llama_stack_client/_client.py +++ /dev/null @@ -1,664 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations -import json - -import os -from typing import Any, Union, Mapping -from typing_extensions import Self, override - -import httpx - -from . 
import _exceptions -from ._qs import Querystring -from ._types import ( - NOT_GIVEN, - Omit, - Timeout, - NotGiven, - Transport, - ProxiesTypes, - RequestOptions, -) -from ._utils import is_given, get_async_library -from ._version import __version__ -from .resources import ( - files, - tools, - models, - routes, - safety, - inspect, - scoring, - shields, - datasets, - inference, - providers, - telemetry, - vector_io, - benchmarks, - embeddings, - toolgroups, - vector_dbs, - completions, - scoring_functions, - synthetic_data_generation, -) -from ._streaming import Stream as Stream, AsyncStream as AsyncStream -from ._exceptions import APIStatusError -from ._base_client import ( - DEFAULT_MAX_RETRIES, - SyncAPIClient, - AsyncAPIClient, -) -from .resources.chat import chat -from .resources.eval import eval -from .resources.agents import agents -from .resources.responses import responses -from .resources.tool_runtime import tool_runtime -from .resources.post_training import post_training -from .resources.vector_stores import vector_stores - -__all__ = [ - "Timeout", - "Transport", - "ProxiesTypes", - "RequestOptions", - "LlamaStackClient", - "AsyncLlamaStackClient", - "Client", - "AsyncClient", -] - - -class LlamaStackClient(SyncAPIClient): - toolgroups: toolgroups.ToolgroupsResource - tools: tools.ToolsResource - tool_runtime: tool_runtime.ToolRuntimeResource - responses: responses.ResponsesResource - agents: agents.AgentsResource - datasets: datasets.DatasetsResource - eval: eval.EvalResource - inspect: inspect.InspectResource - inference: inference.InferenceResource - embeddings: embeddings.EmbeddingsResource - chat: chat.ChatResource - completions: completions.CompletionsResource - vector_io: vector_io.VectorIoResource - vector_dbs: vector_dbs.VectorDBsResource - vector_stores: vector_stores.VectorStoresResource - models: models.ModelsResource - post_training: post_training.PostTrainingResource - providers: providers.ProvidersResource - routes: routes.RoutesResource - safety: safety.SafetyResource - shields: shields.ShieldsResource - synthetic_data_generation: synthetic_data_generation.SyntheticDataGenerationResource - telemetry: telemetry.TelemetryResource - scoring: scoring.ScoringResource - scoring_functions: scoring_functions.ScoringFunctionsResource - benchmarks: benchmarks.BenchmarksResource - files: files.FilesResource - with_raw_response: LlamaStackClientWithRawResponse - with_streaming_response: LlamaStackClientWithStreamedResponse - - # client options - api_key: str | None - - def __init__( - self, - *, - api_key: str | None = None, - base_url: str | httpx.URL | None = None, - timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN, - max_retries: int = DEFAULT_MAX_RETRIES, - default_headers: Mapping[str, str] | None = None, - default_query: Mapping[str, object] | None = None, - # Configure a custom httpx client. - # We provide a `DefaultHttpxClient` class that you can pass to retain the default values we use for `limits`, `timeout` & `follow_redirects`. - # See the [httpx documentation](https://www.python-httpx.org/api/#client) for more details. - http_client: httpx.Client | None = None, - # Enable or disable schema validation for data returned by the API. - # When enabled an error APIResponseValidationError is raised - # if the API responds with invalid data for the expected schema. - # - # This parameter may be removed or changed in the future. 
- # If you rely on this feature, please open a GitHub issue - # outlining your use-case to help us decide if it should be - # part of our public interface in the future. - _strict_response_validation: bool = False, - provider_data: Mapping[str, Any] | None = None, - ) -> None: - """Construct a new synchronous LlamaStackClient client instance. - - This automatically infers the `api_key` argument from the `LLAMA_STACK_API_KEY` environment variable if it is not provided. - """ - if api_key is None: - api_key = os.environ.get("LLAMA_STACK_API_KEY") - self.api_key = api_key - - if base_url is None: - base_url = os.environ.get("LLAMA_STACK_BASE_URL") - if base_url is None: - base_url = f"http://any-hosted-llama-stack.com" - - custom_headers = default_headers or {} - custom_headers["X-LlamaStack-Client-Version"] = __version__ - if provider_data is not None: - custom_headers["X-LlamaStack-Provider-Data"] = json.dumps(provider_data) - - super().__init__( - version=__version__, - base_url=base_url, - max_retries=max_retries, - timeout=timeout, - http_client=http_client, - custom_headers=custom_headers, - custom_query=default_query, - _strict_response_validation=_strict_response_validation, - ) - - self.toolgroups = toolgroups.ToolgroupsResource(self) - self.tools = tools.ToolsResource(self) - self.tool_runtime = tool_runtime.ToolRuntimeResource(self) - self.responses = responses.ResponsesResource(self) - self.agents = agents.AgentsResource(self) - self.datasets = datasets.DatasetsResource(self) - self.eval = eval.EvalResource(self) - self.inspect = inspect.InspectResource(self) - self.inference = inference.InferenceResource(self) - self.embeddings = embeddings.EmbeddingsResource(self) - self.chat = chat.ChatResource(self) - self.completions = completions.CompletionsResource(self) - self.vector_io = vector_io.VectorIoResource(self) - self.vector_dbs = vector_dbs.VectorDBsResource(self) - self.vector_stores = vector_stores.VectorStoresResource(self) - self.models = models.ModelsResource(self) - self.post_training = post_training.PostTrainingResource(self) - self.providers = providers.ProvidersResource(self) - self.routes = routes.RoutesResource(self) - self.safety = safety.SafetyResource(self) - self.shields = shields.ShieldsResource(self) - self.synthetic_data_generation = synthetic_data_generation.SyntheticDataGenerationResource(self) - self.telemetry = telemetry.TelemetryResource(self) - self.scoring = scoring.ScoringResource(self) - self.scoring_functions = scoring_functions.ScoringFunctionsResource(self) - self.benchmarks = benchmarks.BenchmarksResource(self) - self.files = files.FilesResource(self) - self.with_raw_response = LlamaStackClientWithRawResponse(self) - self.with_streaming_response = LlamaStackClientWithStreamedResponse(self) - - @property - @override - def qs(self) -> Querystring: - return Querystring(array_format="comma") - - @property - @override - def auth_headers(self) -> dict[str, str]: - api_key = self.api_key - if api_key is None: - return {} - return {"Authorization": f"Bearer {api_key}"} - - @property - @override - def default_headers(self) -> dict[str, str | Omit]: - return { - **super().default_headers, - "X-Stainless-Async": "false", - **self._custom_headers, - } - - def copy( - self, - *, - api_key: str | None = None, - base_url: str | httpx.URL | None = None, - timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - http_client: httpx.Client | None = None, - max_retries: int | NotGiven = NOT_GIVEN, - default_headers: Mapping[str, str] | None = None, - 
set_default_headers: Mapping[str, str] | None = None, - default_query: Mapping[str, object] | None = None, - set_default_query: Mapping[str, object] | None = None, - _extra_kwargs: Mapping[str, Any] = {}, - ) -> Self: - """ - Create a new client instance re-using the same options given to the current client with optional overriding. - """ - if default_headers is not None and set_default_headers is not None: - raise ValueError("The `default_headers` and `set_default_headers` arguments are mutually exclusive") - - if default_query is not None and set_default_query is not None: - raise ValueError("The `default_query` and `set_default_query` arguments are mutually exclusive") - - headers = self._custom_headers - if default_headers is not None: - headers = {**headers, **default_headers} - elif set_default_headers is not None: - headers = set_default_headers - - params = self._custom_query - if default_query is not None: - params = {**params, **default_query} - elif set_default_query is not None: - params = set_default_query - - http_client = http_client or self._client - return self.__class__( - api_key=api_key or self.api_key, - base_url=base_url or self.base_url, - timeout=self.timeout if isinstance(timeout, NotGiven) else timeout, - http_client=http_client, - max_retries=max_retries if is_given(max_retries) else self.max_retries, - default_headers=headers, - default_query=params, - **_extra_kwargs, - ) - - # Alias for `copy` for nicer inline usage, e.g. - # client.with_options(timeout=10).foo.create(...) - with_options = copy - - @override - def _make_status_error( - self, - err_msg: str, - *, - body: object, - response: httpx.Response, - ) -> APIStatusError: - if response.status_code == 400: - return _exceptions.BadRequestError(err_msg, response=response, body=body) - - if response.status_code == 401: - return _exceptions.AuthenticationError(err_msg, response=response, body=body) - - if response.status_code == 403: - return _exceptions.PermissionDeniedError(err_msg, response=response, body=body) - - if response.status_code == 404: - return _exceptions.NotFoundError(err_msg, response=response, body=body) - - if response.status_code == 409: - return _exceptions.ConflictError(err_msg, response=response, body=body) - - if response.status_code == 422: - return _exceptions.UnprocessableEntityError(err_msg, response=response, body=body) - - if response.status_code == 429: - return _exceptions.RateLimitError(err_msg, response=response, body=body) - - if response.status_code >= 500: - return _exceptions.InternalServerError(err_msg, response=response, body=body) - return APIStatusError(err_msg, response=response, body=body) - - -class AsyncLlamaStackClient(AsyncAPIClient): - toolgroups: toolgroups.AsyncToolgroupsResource - tools: tools.AsyncToolsResource - tool_runtime: tool_runtime.AsyncToolRuntimeResource - responses: responses.AsyncResponsesResource - agents: agents.AsyncAgentsResource - datasets: datasets.AsyncDatasetsResource - eval: eval.AsyncEvalResource - inspect: inspect.AsyncInspectResource - inference: inference.AsyncInferenceResource - embeddings: embeddings.AsyncEmbeddingsResource - chat: chat.AsyncChatResource - completions: completions.AsyncCompletionsResource - vector_io: vector_io.AsyncVectorIoResource - vector_dbs: vector_dbs.AsyncVectorDBsResource - vector_stores: vector_stores.AsyncVectorStoresResource - models: models.AsyncModelsResource - post_training: post_training.AsyncPostTrainingResource - providers: providers.AsyncProvidersResource - routes: routes.AsyncRoutesResource - 
safety: safety.AsyncSafetyResource - shields: shields.AsyncShieldsResource - synthetic_data_generation: synthetic_data_generation.AsyncSyntheticDataGenerationResource - telemetry: telemetry.AsyncTelemetryResource - scoring: scoring.AsyncScoringResource - scoring_functions: scoring_functions.AsyncScoringFunctionsResource - benchmarks: benchmarks.AsyncBenchmarksResource - files: files.AsyncFilesResource - with_raw_response: AsyncLlamaStackClientWithRawResponse - with_streaming_response: AsyncLlamaStackClientWithStreamedResponse - - # client options - api_key: str | None - - def __init__( - self, - *, - api_key: str | None = None, - base_url: str | httpx.URL | None = None, - timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN, - max_retries: int = DEFAULT_MAX_RETRIES, - default_headers: Mapping[str, str] | None = None, - default_query: Mapping[str, object] | None = None, - # Configure a custom httpx client. - # We provide a `DefaultAsyncHttpxClient` class that you can pass to retain the default values we use for `limits`, `timeout` & `follow_redirects`. - # See the [httpx documentation](https://www.python-httpx.org/api/#asyncclient) for more details. - http_client: httpx.AsyncClient | None = None, - # Enable or disable schema validation for data returned by the API. - # When enabled an error APIResponseValidationError is raised - # if the API responds with invalid data for the expected schema. - # - # This parameter may be removed or changed in the future. - # If you rely on this feature, please open a GitHub issue - # outlining your use-case to help us decide if it should be - # part of our public interface in the future. - _strict_response_validation: bool = False, - provider_data: Mapping[str, Any] | None = None, - ) -> None: - """Construct a new async AsyncLlamaStackClient client instance. - - This automatically infers the `api_key` argument from the `LLAMA_STACK_API_KEY` environment variable if it is not provided. 
- """ - if api_key is None: - api_key = os.environ.get("LLAMA_STACK_API_KEY") - self.api_key = api_key - - if base_url is None: - base_url = os.environ.get("LLAMA_STACK_BASE_URL") - if base_url is None: - base_url = f"http://any-hosted-llama-stack.com" - - custom_headers = default_headers or {} - custom_headers["X-LlamaStack-Client-Version"] = __version__ - if provider_data is not None: - custom_headers["X-LlamaStack-Provider-Data"] = json.dumps(provider_data) - - super().__init__( - version=__version__, - base_url=base_url, - max_retries=max_retries, - timeout=timeout, - http_client=http_client, - custom_headers=custom_headers, - custom_query=default_query, - _strict_response_validation=_strict_response_validation, - ) - - self.toolgroups = toolgroups.AsyncToolgroupsResource(self) - self.tools = tools.AsyncToolsResource(self) - self.tool_runtime = tool_runtime.AsyncToolRuntimeResource(self) - self.responses = responses.AsyncResponsesResource(self) - self.agents = agents.AsyncAgentsResource(self) - self.datasets = datasets.AsyncDatasetsResource(self) - self.eval = eval.AsyncEvalResource(self) - self.inspect = inspect.AsyncInspectResource(self) - self.inference = inference.AsyncInferenceResource(self) - self.embeddings = embeddings.AsyncEmbeddingsResource(self) - self.chat = chat.AsyncChatResource(self) - self.completions = completions.AsyncCompletionsResource(self) - self.vector_io = vector_io.AsyncVectorIoResource(self) - self.vector_dbs = vector_dbs.AsyncVectorDBsResource(self) - self.vector_stores = vector_stores.AsyncVectorStoresResource(self) - self.models = models.AsyncModelsResource(self) - self.post_training = post_training.AsyncPostTrainingResource(self) - self.providers = providers.AsyncProvidersResource(self) - self.routes = routes.AsyncRoutesResource(self) - self.safety = safety.AsyncSafetyResource(self) - self.shields = shields.AsyncShieldsResource(self) - self.synthetic_data_generation = synthetic_data_generation.AsyncSyntheticDataGenerationResource(self) - self.telemetry = telemetry.AsyncTelemetryResource(self) - self.scoring = scoring.AsyncScoringResource(self) - self.scoring_functions = scoring_functions.AsyncScoringFunctionsResource(self) - self.benchmarks = benchmarks.AsyncBenchmarksResource(self) - self.files = files.AsyncFilesResource(self) - self.with_raw_response = AsyncLlamaStackClientWithRawResponse(self) - self.with_streaming_response = AsyncLlamaStackClientWithStreamedResponse(self) - - @property - @override - def qs(self) -> Querystring: - return Querystring(array_format="comma") - - @property - @override - def auth_headers(self) -> dict[str, str]: - api_key = self.api_key - if api_key is None: - return {} - return {"Authorization": f"Bearer {api_key}"} - - @property - @override - def default_headers(self) -> dict[str, str | Omit]: - return { - **super().default_headers, - "X-Stainless-Async": f"async:{get_async_library()}", - **self._custom_headers, - } - - def copy( - self, - *, - api_key: str | None = None, - base_url: str | httpx.URL | None = None, - timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - http_client: httpx.AsyncClient | None = None, - max_retries: int | NotGiven = NOT_GIVEN, - default_headers: Mapping[str, str] | None = None, - set_default_headers: Mapping[str, str] | None = None, - default_query: Mapping[str, object] | None = None, - set_default_query: Mapping[str, object] | None = None, - _extra_kwargs: Mapping[str, Any] = {}, - ) -> Self: - """ - Create a new client instance re-using the same options given to the current client with 
optional overriding. - """ - if default_headers is not None and set_default_headers is not None: - raise ValueError("The `default_headers` and `set_default_headers` arguments are mutually exclusive") - - if default_query is not None and set_default_query is not None: - raise ValueError("The `default_query` and `set_default_query` arguments are mutually exclusive") - - headers = self._custom_headers - if default_headers is not None: - headers = {**headers, **default_headers} - elif set_default_headers is not None: - headers = set_default_headers - - params = self._custom_query - if default_query is not None: - params = {**params, **default_query} - elif set_default_query is not None: - params = set_default_query - - http_client = http_client or self._client - return self.__class__( - api_key=api_key or self.api_key, - base_url=base_url or self.base_url, - timeout=self.timeout if isinstance(timeout, NotGiven) else timeout, - http_client=http_client, - max_retries=max_retries if is_given(max_retries) else self.max_retries, - default_headers=headers, - default_query=params, - **_extra_kwargs, - ) - - # Alias for `copy` for nicer inline usage, e.g. - # client.with_options(timeout=10).foo.create(...) - with_options = copy - - @override - def _make_status_error( - self, - err_msg: str, - *, - body: object, - response: httpx.Response, - ) -> APIStatusError: - if response.status_code == 400: - return _exceptions.BadRequestError(err_msg, response=response, body=body) - - if response.status_code == 401: - return _exceptions.AuthenticationError(err_msg, response=response, body=body) - - if response.status_code == 403: - return _exceptions.PermissionDeniedError(err_msg, response=response, body=body) - - if response.status_code == 404: - return _exceptions.NotFoundError(err_msg, response=response, body=body) - - if response.status_code == 409: - return _exceptions.ConflictError(err_msg, response=response, body=body) - - if response.status_code == 422: - return _exceptions.UnprocessableEntityError(err_msg, response=response, body=body) - - if response.status_code == 429: - return _exceptions.RateLimitError(err_msg, response=response, body=body) - - if response.status_code >= 500: - return _exceptions.InternalServerError(err_msg, response=response, body=body) - return APIStatusError(err_msg, response=response, body=body) - - -class LlamaStackClientWithRawResponse: - def __init__(self, client: LlamaStackClient) -> None: - self.toolgroups = toolgroups.ToolgroupsResourceWithRawResponse(client.toolgroups) - self.tools = tools.ToolsResourceWithRawResponse(client.tools) - self.tool_runtime = tool_runtime.ToolRuntimeResourceWithRawResponse(client.tool_runtime) - self.responses = responses.ResponsesResourceWithRawResponse(client.responses) - self.agents = agents.AgentsResourceWithRawResponse(client.agents) - self.datasets = datasets.DatasetsResourceWithRawResponse(client.datasets) - self.eval = eval.EvalResourceWithRawResponse(client.eval) - self.inspect = inspect.InspectResourceWithRawResponse(client.inspect) - self.inference = inference.InferenceResourceWithRawResponse(client.inference) - self.embeddings = embeddings.EmbeddingsResourceWithRawResponse(client.embeddings) - self.chat = chat.ChatResourceWithRawResponse(client.chat) - self.completions = completions.CompletionsResourceWithRawResponse(client.completions) - self.vector_io = vector_io.VectorIoResourceWithRawResponse(client.vector_io) - self.vector_dbs = vector_dbs.VectorDBsResourceWithRawResponse(client.vector_dbs) - self.vector_stores = 
vector_stores.VectorStoresResourceWithRawResponse(client.vector_stores) - self.models = models.ModelsResourceWithRawResponse(client.models) - self.post_training = post_training.PostTrainingResourceWithRawResponse(client.post_training) - self.providers = providers.ProvidersResourceWithRawResponse(client.providers) - self.routes = routes.RoutesResourceWithRawResponse(client.routes) - self.safety = safety.SafetyResourceWithRawResponse(client.safety) - self.shields = shields.ShieldsResourceWithRawResponse(client.shields) - self.synthetic_data_generation = synthetic_data_generation.SyntheticDataGenerationResourceWithRawResponse( - client.synthetic_data_generation - ) - self.telemetry = telemetry.TelemetryResourceWithRawResponse(client.telemetry) - self.scoring = scoring.ScoringResourceWithRawResponse(client.scoring) - self.scoring_functions = scoring_functions.ScoringFunctionsResourceWithRawResponse(client.scoring_functions) - self.benchmarks = benchmarks.BenchmarksResourceWithRawResponse(client.benchmarks) - self.files = files.FilesResourceWithRawResponse(client.files) - - -class AsyncLlamaStackClientWithRawResponse: - def __init__(self, client: AsyncLlamaStackClient) -> None: - self.toolgroups = toolgroups.AsyncToolgroupsResourceWithRawResponse(client.toolgroups) - self.tools = tools.AsyncToolsResourceWithRawResponse(client.tools) - self.tool_runtime = tool_runtime.AsyncToolRuntimeResourceWithRawResponse(client.tool_runtime) - self.responses = responses.AsyncResponsesResourceWithRawResponse(client.responses) - self.agents = agents.AsyncAgentsResourceWithRawResponse(client.agents) - self.datasets = datasets.AsyncDatasetsResourceWithRawResponse(client.datasets) - self.eval = eval.AsyncEvalResourceWithRawResponse(client.eval) - self.inspect = inspect.AsyncInspectResourceWithRawResponse(client.inspect) - self.inference = inference.AsyncInferenceResourceWithRawResponse(client.inference) - self.embeddings = embeddings.AsyncEmbeddingsResourceWithRawResponse(client.embeddings) - self.chat = chat.AsyncChatResourceWithRawResponse(client.chat) - self.completions = completions.AsyncCompletionsResourceWithRawResponse(client.completions) - self.vector_io = vector_io.AsyncVectorIoResourceWithRawResponse(client.vector_io) - self.vector_dbs = vector_dbs.AsyncVectorDBsResourceWithRawResponse(client.vector_dbs) - self.vector_stores = vector_stores.AsyncVectorStoresResourceWithRawResponse(client.vector_stores) - self.models = models.AsyncModelsResourceWithRawResponse(client.models) - self.post_training = post_training.AsyncPostTrainingResourceWithRawResponse(client.post_training) - self.providers = providers.AsyncProvidersResourceWithRawResponse(client.providers) - self.routes = routes.AsyncRoutesResourceWithRawResponse(client.routes) - self.safety = safety.AsyncSafetyResourceWithRawResponse(client.safety) - self.shields = shields.AsyncShieldsResourceWithRawResponse(client.shields) - self.synthetic_data_generation = synthetic_data_generation.AsyncSyntheticDataGenerationResourceWithRawResponse( - client.synthetic_data_generation - ) - self.telemetry = telemetry.AsyncTelemetryResourceWithRawResponse(client.telemetry) - self.scoring = scoring.AsyncScoringResourceWithRawResponse(client.scoring) - self.scoring_functions = scoring_functions.AsyncScoringFunctionsResourceWithRawResponse( - client.scoring_functions - ) - self.benchmarks = benchmarks.AsyncBenchmarksResourceWithRawResponse(client.benchmarks) - self.files = files.AsyncFilesResourceWithRawResponse(client.files) - - -class 
LlamaStackClientWithStreamedResponse: - def __init__(self, client: LlamaStackClient) -> None: - self.toolgroups = toolgroups.ToolgroupsResourceWithStreamingResponse(client.toolgroups) - self.tools = tools.ToolsResourceWithStreamingResponse(client.tools) - self.tool_runtime = tool_runtime.ToolRuntimeResourceWithStreamingResponse(client.tool_runtime) - self.responses = responses.ResponsesResourceWithStreamingResponse(client.responses) - self.agents = agents.AgentsResourceWithStreamingResponse(client.agents) - self.datasets = datasets.DatasetsResourceWithStreamingResponse(client.datasets) - self.eval = eval.EvalResourceWithStreamingResponse(client.eval) - self.inspect = inspect.InspectResourceWithStreamingResponse(client.inspect) - self.inference = inference.InferenceResourceWithStreamingResponse(client.inference) - self.embeddings = embeddings.EmbeddingsResourceWithStreamingResponse(client.embeddings) - self.chat = chat.ChatResourceWithStreamingResponse(client.chat) - self.completions = completions.CompletionsResourceWithStreamingResponse(client.completions) - self.vector_io = vector_io.VectorIoResourceWithStreamingResponse(client.vector_io) - self.vector_dbs = vector_dbs.VectorDBsResourceWithStreamingResponse(client.vector_dbs) - self.vector_stores = vector_stores.VectorStoresResourceWithStreamingResponse(client.vector_stores) - self.models = models.ModelsResourceWithStreamingResponse(client.models) - self.post_training = post_training.PostTrainingResourceWithStreamingResponse(client.post_training) - self.providers = providers.ProvidersResourceWithStreamingResponse(client.providers) - self.routes = routes.RoutesResourceWithStreamingResponse(client.routes) - self.safety = safety.SafetyResourceWithStreamingResponse(client.safety) - self.shields = shields.ShieldsResourceWithStreamingResponse(client.shields) - self.synthetic_data_generation = synthetic_data_generation.SyntheticDataGenerationResourceWithStreamingResponse( - client.synthetic_data_generation - ) - self.telemetry = telemetry.TelemetryResourceWithStreamingResponse(client.telemetry) - self.scoring = scoring.ScoringResourceWithStreamingResponse(client.scoring) - self.scoring_functions = scoring_functions.ScoringFunctionsResourceWithStreamingResponse( - client.scoring_functions - ) - self.benchmarks = benchmarks.BenchmarksResourceWithStreamingResponse(client.benchmarks) - self.files = files.FilesResourceWithStreamingResponse(client.files) - - -class AsyncLlamaStackClientWithStreamedResponse: - def __init__(self, client: AsyncLlamaStackClient) -> None: - self.toolgroups = toolgroups.AsyncToolgroupsResourceWithStreamingResponse(client.toolgroups) - self.tools = tools.AsyncToolsResourceWithStreamingResponse(client.tools) - self.tool_runtime = tool_runtime.AsyncToolRuntimeResourceWithStreamingResponse(client.tool_runtime) - self.responses = responses.AsyncResponsesResourceWithStreamingResponse(client.responses) - self.agents = agents.AsyncAgentsResourceWithStreamingResponse(client.agents) - self.datasets = datasets.AsyncDatasetsResourceWithStreamingResponse(client.datasets) - self.eval = eval.AsyncEvalResourceWithStreamingResponse(client.eval) - self.inspect = inspect.AsyncInspectResourceWithStreamingResponse(client.inspect) - self.inference = inference.AsyncInferenceResourceWithStreamingResponse(client.inference) - self.embeddings = embeddings.AsyncEmbeddingsResourceWithStreamingResponse(client.embeddings) - self.chat = chat.AsyncChatResourceWithStreamingResponse(client.chat) - self.completions = 
completions.AsyncCompletionsResourceWithStreamingResponse(client.completions) - self.vector_io = vector_io.AsyncVectorIoResourceWithStreamingResponse(client.vector_io) - self.vector_dbs = vector_dbs.AsyncVectorDBsResourceWithStreamingResponse(client.vector_dbs) - self.vector_stores = vector_stores.AsyncVectorStoresResourceWithStreamingResponse(client.vector_stores) - self.models = models.AsyncModelsResourceWithStreamingResponse(client.models) - self.post_training = post_training.AsyncPostTrainingResourceWithStreamingResponse(client.post_training) - self.providers = providers.AsyncProvidersResourceWithStreamingResponse(client.providers) - self.routes = routes.AsyncRoutesResourceWithStreamingResponse(client.routes) - self.safety = safety.AsyncSafetyResourceWithStreamingResponse(client.safety) - self.shields = shields.AsyncShieldsResourceWithStreamingResponse(client.shields) - self.synthetic_data_generation = ( - synthetic_data_generation.AsyncSyntheticDataGenerationResourceWithStreamingResponse( - client.synthetic_data_generation - ) - ) - self.telemetry = telemetry.AsyncTelemetryResourceWithStreamingResponse(client.telemetry) - self.scoring = scoring.AsyncScoringResourceWithStreamingResponse(client.scoring) - self.scoring_functions = scoring_functions.AsyncScoringFunctionsResourceWithStreamingResponse( - client.scoring_functions - ) - self.benchmarks = benchmarks.AsyncBenchmarksResourceWithStreamingResponse(client.benchmarks) - self.files = files.AsyncFilesResourceWithStreamingResponse(client.files) - - -Client = LlamaStackClient - -AsyncClient = AsyncLlamaStackClient diff --git a/src/llama_stack_client/_utils/_resources_proxy.py b/src/llama_stack_client/_utils/_resources_proxy.py deleted file mode 100644 index bf0a876a..00000000 --- a/src/llama_stack_client/_utils/_resources_proxy.py +++ /dev/null @@ -1,24 +0,0 @@ -from __future__ import annotations - -from typing import Any -from typing_extensions import override - -from ._proxy import LazyProxy - - -class ResourcesProxy(LazyProxy[Any]): - """A proxy for the `llama_stack_client.resources` module. - - This is used so that we can lazily import `llama_stack_client.resources` only when - needed *and* so that users can just import `llama_stack_client` and reference `llama_stack_client.resources` - """ - - @override - def __load__(self) -> Any: - import importlib - - mod = importlib.import_module("llama_stack_client.resources") - return mod - - -resources = ResourcesProxy().__as_proxied__() diff --git a/src/llama_stack_client/_wrappers.py b/src/llama_stack_client/_wrappers.py deleted file mode 100644 index 471b39dd..00000000 --- a/src/llama_stack_client/_wrappers.py +++ /dev/null @@ -1,17 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Generic, TypeVar - -from ._models import GenericModel - -__all__ = ["DataWrapper"] - -_T = TypeVar("_T") - - -class DataWrapper(GenericModel, Generic[_T]): - data: _T - - @staticmethod - def _unwrapper(obj: "DataWrapper[_T]") -> _T: - return obj.data diff --git a/src/llama_stack_client/lib/__init__.py b/src/llama_stack_client/lib/__init__.py deleted file mode 100644 index 6bc5d151..00000000 --- a/src/llama_stack_client/lib/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from .tools.mcp_oauth import get_oauth_token_for_mcp_server - -__all__ = ["get_oauth_token_for_mcp_server"] diff --git a/src/llama_stack_client/lib/agents/__init__.py b/src/llama_stack_client/lib/agents/__init__.py deleted file mode 100644 index 756f351d..00000000 --- a/src/llama_stack_client/lib/agents/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. diff --git a/src/llama_stack_client/lib/agents/agent.py b/src/llama_stack_client/lib/agents/agent.py deleted file mode 100644 index ebdc4abd..00000000 --- a/src/llama_stack_client/lib/agents/agent.py +++ /dev/null @@ -1,601 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. -import logging -from typing import Any, AsyncIterator, Callable, Iterator, List, Optional, Tuple, Union - -from llama_stack_client import LlamaStackClient -from llama_stack_client.types import ToolResponseMessage, ToolResponseParam, UserMessage -from llama_stack_client.types.agent_create_params import AgentConfig -from llama_stack_client.types.agents.agent_turn_response_stream_chunk import ( - AgentTurnResponseStreamChunk, -) -from llama_stack_client.types.agents.turn import CompletionMessage, Turn -from llama_stack_client.types.agents.turn_create_params import Document, Toolgroup -from llama_stack_client.types.shared.tool_call import ToolCall -from llama_stack_client.types.shared_params.agent_config import ToolConfig -from llama_stack_client.types.shared_params.response_format import ResponseFormat -from llama_stack_client.types.shared_params.sampling_params import SamplingParams - -from ..._types import Headers -from .client_tool import ClientTool, client_tool -from .tool_parser import ToolParser - -DEFAULT_MAX_ITER = 10 - -logger = logging.getLogger(__name__) - - -class AgentUtils: - @staticmethod - def get_client_tools( - tools: Optional[List[Union[Toolgroup, ClientTool, Callable[..., Any]]]], - ) -> List[ClientTool]: - if not tools: - return [] - - # Wrap any function in client_tool decorator - tools = [client_tool(tool) if (callable(tool) and not isinstance(tool, ClientTool)) else tool for tool in tools] - return [tool for tool in tools if isinstance(tool, ClientTool)] - - @staticmethod - def get_tool_calls(chunk: AgentTurnResponseStreamChunk, tool_parser: Optional[ToolParser] = None) -> List[ToolCall]: - if chunk.event.payload.event_type not in { - "turn_complete", - "turn_awaiting_input", - }: - return [] - - message = chunk.event.payload.turn.output_message - if message.stop_reason == "out_of_tokens": - return [] - - if tool_parser: - return tool_parser.get_tool_calls(message) - - return message.tool_calls - - @staticmethod - def get_turn_id(chunk: AgentTurnResponseStreamChunk) -> Optional[str]: - if chunk.event.payload.event_type not in [ - "turn_complete", - "turn_awaiting_input", - ]: - return None - - return chunk.event.payload.turn.turn_id - - @staticmethod - def get_agent_config( - model: Optional[str] = None, - instructions: Optional[str] = None, - tools: Optional[List[Union[Toolgroup, ClientTool, Callable[..., Any]]]] = None, - tool_config: Optional[ToolConfig] = None, - sampling_params: Optional[SamplingParams] = None, - max_infer_iters: Optional[int] = None, - input_shields: Optional[List[str]] = 
None, - output_shields: Optional[List[str]] = None, - response_format: Optional[ResponseFormat] = None, - enable_session_persistence: Optional[bool] = None, - ) -> AgentConfig: - # Create a minimal valid AgentConfig with required fields - if model is None or instructions is None: - raise ValueError("Both 'model' and 'instructions' are required when agent_config is not provided") - - agent_config = { - "model": model, - "instructions": instructions, - "toolgroups": [], - "client_tools": [], - } - - # Add optional parameters if provided - if enable_session_persistence is not None: - agent_config["enable_session_persistence"] = enable_session_persistence - if max_infer_iters is not None: - agent_config["max_infer_iters"] = max_infer_iters - if input_shields is not None: - agent_config["input_shields"] = input_shields - if output_shields is not None: - agent_config["output_shields"] = output_shields - if response_format is not None: - agent_config["response_format"] = response_format - if sampling_params is not None: - agent_config["sampling_params"] = sampling_params - if tool_config is not None: - agent_config["tool_config"] = tool_config - if tools is not None: - toolgroups: List[Toolgroup] = [] - for tool in tools: - if isinstance(tool, str) or isinstance(tool, dict): - toolgroups.append(tool) - - agent_config["toolgroups"] = toolgroups - agent_config["client_tools"] = [tool.get_tool_definition() for tool in AgentUtils.get_client_tools(tools)] - - agent_config = AgentConfig(**agent_config) - return agent_config - - -class Agent: - def __init__( - self, - client: LlamaStackClient, - # begin deprecated - agent_config: Optional[AgentConfig] = None, - client_tools: Tuple[ClientTool, ...] = (), - # end deprecated - tool_parser: Optional[ToolParser] = None, - model: Optional[str] = None, - instructions: Optional[str] = None, - tools: Optional[List[Union[Toolgroup, ClientTool, Callable[..., Any]]]] = None, - tool_config: Optional[ToolConfig] = None, - sampling_params: Optional[SamplingParams] = None, - max_infer_iters: Optional[int] = None, - input_shields: Optional[List[str]] = None, - output_shields: Optional[List[str]] = None, - response_format: Optional[ResponseFormat] = None, - enable_session_persistence: Optional[bool] = None, - extra_headers: Headers | None = None, - ): - """Construct an Agent with the given parameters. - - :param client: The LlamaStackClient instance. - :param agent_config: The AgentConfig instance. - ::deprecated: use other parameters instead - :param client_tools: A tuple of ClientTool instances. - ::deprecated: use tools instead - :param tool_parser: Custom logic that parses tool calls from a message. - :param model: The model to use for the agent. - :param instructions: The instructions for the agent. - :param tools: A list of tools for the agent. Values can be one of the following: - - dict representing a toolgroup/tool with arguments: e.g. {"name": "builtin::rag/knowledge_search", "args": {"vector_db_ids": [123]}} - - a python function with a docstring. See @client_tool for more details. - - str representing a tool within a toolgroup: e.g. "builtin::rag/knowledge_search" - - str representing a toolgroup_id: e.g. "builtin::rag", "builtin::code_interpreter", where all tools in the toolgroup will be added to the agent - - an instance of ClientTool: A client tool object. - :param tool_config: The tool configuration for the agent. - :param sampling_params: The sampling parameters for the agent. - :param max_infer_iters: The maximum number of inference iterations. 
- :param input_shields: The input shields for the agent. - :param output_shields: The output shields for the agent. - :param response_format: The response format for the agent. - :param enable_session_persistence: Whether to enable session persistence. - :param extra_headers: Extra headers to add to all requests sent by the agent. - """ - self.client = client - - if agent_config is not None: - logger.warning("`agent_config` is deprecated. Use inlined parameters instead.") - if client_tools != (): - logger.warning("`client_tools` is deprecated. Use `tools` instead.") - - # Construct agent_config from parameters if not provided - if agent_config is None: - agent_config = AgentUtils.get_agent_config( - model=model, - instructions=instructions, - tools=tools, - tool_config=tool_config, - sampling_params=sampling_params, - max_infer_iters=max_infer_iters, - input_shields=input_shields, - output_shields=output_shields, - response_format=response_format, - enable_session_persistence=enable_session_persistence, - ) - client_tools = AgentUtils.get_client_tools(tools) - - self.agent_config = agent_config - self.client_tools = {t.get_name(): t for t in client_tools} - self.sessions = [] - self.tool_parser = tool_parser - self.builtin_tools = {} - self.extra_headers = extra_headers - self.initialize() - - def initialize(self) -> None: - agentic_system_create_response = self.client.agents.create( - agent_config=self.agent_config, - extra_headers=self.extra_headers, - ) - self.agent_id = agentic_system_create_response.agent_id - for tg in self.agent_config["toolgroups"]: - toolgroup_id = tg if isinstance(tg, str) else tg.get("name") - for tool in self.client.tools.list(toolgroup_id=toolgroup_id, extra_headers=self.extra_headers): - self.builtin_tools[tool.identifier] = tg.get("args", {}) if isinstance(tg, dict) else {} - - def create_session(self, session_name: str) -> str: - agentic_system_create_session_response = self.client.agents.session.create( - agent_id=self.agent_id, - session_name=session_name, - extra_headers=self.extra_headers, - ) - self.session_id = agentic_system_create_session_response.session_id - self.sessions.append(self.session_id) - return self.session_id - - def _run_tool_calls(self, tool_calls: List[ToolCall]) -> List[ToolResponseParam]: - responses = [] - for tool_call in tool_calls: - responses.append(self._run_single_tool(tool_call)) - return responses - - def _run_single_tool(self, tool_call: ToolCall) -> ToolResponseParam: - # custom client tools - if tool_call.tool_name in self.client_tools: - tool = self.client_tools[tool_call.tool_name] - # NOTE: tool.run() expects a list of messages, we only pass in last message here - # but we could pass in the entire message history - result_message = tool.run( - [ - CompletionMessage( - role="assistant", - content=tool_call.tool_name, - tool_calls=[tool_call], - stop_reason="end_of_turn", - ) - ] - ) - return result_message - - # builtin tools executed by tool_runtime - if tool_call.tool_name in self.builtin_tools: - tool_result = self.client.tool_runtime.invoke_tool( - tool_name=tool_call.tool_name, - kwargs={ - **tool_call.arguments, - **self.builtin_tools[tool_call.tool_name], - }, - extra_headers=self.extra_headers, - ) - return ToolResponseParam( - call_id=tool_call.call_id, - tool_name=tool_call.tool_name, - content=tool_result.content, - ) - - # cannot find tools - return ToolResponseParam( - call_id=tool_call.call_id, - tool_name=tool_call.tool_name, - content=f"Unknown tool `{tool_call.tool_name}` was called.", - ) - - def 
create_turn( - self, - messages: List[Union[UserMessage, ToolResponseMessage]], - session_id: Optional[str] = None, - toolgroups: Optional[List[Toolgroup]] = None, - documents: Optional[List[Document]] = None, - stream: bool = True, - # TODO: deprecate this - extra_headers: Headers | None = None, - ) -> Iterator[AgentTurnResponseStreamChunk] | Turn: - if stream: - return self._create_turn_streaming( - messages, session_id, toolgroups, documents, extra_headers=extra_headers or self.extra_headers - ) - else: - chunks = [ - x - for x in self._create_turn_streaming( - messages, - session_id, - toolgroups, - documents, - extra_headers=extra_headers or self.extra_headers, - ) - ] - if not chunks: - raise Exception("Turn did not complete") - - last_chunk = chunks[-1] - if hasattr(last_chunk, "error"): - if "message" in last_chunk.error: - error_msg = last_chunk.error["message"] - else: - error_msg = str(last_chunk.error) - raise RuntimeError(f"Turn did not complete. Error: {error_msg}") - try: - return last_chunk.event.payload.turn - except AttributeError: - raise RuntimeError(f"Turn did not complete. Output: {last_chunk}") from None - - def _create_turn_streaming( - self, - messages: List[Union[UserMessage, ToolResponseMessage]], - session_id: Optional[str] = None, - toolgroups: Optional[List[Toolgroup]] = None, - documents: Optional[List[Document]] = None, - # TODO: deprecate this - extra_headers: Headers | None = None, - ) -> Iterator[AgentTurnResponseStreamChunk]: - n_iter = 0 - - # 1. create an agent turn - turn_response = self.client.agents.turn.create( - agent_id=self.agent_id, - # use specified session_id or last session created - session_id=session_id or self.session_id[-1], - messages=messages, - stream=True, - documents=documents, - toolgroups=toolgroups, - extra_headers=extra_headers or self.extra_headers, - ) - - # 2. process turn and resume if there's a tool call - is_turn_complete = False - while not is_turn_complete: - is_turn_complete = True - for chunk in turn_response: - if hasattr(chunk, "error"): - yield chunk - return - tool_calls = AgentUtils.get_tool_calls(chunk, self.tool_parser) - if not tool_calls: - yield chunk - else: - is_turn_complete = False - # End of turn is reached, do not resume even if there's a tool call - # We only check for this if tool_parser is not set, because otherwise - # tool call will be parsed on client side, and server will always return "end_of_turn" - if not self.tool_parser and chunk.event.payload.turn.output_message.stop_reason in {"end_of_turn"}: - yield chunk - break - - turn_id = AgentUtils.get_turn_id(chunk) - if n_iter == 0: - yield chunk - - # run the tools - tool_responses = self._run_tool_calls(tool_calls) - - # pass it to next iteration - turn_response = self.client.agents.turn.resume( - agent_id=self.agent_id, - session_id=session_id or self.session_id[-1], - turn_id=turn_id, - tool_responses=tool_responses, - stream=True, - extra_headers=extra_headers or self.extra_headers, - ) - n_iter += 1 - - if self.tool_parser and n_iter > self.agent_config.get("max_infer_iters", DEFAULT_MAX_ITER): - raise Exception("Max inference iterations reached") - - -class AsyncAgent: - def __init__( - self, - client: LlamaStackClient, - # begin deprecated - agent_config: Optional[AgentConfig] = None, - client_tools: Tuple[ClientTool, ...] 
= (), - # end deprecated - tool_parser: Optional[ToolParser] = None, - model: Optional[str] = None, - instructions: Optional[str] = None, - tools: Optional[List[Union[Toolgroup, ClientTool, Callable[..., Any]]]] = None, - tool_config: Optional[ToolConfig] = None, - sampling_params: Optional[SamplingParams] = None, - max_infer_iters: Optional[int] = None, - input_shields: Optional[List[str]] = None, - output_shields: Optional[List[str]] = None, - response_format: Optional[ResponseFormat] = None, - enable_session_persistence: Optional[bool] = None, - extra_headers: Headers | None = None, - ): - """Construct an Agent with the given parameters. - - :param client: The LlamaStackClient instance. - :param agent_config: The AgentConfig instance. - ::deprecated: use other parameters instead - :param client_tools: A tuple of ClientTool instances. - ::deprecated: use tools instead - :param tool_parser: Custom logic that parses tool calls from a message. - :param model: The model to use for the agent. - :param instructions: The instructions for the agent. - :param tools: A list of tools for the agent. Values can be one of the following: - - dict representing a toolgroup/tool with arguments: e.g. {"name": "builtin::rag/knowledge_search", "args": {"vector_db_ids": [123]}} - - a python function with a docstring. See @client_tool for more details. - - str representing a tool within a toolgroup: e.g. "builtin::rag/knowledge_search" - - str representing a toolgroup_id: e.g. "builtin::rag", "builtin::code_interpreter", where all tools in the toolgroup will be added to the agent - - an instance of ClientTool: A client tool object. - :param tool_config: The tool configuration for the agent. - :param sampling_params: The sampling parameters for the agent. - :param max_infer_iters: The maximum number of inference iterations. - :param input_shields: The input shields for the agent. - :param output_shields: The output shields for the agent. - :param response_format: The response format for the agent. - :param enable_session_persistence: Whether to enable session persistence. - :param extra_headers: Extra headers to add to all requests sent by the agent. - """ - self.client = client - - if agent_config is not None: - logger.warning("`agent_config` is deprecated. Use inlined parameters instead.") - if client_tools != (): - logger.warning("`client_tools` is deprecated. Use `tools` instead.") - - # Construct agent_config from parameters if not provided - if agent_config is None: - agent_config = AgentUtils.get_agent_config( - model=model, - instructions=instructions, - tools=tools, - tool_config=tool_config, - sampling_params=sampling_params, - max_infer_iters=max_infer_iters, - input_shields=input_shields, - output_shields=output_shields, - response_format=response_format, - enable_session_persistence=enable_session_persistence, - ) - client_tools = AgentUtils.get_client_tools(tools) - - self.agent_config = agent_config - self.client_tools = {t.get_name(): t for t in client_tools} - self.sessions = [] - self.tool_parser = tool_parser - self.builtin_tools = {} - self.extra_headers = extra_headers - self._agent_id = None - - if isinstance(client, LlamaStackClient): - raise ValueError("AsyncAgent must be initialized with an AsyncLlamaStackClient") - - @property - def agent_id(self) -> str: - if not self._agent_id: - raise RuntimeError("Agent ID not initialized. 
Call initialize() first.") - return self._agent_id - - async def initialize(self) -> None: - if self._agent_id: - return - - agentic_system_create_response = await self.client.agents.create( - agent_config=self.agent_config, - ) - self._agent_id = agentic_system_create_response.agent_id - for tg in self.agent_config["toolgroups"]: - for tool in await self.client.tools.list(toolgroup_id=tg, extra_headers=self.extra_headers): - self.builtin_tools[tool.identifier] = tg.get("args", {}) if isinstance(tg, dict) else {} - - async def create_session(self, session_name: str) -> str: - await self.initialize() - agentic_system_create_session_response = await self.client.agents.session.create( - agent_id=self.agent_id, - session_name=session_name, - extra_headers=self.extra_headers, - ) - self.session_id = agentic_system_create_session_response.session_id - self.sessions.append(self.session_id) - return self.session_id - - async def create_turn( - self, - messages: List[Union[UserMessage, ToolResponseMessage]], - session_id: Optional[str] = None, - toolgroups: Optional[List[Toolgroup]] = None, - documents: Optional[List[Document]] = None, - stream: bool = True, - ) -> AsyncIterator[AgentTurnResponseStreamChunk] | Turn: - if stream: - return self._create_turn_streaming(messages, session_id, toolgroups, documents) - else: - chunks = [x async for x in self._create_turn_streaming(messages, session_id, toolgroups, documents)] - if not chunks: - raise Exception("Turn did not complete") - return chunks[-1].event.payload.turn - - async def _run_tool_calls(self, tool_calls: List[ToolCall]) -> List[ToolResponseParam]: - responses = [] - for tool_call in tool_calls: - responses.append(await self._run_single_tool(tool_call)) - return responses - - async def _run_single_tool(self, tool_call: ToolCall) -> ToolResponseParam: - # custom client tools - if tool_call.tool_name in self.client_tools: - tool = self.client_tools[tool_call.tool_name] - result_message = await tool.async_run( - [ - CompletionMessage( - role="assistant", - content=tool_call.tool_name, - tool_calls=[tool_call], - stop_reason="end_of_turn", - ) - ] - ) - return result_message - - # builtin tools executed by tool_runtime - if tool_call.tool_name in self.builtin_tools: - tool_result = await self.client.tool_runtime.invoke_tool( - tool_name=tool_call.tool_name, - kwargs={ - **tool_call.arguments, - **self.builtin_tools[tool_call.tool_name], - }, - extra_headers=self.extra_headers, - ) - return ToolResponseParam( - call_id=tool_call.call_id, - tool_name=tool_call.tool_name, - content=tool_result.content, - ) - - # cannot find tools - return ToolResponseParam( - call_id=tool_call.call_id, - tool_name=tool_call.tool_name, - content=f"Unknown tool `{tool_call.tool_name}` was called.", - ) - - async def _create_turn_streaming( - self, - messages: List[Union[UserMessage, ToolResponseMessage]], - session_id: Optional[str] = None, - toolgroups: Optional[List[Toolgroup]] = None, - documents: Optional[List[Document]] = None, - ) -> AsyncIterator[AgentTurnResponseStreamChunk]: - n_iter = 0 - - # 1. create an agent turn - turn_response = await self.client.agents.turn.create( - agent_id=self.agent_id, - # use specified session_id or last session created - session_id=session_id or self.session_id[-1], - messages=messages, - stream=True, - documents=documents, - toolgroups=toolgroups, - extra_headers=self.extra_headers, - ) - - # 2. 
process turn and resume if there's a tool call - is_turn_complete = False - while not is_turn_complete: - is_turn_complete = True - async for chunk in turn_response: - if hasattr(chunk, "error"): - yield chunk - return - - tool_calls = AgentUtils.get_tool_calls(chunk, self.tool_parser) - if not tool_calls: - yield chunk - else: - is_turn_complete = False - # End of turn is reached, do not resume even if there's a tool call - if not self.tool_parser and chunk.event.payload.turn.output_message.stop_reason in {"end_of_turn"}: - yield chunk - break - - turn_id = AgentUtils.get_turn_id(chunk) - if n_iter == 0: - yield chunk - - # run the tools - tool_responses = await self._run_tool_calls(tool_calls) - - # pass it to next iteration - turn_response = await self.client.agents.turn.resume( - agent_id=self.agent_id, - session_id=session_id or self.session_id[-1], - turn_id=turn_id, - tool_responses=tool_responses, - stream=True, - extra_headers=self.extra_headers, - ) - n_iter += 1 - - if self.tool_parser and n_iter > self.agent_config.get("max_infer_iters", DEFAULT_MAX_ITER): - raise Exception("Max inference iterations reached") diff --git a/src/llama_stack_client/lib/agents/client_tool.py b/src/llama_stack_client/lib/agents/client_tool.py deleted file mode 100644 index c199b211..00000000 --- a/src/llama_stack_client/lib/agents/client_tool.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import inspect -import json -from abc import abstractmethod -from typing import ( - Any, - Callable, - Dict, - get_args, - get_origin, - get_type_hints, - List, - TypeVar, - Union, -) - -from llama_stack_client.types import CompletionMessage, Message, ToolResponse -from llama_stack_client.types.tool_def_param import Parameter, ToolDefParam - - -class ClientTool: - """ - Developers can define their custom tools that models can use - by extending this class. 
- - Developers need to provide - - name - - description - - params_definition - - implement tool's behavior in `run_impl` method - - NOTE: The return of the `run` method needs to be json serializable - """ - - @abstractmethod - def get_name(self) -> str: - raise NotImplementedError - - @abstractmethod - def get_description(self) -> str: - raise NotImplementedError - - @abstractmethod - def get_params_definition(self) -> Dict[str, Parameter]: - raise NotImplementedError - - def get_instruction_string(self) -> str: - return f"Use the function '{self.get_name()}' to: {self.get_description()}" - - def parameters_for_system_prompt(self) -> str: - return json.dumps( - { - "name": self.get_name(), - "description": self.get_description(), - "parameters": {name: definition for name, definition in self.get_params_definition().items()}, - } - ) - - def get_tool_definition(self) -> ToolDefParam: - return ToolDefParam( - name=self.get_name(), - description=self.get_description(), - parameters=list(self.get_params_definition().values()), - metadata={}, - tool_prompt_format="python_list", - ) - - def run( - self, - message_history: List[Message], - ) -> ToolResponse: - # NOTE: we could override this method to use the entire message history for advanced tools - last_message = message_history[-1] - assert isinstance(last_message, CompletionMessage), "Expected CompletionMessage" - assert len(last_message.tool_calls) == 1, "Expected single tool call" - tool_call = last_message.tool_calls[0] - - metadata = {} - try: - if tool_call.arguments_json is not None: - params = json.loads(tool_call.arguments_json) - elif isinstance(tool_call.arguments, str): - params = json.loads(tool_call.arguments) - else: - params = tool_call.arguments - - response = self.run_impl(**params) - if isinstance(response, dict) and "content" in response: - content = json.dumps(response["content"], ensure_ascii=False) - metadata = response.get("metadata", {}) - else: - content = json.dumps(response, ensure_ascii=False) - except Exception as e: - content = f"Error when running tool: {e}" - return ToolResponse( - call_id=tool_call.call_id, - tool_name=tool_call.tool_name, - content=content, - metadata=metadata, - ) - - async def async_run( - self, - message_history: List[Message], - ) -> ToolResponse: - last_message = message_history[-1] - - assert len(last_message.tool_calls) == 1, "Expected single tool call" - tool_call = last_message.tool_calls[0] - metadata = {} - try: - response = await self.async_run_impl(**tool_call.arguments) - if isinstance(response, dict) and "content" in response: - content = json.dumps(response["content"], ensure_ascii=False) - metadata = response.get("metadata", {}) - else: - content = json.dumps(response, ensure_ascii=False) - except Exception as e: - content = f"Error when running tool: {e}" - - return ToolResponse( - call_id=tool_call.call_id, - tool_name=tool_call.tool_name, - content=content, - metadata=metadata, - ) - - @abstractmethod - def run_impl(self, **kwargs) -> Any: - """ - Can return any json serializable object. - To return metadata along with the response, return a dict with a "content" key, and a "metadata" key, where the "content" is the response that'll - be serialized and passed to the model, and the "metadata" will be logged as metadata in the tool execution step within the Agent execution trace. 
- """ - raise NotImplementedError - - @abstractmethod - def async_run_impl(self, **kwargs): - raise NotImplementedError - - -T = TypeVar("T", bound=Callable) - - -def client_tool(func: T) -> ClientTool: - """ - Decorator to convert a function into a ClientTool. - Usage: - @client_tool - def add(x: int, y: int) -> int: - '''Add 2 integer numbers - - :param x: integer 1 - :param y: integer 2 - :returns: sum of x + y - ''' - return x + y - - Note that you must use RST-style docstrings with :param tags for each parameter. These will be used for prompting model to use tools correctly. - :returns: tags in the docstring is optional as it would not be used for the tool's description. - - Your function can return any json serializable object. - To return metadata along with the response, return a dict with a "content" key, and a "metadata" key, where the "content" is the response that'll - be serialized and passed to the model, and the "metadata" will be logged as metadata in the tool execution step within the Agent execution trace. - """ - - class _WrappedTool(ClientTool): - __name__ = func.__name__ - __doc__ = func.__doc__ - __module__ = func.__module__ - - def get_name(self) -> str: - return func.__name__ - - def get_description(self) -> str: - doc = inspect.getdoc(func) - if doc: - # Get everything before the first :param - return doc.split(":param")[0].strip() - else: - raise ValueError( - f"No description found for client tool {__name__}. Please provide a RST-style docstring with description and :param tags for each parameter." - ) - - def get_params_definition(self) -> Dict[str, Parameter]: - hints = get_type_hints(func) - # Remove return annotation if present - hints.pop("return", None) - - # Get parameter descriptions from docstring - params = {} - sig = inspect.signature(func) - doc = inspect.getdoc(func) or "" - - for name, type_hint in hints.items(): - # Look for :param name: in docstring - param_doc = "" - for line in doc.split("\n"): - if line.strip().startswith(f":param {name}:"): - param_doc = line.split(":", 2)[2].strip() - break - - if param_doc == "": - raise ValueError(f"No parameter description found for parameter {name}") - - param = sig.parameters[name] - is_optional_type = get_origin(type_hint) is Union and type(None) in get_args(type_hint) - is_required = param.default == inspect.Parameter.empty and not is_optional_type - params[name] = Parameter( - name=name, - description=param_doc or f"Parameter {name}", - parameter_type=type_hint.__name__, - default=(param.default if param.default != inspect.Parameter.empty else None), - required=is_required, - ) - - return params - - def run_impl(self, **kwargs) -> Any: - if inspect.iscoroutinefunction(func): - raise NotImplementedError("Tool is async but run_impl is not async") - return func(**kwargs) - - async def async_run_impl(self, **kwargs): - if inspect.iscoroutinefunction(func): - return await func(**kwargs) - else: - return func(**kwargs) - - return _WrappedTool() diff --git a/src/llama_stack_client/lib/agents/event_logger.py b/src/llama_stack_client/lib/agents/event_logger.py deleted file mode 100644 index 731c7b2f..00000000 --- a/src/llama_stack_client/lib/agents/event_logger.py +++ /dev/null @@ -1,165 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from typing import Any, Iterator, Optional, Tuple - -from termcolor import cprint - -from llama_stack_client.types import InterleavedContent - - -def interleaved_content_as_str(content: InterleavedContent, sep: str = " ") -> str: - def _process(c: Any) -> str: - if isinstance(c, str): - return c - elif hasattr(c, "type"): - if c.type == "text": - return c.text - elif c.type == "image": - return "" - else: - raise ValueError(f"Unexpected type {c}") - else: - raise ValueError(f"Unsupported content type: {type(c)}") - - if isinstance(content, list): - return sep.join(_process(c) for c in content) - else: - return _process(content) - - -class TurnStreamPrintableEvent: - def __init__( - self, - role: Optional[str] = None, - content: str = "", - end: Optional[str] = "\n", - color: str = "white", - ) -> None: - self.role = role - self.content = content - self.color = color - self.end = "\n" if end is None else end - - def __str__(self) -> str: - if self.role is not None: - return f"{self.role}> {self.content}" - else: - return f"{self.content}" - - def print(self, flush: bool = True) -> None: - cprint(f"{str(self)}", color=self.color, end=self.end, flush=flush) - - -class TurnStreamEventPrinter: - def __init__(self) -> None: - self.previous_event_type: Optional[str] = None - self.previous_step_type: Optional[str] = None - - def yield_printable_events(self, chunk: Any) -> Iterator[TurnStreamPrintableEvent]: - for printable_event in self._yield_printable_events(chunk, self.previous_event_type, self.previous_step_type): - yield printable_event - - if not hasattr(chunk, "error"): - self.previous_event_type, self.previous_step_type = self._get_event_type_step_type(chunk) - - def _yield_printable_events( - self, chunk: Any, previous_event_type: Optional[str] = None, previous_step_type: Optional[str] = None - ) -> Iterator[TurnStreamPrintableEvent]: - if hasattr(chunk, "error"): - yield TurnStreamPrintableEvent(role=None, content=chunk.error["message"], color="red") - return - - event = chunk.event - event_type = event.payload.event_type - - if event_type in {"turn_start", "turn_complete", "turn_awaiting_input"}: - # Currently not logging any turn realted info - yield TurnStreamPrintableEvent(role=None, content="", end="", color="grey") - return - - step_type = event.payload.step_type - # handle safety - if step_type == "shield_call" and event_type == "step_complete": - violation = event.payload.step_details.violation - if not violation: - yield TurnStreamPrintableEvent(role=step_type, content="No Violation", color="magenta") - else: - yield TurnStreamPrintableEvent( - role=step_type, - content=f"{violation.metadata} {violation.user_message}", - color="red", - ) - - # handle inference - if step_type == "inference": - if event_type == "step_start": - yield TurnStreamPrintableEvent(role=step_type, content="", end="", color="yellow") - elif event_type == "step_progress": - if event.payload.delta.type == "tool_call": - if isinstance(event.payload.delta.tool_call, str): - yield TurnStreamPrintableEvent( - role=None, - content=event.payload.delta.tool_call, - end="", - color="cyan", - ) - elif event.payload.delta.type == "text": - yield TurnStreamPrintableEvent( - role=None, - content=event.payload.delta.text, - end="", - color="yellow", - ) - else: - # step complete - yield TurnStreamPrintableEvent(role=None, content="") - - # handle tool_execution - if step_type == "tool_execution" and event_type == "step_complete": - # Only print tool calls and responses at the step_complete event - details = 
event.payload.step_details - for t in details.tool_calls: - yield TurnStreamPrintableEvent( - role=step_type, - content=f"Tool:{t.tool_name} Args:{t.arguments}", - color="green", - ) - - for r in details.tool_responses: - if r.tool_name == "query_from_memory": - inserted_context = interleaved_content_as_str(r.content) - content = f"fetched {len(inserted_context)} bytes from memory" - - yield TurnStreamPrintableEvent( - role=step_type, - content=content, - color="cyan", - ) - else: - yield TurnStreamPrintableEvent( - role=step_type, - content=f"Tool:{r.tool_name} Response:{r.content}", - color="green", - ) - - def _get_event_type_step_type(self, chunk: Any) -> Tuple[Optional[str], Optional[str]]: - if hasattr(chunk, "event"): - previous_event_type = chunk.event.payload.event_type if hasattr(chunk, "event") else None - previous_step_type = ( - chunk.event.payload.step_type - if previous_event_type not in {"turn_start", "turn_complete", "turn_awaiting_input"} - else None - ) - return previous_event_type, previous_step_type - return None, None - - -class EventLogger: - def log(self, event_generator: Iterator[Any]) -> Iterator[TurnStreamPrintableEvent]: - printer = TurnStreamEventPrinter() - for chunk in event_generator: - yield from printer.yield_printable_events(chunk) diff --git a/src/llama_stack_client/lib/agents/react/__init__.py b/src/llama_stack_client/lib/agents/react/__init__.py deleted file mode 100644 index 756f351d..00000000 --- a/src/llama_stack_client/lib/agents/react/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. diff --git a/src/llama_stack_client/lib/agents/react/agent.py b/src/llama_stack_client/lib/agents/react/agent.py deleted file mode 100644 index 2719a7dd..00000000 --- a/src/llama_stack_client/lib/agents/react/agent.py +++ /dev/null @@ -1,223 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
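The `EventLogger` deleted above is normally driven with a streaming agent turn. A minimal sketch follows; the `Agent`, `create_session`, and `create_turn` calls come from the deleted `agents/agent.py`, which is not reproduced in this section, so their exact signatures are assumptions, and the endpoint, model id, and prompt are placeholders.

from llama_stack_client import LlamaStackClient
from llama_stack_client.lib.agents.agent import Agent
from llama_stack_client.lib.agents.event_logger import EventLogger

client = LlamaStackClient(base_url="http://localhost:8321")
agent = Agent(
    client=client,
    model="meta-llama/Llama-3.1-8B-Instruct",  # placeholder model id
    instructions="You are a helpful assistant.",
)
session_id = agent.create_session("event-logger-demo")  # assumed helper from the deleted agent.py

stream = agent.create_turn(
    messages=[{"role": "user", "content": "What is 2 + 2?"}],
    session_id=session_id,
    stream=True,
)
for printable_event in EventLogger().log(stream):
    printable_event.print()  # colorized, role-prefixed console output per turn-stream chunk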
-import logging -from typing import Any, Callable, List, Optional, Tuple, Union - -from llama_stack_client import LlamaStackClient -from llama_stack_client.types.agent_create_params import AgentConfig -from llama_stack_client.types.agents.turn_create_params import Toolgroup -from llama_stack_client.types.shared_params.agent_config import ToolConfig -from llama_stack_client.types.shared_params.response_format import ResponseFormat -from llama_stack_client.types.shared_params.sampling_params import SamplingParams - -from ..agent import Agent, AgentUtils -from ..client_tool import ClientTool -from ..tool_parser import ToolParser -from .prompts import DEFAULT_REACT_AGENT_SYSTEM_PROMPT_TEMPLATE -from .tool_parser import ReActOutput, ReActToolParser - -logger = logging.getLogger(__name__) - - -def get_tool_defs( - client: LlamaStackClient, builtin_toolgroups: Tuple[Toolgroup] = (), client_tools: Tuple[ClientTool] = () -): - tool_defs = [] - for x in builtin_toolgroups: - if isinstance(x, str): - toolgroup_id = x - else: - toolgroup_id = x["name"] - tool_defs.extend( - [ - { - "name": tool.identifier, - "description": tool.description, - "parameters": tool.parameters, - } - for tool in client.tools.list(toolgroup_id=toolgroup_id) - ] - ) - - tool_defs.extend( - [ - { - "name": tool.get_name(), - "description": tool.get_description(), - "parameters": tool.get_params_definition(), - } - for tool in client_tools - ] - ) - return tool_defs - - -def get_default_react_instructions( - client: LlamaStackClient, builtin_toolgroups: Tuple[str] = (), client_tools: Tuple[ClientTool] = () -): - tool_defs = get_tool_defs(client, builtin_toolgroups, client_tools) - tool_names = ", ".join([x["name"] for x in tool_defs]) - tool_descriptions = "\n".join([f"- {x['name']}: {x}" for x in tool_defs]) - instruction = DEFAULT_REACT_AGENT_SYSTEM_PROMPT_TEMPLATE.replace("<>", tool_names).replace( - "<>", tool_descriptions - ) - return instruction - - -def get_agent_config_DEPRECATED( - client: LlamaStackClient, - model: str, - builtin_toolgroups: Tuple[str] = (), - client_tools: Tuple[ClientTool] = (), - json_response_format: bool = False, - custom_agent_config: Optional[AgentConfig] = None, -) -> AgentConfig: - if custom_agent_config is None: - instruction = get_default_react_instructions(client, builtin_toolgroups, client_tools) - - # user default toolgroups - agent_config = AgentConfig( - model=model, - instructions=instruction, - toolgroups=builtin_toolgroups, - client_tools=[client_tool.get_tool_definition() for client_tool in client_tools], - tool_config={ - "tool_choice": "auto", - "system_message_behavior": "replace", - }, - input_shields=[], - output_shields=[], - enable_session_persistence=False, - ) - else: - agent_config = custom_agent_config - - if json_response_format: - agent_config["response_format"] = { - "type": "json_schema", - "json_schema": ReActOutput.model_json_schema(), - } - - return agent_config - - -class ReActAgent(Agent): - """ReAct agent. - - Simple wrapper around Agent to add prepare prompts for creating a ReAct agent from a list of tools. 
- """ - - def __init__( - self, - client: LlamaStackClient, - model: str, - tool_parser: ToolParser = ReActToolParser(), - instructions: Optional[str] = None, - tools: Optional[List[Union[Toolgroup, ClientTool, Callable[..., Any]]]] = None, - tool_config: Optional[ToolConfig] = None, - sampling_params: Optional[SamplingParams] = None, - max_infer_iters: Optional[int] = None, - input_shields: Optional[List[str]] = None, - output_shields: Optional[List[str]] = None, - response_format: Optional[ResponseFormat] = None, - enable_session_persistence: Optional[bool] = None, - json_response_format: bool = False, - builtin_toolgroups: Tuple[str] = (), # DEPRECATED - client_tools: Tuple[ClientTool] = (), # DEPRECATED - custom_agent_config: Optional[AgentConfig] = None, # DEPRECATED - ): - """Construct an Agent with the given parameters. - - :param client: The LlamaStackClient instance. - :param custom_agent_config: The AgentConfig instance. - ::deprecated: use other parameters instead - :param client_tools: A tuple of ClientTool instances. - ::deprecated: use tools instead - :param builtin_toolgroups: A tuple of Toolgroup instances. - ::deprecated: use tools instead - :param tool_parser: Custom logic that parses tool calls from a message. - :param model: The model to use for the agent. - :param instructions: The instructions for the agent. - :param tools: A list of tools for the agent. Values can be one of the following: - - dict representing a toolgroup/tool with arguments: e.g. {"name": "builtin::rag/knowledge_search", "args": {"vector_db_ids": [123]}} - - a python function with a docstring. See @client_tool for more details. - - str representing a tool within a toolgroup: e.g. "builtin::rag/knowledge_search" - - str representing a toolgroup_id: e.g. "builtin::rag", "builtin::code_interpreter", where all tools in the toolgroup will be added to the agent - - an instance of ClientTool: A client tool object. - :param tool_config: The tool configuration for the agent. - :param sampling_params: The sampling parameters for the agent. - :param max_infer_iters: The maximum number of inference iterations. - :param input_shields: The input shields for the agent. - :param output_shields: The output shields for the agent. - :param response_format: The response format for the agent. - :param enable_session_persistence: Whether to enable session persistence. - :param json_response_format: Whether to use the json response format with default ReAct output schema. - ::deprecated: use response_format instead - """ - use_deprecated_params = False - if custom_agent_config is not None: - logger.warning("`custom_agent_config` is deprecated. Use inlined parameters instead.") - use_deprecated_params = True - if client_tools != (): - logger.warning("`client_tools` is deprecated. Use `tools` instead.") - use_deprecated_params = True - if builtin_toolgroups != (): - logger.warning("`builtin_toolgroups` is deprecated. 
Use `tools` instead.") - use_deprecated_params = True - - if use_deprecated_params: - agent_config = get_agent_config_DEPRECATED( - client=client, - model=model, - builtin_toolgroups=builtin_toolgroups, - client_tools=client_tools, - json_response_format=json_response_format, - ) - super().__init__( - client=client, - agent_config=agent_config, - client_tools=client_tools, - tool_parser=tool_parser, - ) - - else: - if not tool_config: - tool_config = { - "tool_choice": "auto", - "system_message_behavior": "replace", - } - - if json_response_format: - if instructions is not None: - logger.warning( - "Using a custom instructions, but json_response_format is set. Please make sure instructions are" - "compatible with the default ReAct output format." - ) - response_format = { - "type": "json_schema", - "json_schema": ReActOutput.model_json_schema(), - } - - # build REACT instructions - client_tools = AgentUtils.get_client_tools(tools) - builtin_toolgroups = [x for x in tools if isinstance(x, str) or isinstance(x, dict)] - if not instructions: - instructions = get_default_react_instructions(client, builtin_toolgroups, client_tools) - - super().__init__( - client=client, - model=model, - tool_parser=tool_parser, - instructions=instructions, - tools=tools, - tool_config=tool_config, - sampling_params=sampling_params, - max_infer_iters=max_infer_iters, - input_shields=input_shields, - output_shields=output_shields, - response_format=response_format, - enable_session_persistence=enable_session_persistence, - ) diff --git a/src/llama_stack_client/lib/agents/react/prompts.py b/src/llama_stack_client/lib/agents/react/prompts.py deleted file mode 100644 index cad7054a..00000000 --- a/src/llama_stack_client/lib/agents/react/prompts.py +++ /dev/null @@ -1,151 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -DEFAULT_REACT_AGENT_SYSTEM_PROMPT_TEMPLATE = """ -You are an expert assistant who can solve any task using tool calls. You will be given a task to solve as best you can. -To do so, you have been given access to the following tools: <> - -You must always respond in the following JSON format: -{ - "thought": $THOUGHT_PROCESS, - "action": { - "tool_name": $TOOL_NAME, - "tool_params": $TOOL_PARAMS - }, - "answer": $ANSWER -} - -Specifically, this json should have a `thought` key, a `action` key and an `answer` key. - -The `action` key should specify the $TOOL_NAME the name of the tool to use and the `tool_params` key should specify the parameters key as input to the tool. - -Make sure to have the $TOOL_PARAMS as a list of dictionaries in the right format for the tool you are using, and do not put variable names as input if you can find the right values. - -You should always think about one action to take, and have the `thought` key contain your thought process about this action. -If the tool responds, the tool will return an observation containing result of the action. -... (this Thought/Action/Observation can repeat N times, you should take several steps when needed. The action key must only use a SINGLE tool at a time.) - -You can use the result of the previous action as input for the next action. -The observation will always be the response from calling the tool: it can represent a file, like "image_1.jpg". You do not need to generate them, it will be provided to you. -Then you can use it as input for the next action. 
You can do it for instance as follows: - -Observation: "image_1.jpg" -{ - "thought": "I need to transform the image that I received in the previous observation to make it green.", - "action": { - "tool_name": "image_transformer", - "tool_params": [{"name": "image"}, {"value": "image_1.jpg"}] - }, - "answer": null -} - - -To provide the final answer to the task, use the `answer` key. It is the only way to complete the task, else you will be stuck on a loop. So your final output should look like this: -Observation: "your observation" - -{ - "thought": "you thought process", - "action": null, - "answer": "insert your final answer here" -} - -Here are a few examples using notional tools: ---- -Task: "Generate an image of the oldest person in this document." - -Your Response: -{ - "thought": "I will proceed step by step and use the following tools: `document_qa` to find the oldest person in the document, then `image_generator` to generate an image according to the answer.", - "action": { - "tool_name": "document_qa", - "tool_params": [{"name": "document"}, {"value": "document.pdf"}, {"name": "question"}, {"value": "Who is the oldest person mentioned?"}] - }, - "answer": null -} - -Your Observation: "The oldest person in the document is John Doe, a 55 year old lumberjack living in Newfoundland." - -Your Response: -{ - "thought": "I will now generate an image showcasing the oldest person.", - "action": { - "tool_name": "image_generator", - "tool_params": [{"name": "prompt"}, {"value": "A portrait of John Doe, a 55-year-old man living in Canada."}] - }, - "answer": null -} -Your Observation: "image.png" - -{ - "thought": "I will now return the generated image.", - "action": null, - "answer": "image.png" -} - ---- -Task: "What is the result of the following operation: 5 + 3 + 1294.678?" - -Your Response: -{ - "thought": "I will use python code evaluator to compute the result of the operation and then return the final answer using the `final_answer` tool", - "action": { - "tool_name": "python_interpreter", - "tool_params": [{"name": "code"}, {"value": "5 + 3 + 1294.678"}] - }, - "answer": null -} -Your Observation: 1302.678 - -{ - "thought": "Now that I know the result, I will now return it.", - "action": null, - "answer": 1302.678 -} - ---- -Task: "Which city has the highest population , Guangzhou or Shanghai?" - -Your Response: -{ - "thought": "I need to get the populations for both cities and compare them: I will use the tool `search` to get the population of both cities.", - "action": { - "tool_name": "search", - "tool_params": [{"name": "query"}, {"value": "Population Guangzhou"}] - }, - "answer": null -} -Your Observation: ['Guangzhou has a population of 15 million inhabitants as of 2021.'] - -Your Response: -{ - "thought": "Now let's get the population of Shanghai using the tool 'search'.", - "action": { - "tool_name": "search", - "tool_params": [{"name": "query"}, {"value": "Population Shanghai"}] - }, - "answer": null -} -Your Observation: "26 million (2019)" - -Your Response: -{ - "thought": "Now I know that Shanghai has a larger population. Let's return the result.", - "action": null, - "answer": "Shanghai" -} - -Above example were using notional tools that might not exist for you. You only have access to these tools: -<> - -Here are the rules you should always follow to solve your task: -1. ALWAYS answer in the JSON format with keys "thought", "action", "answer", else you will fail. -2. Always use the right arguments for the tools. 
Never use variable names in the 'tool_params' field, use the value instead. -3. Call a tool only when needed: do not call the search agent if you do not need information, try to solve the task yourself. -4. Never re-do a tool call that you previously did with the exact same parameters. -5. Observations will be provided to you, no need to generate them - -Now Begin! If you solve the task correctly, you will receive a reward of $1,000,000. -""" diff --git a/src/llama_stack_client/lib/agents/react/tool_parser.py b/src/llama_stack_client/lib/agents/react/tool_parser.py deleted file mode 100644 index 76b787dd..00000000 --- a/src/llama_stack_client/lib/agents/react/tool_parser.py +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import json -import uuid -from typing import List, Optional, Union - -from llama_stack_client.types.shared.completion_message import CompletionMessage -from llama_stack_client.types.shared.tool_call import ToolCall - -from pydantic import BaseModel, ValidationError - -from ..tool_parser import ToolParser - - -class Param(BaseModel): - name: str - value: Union[str, int, float, bool] - - -class Action(BaseModel): - tool_name: str - tool_params: List[Param] - - -class ReActOutput(BaseModel): - thought: str - action: Optional[Action] - answer: Optional[str] - - -class ReActToolParser(ToolParser): - def get_tool_calls(self, output_message: CompletionMessage) -> List[ToolCall]: - tool_calls = [] - response_text = str(output_message.content) - try: - react_output = ReActOutput.model_validate_json(response_text) - except ValidationError as e: - print(f"Error parsing action: {e}") - return tool_calls - - if react_output.answer: - return tool_calls - - if react_output.action: - tool_name = react_output.action.tool_name - tool_params = react_output.action.tool_params - params = {param.name: param.value for param in tool_params} - if tool_name and tool_params: - call_id = str(uuid.uuid4()) - tool_calls = [ - ToolCall( - call_id=call_id, - tool_name=tool_name, - arguments=params, - arguments_json=json.dumps(params), - ) - ] - - return tool_calls diff --git a/src/llama_stack_client/lib/agents/tool_parser.py b/src/llama_stack_client/lib/agents/tool_parser.py deleted file mode 100644 index dc0c5ba4..00000000 --- a/src/llama_stack_client/lib/agents/tool_parser.py +++ /dev/null @@ -1,44 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from abc import abstractmethod -from typing import List - -from llama_stack_client.types.agents.turn import CompletionMessage -from llama_stack_client.types.shared.tool_call import ToolCall - - -class ToolParser: - """ - Abstract base class for parsing agent responses into tool calls. Implement this class to customize how - agent outputs are processed and transformed into executable tool calls. - - To use this class: - 1. Create a subclass of ToolParser - 2. Implement the `get_tool_calls` method - 3. 
Pass your parser instance to the Agent's constructor - - Example: - class MyCustomParser(ToolParser): - def get_tool_calls(self, output_message: CompletionMessage) -> List[ToolCall]: - # Add your custom parsing logic here - return extracted_tool_calls - - Methods: - get_tool_calls(output_message: CompletionMessage) -> List[ToolCall]: - Abstract method that must be implemented by subclasses to process - the agent's response and extract tool calls. - - Args: - output_message (CompletionMessage): The response message from agent turn - - Returns: - Optional[List[ToolCall]]: A list of parsed tool calls, or None if no tools should be called - """ - - @abstractmethod - def get_tool_calls(self, output_message: CompletionMessage) -> List[ToolCall]: - raise NotImplementedError diff --git a/src/llama_stack_client/lib/cli/__init__.py b/src/llama_stack_client/lib/cli/__init__.py deleted file mode 100644 index 77737e7d..00000000 --- a/src/llama_stack_client/lib/cli/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -# Ignore tqdm experimental warning -import warnings - -from tqdm import TqdmExperimentalWarning - -warnings.filterwarnings("ignore", category=TqdmExperimentalWarning) diff --git a/src/llama_stack_client/lib/cli/common/__init__.py b/src/llama_stack_client/lib/cli/common/__init__.py deleted file mode 100644 index 756f351d..00000000 --- a/src/llama_stack_client/lib/cli/common/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. diff --git a/src/llama_stack_client/lib/cli/common/utils.py b/src/llama_stack_client/lib/cli/common/utils.py deleted file mode 100644 index faf9ac26..00000000 --- a/src/llama_stack_client/lib/cli/common/utils.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
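To make the ReAct wiring concrete, here is a hedged sketch of `ReActToolParser` (deleted above) turning a ReAct-formatted reply into a `ToolCall`; the `CompletionMessage` fields other than `content` are filled with minimal placeholder values only to satisfy the type.

import json

from llama_stack_client.lib.agents.react.tool_parser import ReActToolParser
from llama_stack_client.types.shared.completion_message import CompletionMessage

reply = CompletionMessage(
    role="assistant",
    content=json.dumps(
        {
            "thought": "I need the population of Guangzhou.",
            "action": {
                "tool_name": "search",
                "tool_params": [{"name": "query", "value": "Population Guangzhou"}],
            },
            "answer": None,
        }
    ),
    stop_reason="end_of_turn",
    tool_calls=[],
)

calls = ReActToolParser().get_tool_calls(reply)
print(calls[0].tool_name)  # "search"
print(calls[0].arguments)  # {"query": "Population Guangzhou"}

A reply whose `answer` field is non-null yields no tool calls, which is how the agent loop knows the task is finished.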
-from functools import wraps - -from rich.console import Console -from rich.panel import Panel -from rich.table import Table - - -def create_bar_chart(data, labels, title=""): - """Create a bar chart using Rich Table.""" - - console = Console() - table = Table(title=title) - table.add_column("Score") - table.add_column("Count") - - max_value = max(data) - total_count = sum(data) - - # Define a list of colors to cycle through - colors = ["green", "blue", "red", "yellow", "magenta", "cyan"] - - for i, (label, value) in enumerate(zip(labels, data)): - bar_length = int((value / max_value) * 20) # Adjust bar length as needed - bar = "█" * bar_length + " " * (20 - bar_length) - color = colors[i % len(colors)] - table.add_row(label, f"[{color}]{bar}[/] {value}/{total_count}") - - console.print(table) - - -def handle_client_errors(operation_name): - def decorator(func): - @wraps(func) - def wrapper(*args, **kwargs): - try: - return func(*args, **kwargs) - except Exception as e: - console = Console() - console.print( - Panel.fit( - f"[bold red]Failed to {operation_name}[/bold red]\n\n" - f"[yellow]Error Type:[/yellow] {e.__class__.__name__}\n" - f"[yellow]Details:[/yellow] {str(e)}" - ) - ) - - return wrapper - - return decorator diff --git a/src/llama_stack_client/lib/cli/configure.py b/src/llama_stack_client/lib/cli/configure.py deleted file mode 100644 index 59554580..00000000 --- a/src/llama_stack_client/lib/cli/configure.py +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import os - -import click -import yaml -from prompt_toolkit import prompt -from prompt_toolkit.validation import Validator -from urllib.parse import urlparse - -from llama_stack_client.lib.cli.constants import LLAMA_STACK_CLIENT_CONFIG_DIR, get_config_file_path - - -def get_config(): - config_file = get_config_file_path() - if config_file.exists(): - with open(config_file, "r") as f: - return yaml.safe_load(f) - return None - - -@click.command() -@click.help_option("-h", "--help") -@click.option("--endpoint", type=str, help="Llama Stack distribution endpoint", default="") -@click.option("--api-key", type=str, help="Llama Stack distribution API key", default="") -def configure(endpoint: str | None, api_key: str | None): - """Configure Llama Stack Client CLI.""" - os.makedirs(LLAMA_STACK_CLIENT_CONFIG_DIR, exist_ok=True) - config_path = get_config_file_path() - - if endpoint != "": - final_endpoint = endpoint - else: - final_endpoint = prompt( - "> Enter the endpoint of the Llama Stack distribution server: ", - validator=Validator.from_callable( - lambda x: len(x) > 0 and (parsed := urlparse(x)).scheme and parsed.netloc, - error_message="Endpoint cannot be empty and must be a valid URL, please enter a valid endpoint", - ), - ) - - if api_key != "": - final_api_key = api_key - else: - final_api_key = prompt( - "> Enter the API key (leave empty if no key is needed): ", - ) - - # Prepare config dict before writing it - config_dict = { - "endpoint": final_endpoint, - } - if final_api_key != "": - config_dict["api_key"] = final_api_key - - with open(config_path, "w") as f: - f.write( - yaml.dump( - config_dict, - sort_keys=True, - ) - ) - - print(f"Done! 
You can now use the Llama Stack Client CLI with endpoint {final_endpoint}") diff --git a/src/llama_stack_client/lib/cli/constants.py b/src/llama_stack_client/lib/cli/constants.py deleted file mode 100644 index 22595747..00000000 --- a/src/llama_stack_client/lib/cli/constants.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import os -from pathlib import Path - -LLAMA_STACK_CLIENT_CONFIG_DIR = Path(os.path.expanduser("~/.llama/client")) - - -def get_config_file_path(): - return LLAMA_STACK_CLIENT_CONFIG_DIR / "config.yaml" diff --git a/src/llama_stack_client/lib/cli/datasets/__init__.py b/src/llama_stack_client/lib/cli/datasets/__init__.py deleted file mode 100644 index ec7b144f..00000000 --- a/src/llama_stack_client/lib/cli/datasets/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .datasets import datasets - -__all__ = ["datasets"] diff --git a/src/llama_stack_client/lib/cli/datasets/datasets.py b/src/llama_stack_client/lib/cli/datasets/datasets.py deleted file mode 100644 index c01b875a..00000000 --- a/src/llama_stack_client/lib/cli/datasets/datasets.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. -import click - -from .list import list_datasets -from .register import register -from .unregister import unregister - - -@click.group() -@click.help_option("-h", "--help") -def datasets(): - """Manage datasets.""" - - -# Register subcommands -datasets.add_command(list_datasets) -datasets.add_command(register) -datasets.add_command(unregister) diff --git a/src/llama_stack_client/lib/cli/datasets/list.py b/src/llama_stack_client/lib/cli/datasets/list.py deleted file mode 100644 index 61d625c9..00000000 --- a/src/llama_stack_client/lib/cli/datasets/list.py +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import click -from rich.console import Console -from rich.table import Table - -from ..common.utils import handle_client_errors - - -@click.command("list") -@click.help_option("-h", "--help") -@click.pass_context -@handle_client_errors("list datasets") -def list_datasets(ctx): - """Show available datasets on distribution endpoint""" - client = ctx.obj["client"] - console = Console() - headers = ["identifier", "provider_id", "metadata", "type", "purpose"] - - datasets_list_response = client.datasets.list() - if datasets_list_response: - table = Table() - for header in headers: - table.add_column(header) - - for item in datasets_list_response: - table.add_row(*[str(getattr(item, header)) for header in headers]) - console.print(table) diff --git a/src/llama_stack_client/lib/cli/datasets/register.py b/src/llama_stack_client/lib/cli/datasets/register.py deleted file mode 100644 index d990e30c..00000000 --- a/src/llama_stack_client/lib/cli/datasets/register.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. 
and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. -import base64 -import json -import mimetypes -import os -from typing import Optional, Literal - -import click -import yaml - -from ..common.utils import handle_client_errors - - -def data_url_from_file(file_path: str) -> str: - if not os.path.exists(file_path): - raise FileNotFoundError(f"File not found: {file_path}") - - with open(file_path, "rb") as file: - file_content = file.read() - - base64_content = base64.b64encode(file_content).decode("utf-8") - mime_type, _ = mimetypes.guess_type(file_path) - - data_url = f"data:{mime_type};base64,{base64_content}" - return data_url - - -@click.command("register") -@click.help_option("-h", "--help") -@click.option("--dataset-id", required=True, help="Id of the dataset") -@click.option( - "--purpose", - type=click.Choice(["post-training/messages", "eval/question-answer", "eval/messages-answer"]), - help="Purpose of the dataset", - required=True, -) -@click.option("--metadata", type=str, help="Metadata of the dataset") -@click.option("--url", type=str, help="URL of the dataset", required=False) -@click.option( - "--dataset-path", required=False, help="Local file path to the dataset. If specified, upload dataset via URL" -) -@click.pass_context -@handle_client_errors("register dataset") -def register( - ctx, - dataset_id: str, - purpose: Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"], - metadata: Optional[str], - url: Optional[str], - dataset_path: Optional[str], -): - """Create a new dataset""" - client = ctx.obj["client"] - - if metadata: - try: - metadata = json.loads(metadata) - except json.JSONDecodeError as err: - raise click.BadParameter("Metadata must be valid JSON") from err - - if dataset_path: - url = data_url_from_file(dataset_path) - else: - if not url: - raise click.BadParameter("URL is required when dataset path is not specified") - - response = client.datasets.register( - dataset_id=dataset_id, - source={"uri": url}, - metadata=metadata, - purpose=purpose, - ) - if response: - click.echo(yaml.dump(response.dict())) diff --git a/src/llama_stack_client/lib/cli/datasets/unregister.py b/src/llama_stack_client/lib/cli/datasets/unregister.py deleted file mode 100644 index 8ca7cceb..00000000 --- a/src/llama_stack_client/lib/cli/datasets/unregister.py +++ /dev/null @@ -1,20 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. -import click - -from ..common.utils import handle_client_errors - - -@click.command("unregister") -@click.help_option("-h", "--help") -@click.argument("dataset-id", required=True) -@click.pass_context -@handle_client_errors("unregister dataset") -def unregister(ctx, dataset_id: str): - """Remove a dataset""" - client = ctx.obj["client"] - client.datasets.unregister(dataset_id=dataset_id) - click.echo(f"Dataset '{dataset_id}' unregistered successfully") diff --git a/src/llama_stack_client/lib/cli/eval/__init__.py b/src/llama_stack_client/lib/cli/eval/__init__.py deleted file mode 100644 index 503994e9..00000000 --- a/src/llama_stack_client/lib/cli/eval/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. 
-# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .eval import eval - -__all__ = ["eval"] diff --git a/src/llama_stack_client/lib/cli/eval/eval.py b/src/llama_stack_client/lib/cli/eval/eval.py deleted file mode 100644 index dd162809..00000000 --- a/src/llama_stack_client/lib/cli/eval/eval.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - - -import click - -from .run_benchmark import run_benchmark -from .run_scoring import run_scoring - - -@click.group() -@click.help_option("-h", "--help") -def eval(): - """Run evaluation tasks.""" - - -# Register subcommands -eval.add_command(run_benchmark) -eval.add_command(run_scoring) diff --git a/src/llama_stack_client/lib/cli/eval/run_benchmark.py b/src/llama_stack_client/lib/cli/eval/run_benchmark.py deleted file mode 100644 index e088137e..00000000 --- a/src/llama_stack_client/lib/cli/eval/run_benchmark.py +++ /dev/null @@ -1,194 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import json -import os -from typing import Optional - -import click -from rich import print as rprint -from tqdm.rich import tqdm - -from ..common.utils import create_bar_chart -from .utils import ( - aggregate_accuracy, - aggregate_average, - aggregate_weighted_average, - aggregate_categorical_count, - aggregate_median, -) - - -@click.command("run-benchmark") -@click.help_option("-h", "--help") -@click.argument("benchmark-ids", nargs=-1, required=True) -@click.option( - "--model-id", - required=True, - help="model id to run the benchmark eval on", - default=None, - type=str, -) -@click.option( - "--output-dir", - required=True, - help="Path to the dump eval results output directory", -) -@click.option( - "--num-examples", - required=False, - help="Number of examples to evaluate on, useful for debugging", - default=None, - type=int, -) -@click.option( - "--temperature", - required=False, - help="temperature in the sampling params to run generation", - default=0.0, - type=float, -) -@click.option( - "--max-tokens", - required=False, - help="max-tokens in the sampling params to run generation", - default=4096, - type=int, -) -@click.option( - "--top-p", - required=False, - help="top-p in the sampling params to run generation", - default=0.9, - type=float, -) -@click.option( - "--repeat-penalty", - required=False, - help="repeat-penalty in the sampling params to run generation", - default=1.0, - type=float, -) -@click.option( - "--visualize", - is_flag=True, - default=False, - help="Visualize evaluation results after completion", -) -@click.pass_context -def run_benchmark( - ctx, - benchmark_ids: tuple[str, ...], - model_id: str, - output_dir: str, - num_examples: Optional[int], - temperature: float, - max_tokens: int, - top_p: float, - repeat_penalty: float, - visualize: bool, -): - """Run a evaluation benchmark task""" - - client = ctx.obj["client"] - - for benchmark_id in benchmark_ids: - benchmark = client.benchmarks.retrieve(benchmark_id=benchmark_id) - scoring_functions = benchmark.scoring_functions - dataset_id = benchmark.dataset_id - - results = client.datasets.iterrows(dataset_id=dataset_id, limit=-1 if num_examples is None else 
num_examples) - - output_res = {} - - for i, r in enumerate(tqdm(results.data)): - eval_res = client.eval.evaluate_rows( - benchmark_id=benchmark_id, - input_rows=[r], - scoring_functions=scoring_functions, - benchmark_config={ - "type": "benchmark", - "eval_candidate": { - "type": "model", - "model": model_id, - "sampling_params": { - "temperature": temperature, - "max_tokens": max_tokens, - "top_p": top_p, - "repeat_penalty": repeat_penalty, - }, - }, - }, - ) - for k in r.keys(): - if k not in output_res: - output_res[k] = [] - output_res[k].append(r[k]) - - for k in eval_res.generations[0].keys(): - if k not in output_res: - output_res[k] = [] - output_res[k].append(eval_res.generations[0][k]) - - for scoring_fn in scoring_functions: - if scoring_fn not in output_res: - output_res[scoring_fn] = [] - output_res[scoring_fn].append(eval_res.scores[scoring_fn].score_rows[0]) - - aggregation_functions = client.scoring_functions.retrieve( - scoring_fn_id=scoring_fn - ).params.aggregation_functions - - # only output the aggregation result for the last row - if i == len(results.data) - 1: - for aggregation_function in aggregation_functions: - scoring_results = output_res[scoring_fn] - if aggregation_function == "categorical_count": - output_res[scoring_fn].append(aggregate_categorical_count(scoring_results)) - elif aggregation_function == "average": - output_res[scoring_fn].append(aggregate_average(scoring_results)) - elif aggregation_function == "weighted_average": - output_res[scoring_fn].append(aggregate_weighted_average(scoring_results)) - elif aggregation_function == "median": - output_res[scoring_fn].append(aggregate_median(scoring_results)) - elif aggregation_function == "accuracy": - output_res[scoring_fn].append(aggregate_accuracy(scoring_results)) - else: - raise NotImplementedError( - f"Aggregation function {aggregation_function} is not supported yet" - ) - - # Create output directory if it doesn't exist - os.makedirs(output_dir, exist_ok=True) - # Save results to JSON file - output_file = os.path.join(output_dir, f"{benchmark_id}_results.json") - with open(output_file, "w") as f: - json.dump(output_res, f, indent=2) - - rprint(f"[green]✓[/green] Results saved to: [blue]{output_file}[/blue]!\n") - - if visualize: - for scoring_fn in scoring_functions: - aggregation_functions = client.scoring_functions.retrieve( - scoring_fn_id=scoring_fn - ).params.aggregation_functions - - for aggregation_function in aggregation_functions: - res = output_res[scoring_fn] - assert len(res) > 0 and "score" in res[0] - if aggregation_function == "categorical_count": - scores = [str(r["score"]) for r in res] - unique_scores = sorted(list(set(scores))) - counts = [scores.count(s) for s in unique_scores] - create_bar_chart( - counts, - unique_scores, - title=f"{scoring_fn}-{aggregation_function}", - ) - else: - raise NotImplementedError( - f"Aggregation function {aggregation_function} ius not supported for visualization yet" - ) diff --git a/src/llama_stack_client/lib/cli/eval/run_scoring.py b/src/llama_stack_client/lib/cli/eval/run_scoring.py deleted file mode 100644 index 78560a0a..00000000 --- a/src/llama_stack_client/lib/cli/eval/run_scoring.py +++ /dev/null @@ -1,120 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -import json -import os -from typing import Optional - -import click -import pandas -from rich import print as rprint -from tqdm.rich import tqdm - - -@click.command("run-scoring") -@click.help_option("-h", "--help") -@click.argument("scoring-function-ids", nargs=-1, required=True) -@click.option( - "--dataset-id", - required=False, - help="Pre-registered dataset_id to score (from llama-stack-client datasets list)", -) -@click.option( - "--dataset-path", - required=False, - help="Path to the dataset file to score", - type=click.Path(exists=True), -) -@click.option( - "--scoring-params-config", - required=False, - help="Path to the scoring params config file in JSON format", - type=click.Path(exists=True), -) -@click.option( - "--num-examples", - required=False, - help="Number of examples to evaluate on, useful for debugging", - default=None, - type=int, -) -@click.option( - "--output-dir", - required=True, - help="Path to the dump eval results output directory", -) -@click.option( - "--visualize", - is_flag=True, - default=False, - help="Visualize evaluation results after completion", -) -@click.pass_context -def run_scoring( - ctx, - scoring_function_ids: tuple[str, ...], - dataset_id: Optional[str], - dataset_path: Optional[str], - scoring_params_config: Optional[str], - num_examples: Optional[int], - output_dir: str, - visualize: bool, -): - """Run scoring from application datasets""" - # one of dataset_id or dataset_path is required - if dataset_id is None and dataset_path is None: - raise click.BadParameter("Specify either dataset_id (pre-registered dataset) or dataset_path (local file)") - - client = ctx.obj["client"] - - scoring_params = {fn_id: None for fn_id in scoring_function_ids} - if scoring_params_config: - with open(scoring_params_config, "r") as f: - scoring_params = json.load(f) - - output_res = {} - - if dataset_id is not None: - dataset = client.datasets.retrieve(dataset_id=dataset_id) - if not dataset: - click.BadParameter( - f"Dataset {dataset_id} not found. Please register using llama-stack-client datasets register" - ) - - # TODO: this will eventually be replaced with jobs polling from server vis score_bath - # For now, get all datasets rows via datasets API - results = client.datasets.iterrows(dataset_id=dataset_id, limit=-1 if num_examples is None else num_examples) - rows = results.rows - - if dataset_path is not None: - df = pandas.read_csv(dataset_path) - rows = df.to_dict(orient="records") - if num_examples is not None: - rows = rows[:num_examples] - - for r in tqdm(rows): - score_res = client.scoring.score( - input_rows=[r], - scoring_functions=scoring_params, - ) - for k in r.keys(): - if k not in output_res: - output_res[k] = [] - output_res[k].append(r[k]) - - for fn_id in scoring_function_ids: - if fn_id not in output_res: - output_res[fn_id] = [] - output_res[fn_id].append(score_res.results[fn_id].score_rows[0]) - - # Create output directory if it doesn't exist - os.makedirs(output_dir, exist_ok=True) - output_file = os.path.join(output_dir, f"{dataset_path or dataset_id}_score_results.csv") - df = pandas.DataFrame(output_res) - df.to_csv(output_file, index=False) - print(df) - - rprint(f"[green]✓[/green] Results saved to: [blue]{output_file}[/blue]!\n") diff --git a/src/llama_stack_client/lib/cli/eval/utils.py b/src/llama_stack_client/lib/cli/eval/utils.py deleted file mode 100644 index 96d8d54c..00000000 --- a/src/llama_stack_client/lib/cli/eval/utils.py +++ /dev/null @@ -1,58 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. 
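Stripped of the CLI plumbing, the per-row call that `run-scoring` issues looks like the sketch below. It assumes a reachable distribution at the default endpoint; the scoring-function id and row columns are illustrative placeholders.

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# One row scored against one function, mirroring the loop in run_scoring above.
row = {"input_query": "What is 2 + 2?", "generated_answer": "4", "expected_answer": "4"}
scoring_params = {"basic::equality": None}  # None = use the function's default params

score_res = client.scoring.score(
    input_rows=[row],
    scoring_functions=scoring_params,
)
print(score_res.results["basic::equality"].score_rows[0])  # e.g. {"score": 1.0}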
-# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Any, Dict, List, Union - - -def aggregate_categorical_count( - scoring_results: List[Dict[str, Union[bool, float, str, List[object], object, None]]], -) -> Dict[str, Any]: - scores = [str(r["score"]) for r in scoring_results] - unique_scores = sorted(list(set(scores))) - return {"categorical_count": {s: scores.count(s) for s in unique_scores}} - - -def aggregate_average( - scoring_results: List[Dict[str, Union[bool, float, str, List[object], object, None]]], -) -> Dict[str, Any]: - return { - "average": sum(result["score"] for result in scoring_results if result["score"] is not None) - / len([_ for _ in scoring_results if _["score"] is not None]), - } - - -def aggregate_weighted_average( - scoring_results: List[Dict[str, Union[bool, float, str, List[object], object, None]]], -) -> Dict[str, Any]: - return { - "weighted_average": sum( - result["score"] * result["weight"] - for result in scoring_results - if result["score"] is not None and result["weight"] is not None - ) - / sum(result["weight"] for result in scoring_results if result["weight"] is not None), - } - - -def aggregate_median( - scoring_results: List[Dict[str, Union[bool, float, str, List[object], object, None]]], -) -> Dict[str, Any]: - scores = [r["score"] for r in scoring_results if r["score"] is not None] - median = statistics.median(scores) if scores else None - return {"median": median} - - -def aggregate_accuracy( - scoring_results: List[Dict[str, Union[bool, float, str, List[object], object, None]]], -) -> Dict[str, Any]: - num_correct = sum(result["score"] for result in scoring_results) - avg_score = num_correct / len(scoring_results) - - return { - "accuracy": avg_score, - "num_correct": num_correct, - "num_total": len(scoring_results), - } diff --git a/src/llama_stack_client/lib/cli/eval_tasks/__init__.py b/src/llama_stack_client/lib/cli/eval_tasks/__init__.py deleted file mode 100644 index d755c85a..00000000 --- a/src/llama_stack_client/lib/cli/eval_tasks/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .eval_tasks import eval_tasks - -__all__ = ["eval_tasks"] diff --git a/src/llama_stack_client/lib/cli/eval_tasks/eval_tasks.py b/src/llama_stack_client/lib/cli/eval_tasks/eval_tasks.py deleted file mode 100644 index 183498fb..00000000 --- a/src/llama_stack_client/lib/cli/eval_tasks/eval_tasks.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
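The aggregation helpers deleted above operate on plain lists of score rows, so they are easy to exercise in isolation; a minimal sketch, assuming the pre-removal `llama_stack_client.lib.cli.eval.utils` module path. Note that `aggregate_median` above calls `statistics.median`, which does not appear among the deleted file's imports, so an `import statistics` would be needed to use it.

from llama_stack_client.lib.cli.eval.utils import (
    aggregate_accuracy,
    aggregate_categorical_count,
)

score_rows = [{"score": 1.0}, {"score": 0.0}, {"score": 1.0}]

print(aggregate_accuracy(score_rows))
# {"accuracy": 0.666..., "num_correct": 2.0, "num_total": 3}
print(aggregate_categorical_count(score_rows))
# {"categorical_count": {"0.0": 1, "1.0": 2}}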
- - -import json -from typing import Optional - -import click -import yaml - -from ..common.utils import handle_client_errors -from .list import list_eval_tasks - - -@click.group() -@click.help_option("-h", "--help") -def eval_tasks(): - """Manage evaluation tasks.""" - - -@eval_tasks.command() -@click.help_option("-h", "--help") -@click.option("--eval-task-id", required=True, help="ID of the eval task") -@click.option("--dataset-id", required=True, help="ID of the dataset to evaluate") -@click.option("--scoring-functions", required=True, multiple=True, help="Scoring functions to use for evaluation") -@click.option("--provider-id", help="Provider ID for the eval task", default=None) -@click.option("--provider-eval-task-id", help="Provider's eval task ID", default=None) -@click.option("--metadata", type=str, help="Metadata for the eval task in JSON format") -@click.pass_context -@handle_client_errors("register eval task") -def register( - ctx, - eval_task_id: str, - dataset_id: str, - scoring_functions: tuple[str, ...], - provider_id: Optional[str], - provider_eval_task_id: Optional[str], - metadata: Optional[str], -): - """Register a new eval task""" - client = ctx.obj["client"] - - if metadata: - try: - metadata = json.loads(metadata) - except json.JSONDecodeError as err: - raise click.BadParameter("Metadata must be valid JSON") from err - - response = client.eval_tasks.register( - eval_task_id=eval_task_id, - dataset_id=dataset_id, - scoring_functions=scoring_functions, - provider_id=provider_id, - provider_eval_task_id=provider_eval_task_id, - metadata=metadata, - ) - if response: - click.echo(yaml.dump(response.dict())) - - -# Register subcommands -eval_tasks.add_command(list_eval_tasks) -eval_tasks.add_command(register) diff --git a/src/llama_stack_client/lib/cli/eval_tasks/list.py b/src/llama_stack_client/lib/cli/eval_tasks/list.py deleted file mode 100644 index d7eb9c53..00000000 --- a/src/llama_stack_client/lib/cli/eval_tasks/list.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import click -from rich.console import Console -from rich.table import Table - -from ..common.utils import handle_client_errors - - -@click.command("list") -@click.help_option("-h", "--help") -@click.pass_context -@handle_client_errors("list eval tasks") -def list_eval_tasks(ctx): - """Show available eval tasks on distribution endpoint""" - - client = ctx.obj["client"] - console = Console() - headers = [] - eval_tasks_list_response = client.eval_tasks.list() - if eval_tasks_list_response and len(eval_tasks_list_response) > 0: - headers = sorted(eval_tasks_list_response[0].__dict__.keys()) - - if eval_tasks_list_response: - table = Table() - for header in headers: - table.add_column(header) - - for item in eval_tasks_list_response: - table.add_row(*[str(getattr(item, header)) for header in headers]) - console.print(table) diff --git a/src/llama_stack_client/lib/cli/inference/__init__.py b/src/llama_stack_client/lib/cli/inference/__init__.py deleted file mode 100644 index d10d45c4..00000000 --- a/src/llama_stack_client/lib/cli/inference/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from .inference import inference - -__all__ = ["inference"] diff --git a/src/llama_stack_client/lib/cli/inference/inference.py b/src/llama_stack_client/lib/cli/inference/inference.py deleted file mode 100644 index 0cc16396..00000000 --- a/src/llama_stack_client/lib/cli/inference/inference.py +++ /dev/null @@ -1,89 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Optional, List, Dict -import traceback - -import click -from rich.console import Console - -from ...inference.event_logger import EventLogger -from ..common.utils import handle_client_errors - - -@click.group() -@click.help_option("-h", "--help") -def inference(): - """Inference (chat).""" - - -@click.command("chat-completion") -@click.help_option("-h", "--help") -@click.option("--message", help="Message") -@click.option("--stream", is_flag=True, help="Streaming", default=False) -@click.option("--session", is_flag=True, help="Start a Chat Session", default=False) -@click.option("--model-id", required=False, help="Model ID") -@click.pass_context -@handle_client_errors("inference chat-completion") -def chat_completion(ctx, message: str, stream: bool, session: bool, model_id: Optional[str]): - """Show available inference chat completion endpoints on distribution endpoint""" - if not message and not session: - click.secho( - "you must specify either --message or --session", - fg="red", - ) - raise click.exceptions.Exit(1) - client = ctx.obj["client"] - console = Console() - - if not model_id: - available_models = [model.identifier for model in client.models.list() if model.model_type == "llm"] - model_id = available_models[0] - - messages = [] - if message: - messages.append({"role": "user", "content": message}) - response = client.chat.completions.create( - model=model_id, - messages=messages, - stream=stream, - ) - if not stream: - console.print(response) - else: - for event in EventLogger().log(response): - event.print() - if session: - chat_session(client=client, model_id=model_id, messages=messages, console=console) - - -def chat_session(client, model_id: Optional[str], messages: List[Dict[str, str]], console: Console): - """Run an interactive chat session with the served model""" - while True: - try: - message = input(">>> ") - if message in ["\\q", "quit"]: - console.print("Exiting") - break - messages.append({"role": "user", "content": message}) - response = client.chat.completions.create( - model=model_id, - messages=messages, - stream=True, - ) - for event in EventLogger().log(response): - event.print() - except Exception as exc: - traceback.print_exc() - console.print(f"Error in chat session {exc}") - break - except KeyboardInterrupt as exc: - console.print("\nDetected user interrupt, exiting") - break - - -# Register subcommands -inference.add_command(chat_completion) diff --git a/src/llama_stack_client/lib/cli/inspect/__init__.py b/src/llama_stack_client/lib/cli/inspect/__init__.py deleted file mode 100644 index db651969..00000000 --- a/src/llama_stack_client/lib/cli/inspect/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .inspect import inspect - -__all__ = ["inspect"] diff --git a/src/llama_stack_client/lib/cli/inspect/inspect.py b/src/llama_stack_client/lib/cli/inspect/inspect.py deleted file mode 100644 index f9c85b1b..00000000 --- a/src/llama_stack_client/lib/cli/inspect/inspect.py +++ /dev/null @@ -1,13 +0,0 @@ -import click - 
-from .version import inspect_version - - -@click.group() -@click.help_option("-h", "--help") -def inspect(): - """Inspect server configuration.""" - - -# Register subcommands -inspect.add_command(inspect_version) diff --git a/src/llama_stack_client/lib/cli/inspect/version.py b/src/llama_stack_client/lib/cli/inspect/version.py deleted file mode 100644 index 212b9f9d..00000000 --- a/src/llama_stack_client/lib/cli/inspect/version.py +++ /dev/null @@ -1,16 +0,0 @@ -import click -from rich.console import Console - -from ..common.utils import handle_client_errors - - -@click.command("version") -@click.help_option("-h", "--help") -@click.pass_context -@handle_client_errors("inspect version") -def inspect_version(ctx): - """Show available providers on distribution endpoint""" - client = ctx.obj["client"] - console = Console() - version_response = client.inspect.version() - console.print(version_response) diff --git a/src/llama_stack_client/lib/cli/llama_stack_client.py b/src/llama_stack_client/lib/cli/llama_stack_client.py deleted file mode 100644 index 54c46aaa..00000000 --- a/src/llama_stack_client/lib/cli/llama_stack_client.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import os -from importlib.metadata import version - -import click -import yaml - -from llama_stack_client import LlamaStackClient - -from .configure import configure -from .constants import get_config_file_path -from .datasets import datasets -from .eval import eval -from .eval_tasks import eval_tasks -from .inference import inference -from .inspect import inspect -from .models import models -from .post_training import post_training -from .providers import providers -from .scoring_functions import scoring_functions -from .shields import shields -from .toolgroups import toolgroups -from .vector_dbs import vector_dbs - - -@click.group() -@click.help_option("-h", "--help") -@click.version_option(version=version("llama-stack-client"), prog_name="llama-stack-client") -@click.option("--endpoint", type=str, help="Llama Stack distribution endpoint", default="") -@click.option("--api-key", type=str, help="Llama Stack distribution API key", default="") -@click.option("--config", type=str, help="Path to config file", default=None) -@click.pass_context -def llama_stack_client(ctx, endpoint: str, api_key: str, config: str | None): - """Welcome to the llama-stack-client CLI - a command-line interface for interacting with Llama Stack""" - ctx.ensure_object(dict) - - # If no config provided, check default location - if config and endpoint: - raise ValueError("Cannot use both config and endpoint") - - if config is None: - default_config = get_config_file_path() - if default_config.exists(): - config = str(default_config) - - if config: - try: - with open(config, "r") as f: - config_dict = yaml.safe_load(f) - endpoint = config_dict.get("endpoint", endpoint) - api_key = config_dict.get("api_key", "") - except Exception as e: - click.echo(f"Error loading config from {config}: {str(e)}", err=True) - click.echo("Falling back to HTTP client with endpoint", err=True) - - if endpoint == "": - endpoint = "http://localhost:8321" - - default_headers = {} - if api_key != "": - default_headers = { - "Authorization": f"Bearer {api_key}", - } - - client = LlamaStackClient( - base_url=endpoint, - provider_data={ - "fireworks_api_key": 
os.environ.get("FIREWORKS_API_KEY", ""), - "together_api_key": os.environ.get("TOGETHER_API_KEY", ""), - "openai_api_key": os.environ.get("OPENAI_API_KEY", ""), - }, - default_headers=default_headers, - ) - ctx.obj = {"client": client} - - -# Register all subcommands -llama_stack_client.add_command(models, "models") -llama_stack_client.add_command(vector_dbs, "vector_dbs") -llama_stack_client.add_command(shields, "shields") -llama_stack_client.add_command(eval_tasks, "eval_tasks") -llama_stack_client.add_command(providers, "providers") -llama_stack_client.add_command(datasets, "datasets") -llama_stack_client.add_command(configure, "configure") -llama_stack_client.add_command(scoring_functions, "scoring_functions") -llama_stack_client.add_command(eval, "eval") -llama_stack_client.add_command(inference, "inference") -llama_stack_client.add_command(post_training, "post_training") -llama_stack_client.add_command(inspect, "inspect") -llama_stack_client.add_command(toolgroups, "toolgroups") - - -def main(): - llama_stack_client() - - -if __name__ == "__main__": - main() diff --git a/src/llama_stack_client/lib/cli/models/__init__.py b/src/llama_stack_client/lib/cli/models/__init__.py deleted file mode 100644 index 64479669..00000000 --- a/src/llama_stack_client/lib/cli/models/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .models import models - -__all__ = ["models"] diff --git a/src/llama_stack_client/lib/cli/models/models.py b/src/llama_stack_client/lib/cli/models/models.py deleted file mode 100644 index c724e5d5..00000000 --- a/src/llama_stack_client/lib/cli/models/models.py +++ /dev/null @@ -1,143 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from typing import Optional - -import click -from rich.console import Console -from rich.table import Table - -from ..common.utils import handle_client_errors - - -@click.group() -@click.help_option("-h", "--help") -def models(): - """Manage GenAI models.""" - - -@click.command(name="list", help="Show available llama models at distribution endpoint") -@click.help_option("-h", "--help") -@click.pass_context -@handle_client_errors("list models") -def list_models(ctx): - client = ctx.obj["client"] - console = Console() - - headers = [ - "model_type", - "identifier", - "provider_alias", - "metadata", - "provider_id", - ] - response = client.models.list() - if response: - table = Table( - show_lines=True, # Add lines between rows for better readability - padding=(0, 1), # Add horizontal padding - expand=True, # Allow table to use full width - ) - - # Configure columns with specific styling - table.add_column("model_type", style="blue") - table.add_column("identifier", style="bold cyan", no_wrap=True, overflow="fold") - table.add_column("provider_resource_id", style="yellow", no_wrap=True, overflow="fold") - table.add_column("metadata", style="magenta", max_width=30, overflow="fold") - table.add_column("provider_id", style="green", max_width=20) - - for item in response: - table.add_row( - item.model_type, - item.identifier, - item.provider_resource_id, - str(item.metadata or ""), - item.provider_id, - ) - - # Create a title for the table - console.print("\n[bold]Available Models[/bold]\n") - console.print(table) - console.print(f"\nTotal models: {len(response)}\n") - - -@click.command(name="get") -@click.help_option("-h", "--help") -@click.argument("model_id") -@click.pass_context -@handle_client_errors("get model details") -def get_model(ctx, model_id: str): - """Show details of a specific model at the distribution endpoint""" - client = ctx.obj["client"] - console = Console() - - models_get_response = client.models.retrieve(model_id=model_id) - - if not models_get_response: - console.print( - f"Model {model_id} is not found at distribution endpoint. 
" - "Please ensure endpoint is serving specified model.", - style="bold red", - ) - return - - headers = sorted(models_get_response.__dict__.keys()) - table = Table() - for header in headers: - table.add_column(header) - - table.add_row(*[str(models_get_response.__dict__[header]) for header in headers]) - console.print(table) - - -@click.command(name="register", help="Register a new model at distribution endpoint") -@click.help_option("-h", "--help") -@click.argument("model_id") -@click.option("--provider-id", help="Provider ID for the model", default=None) -@click.option("--provider-model-id", help="Provider's model ID", default=None) -@click.option("--metadata", help="JSON metadata for the model", default=None) -@click.pass_context -@handle_client_errors("register model") -def register_model( - ctx, - model_id: str, - provider_id: Optional[str], - provider_model_id: Optional[str], - metadata: Optional[str], -): - """Register a new model at distribution endpoint""" - client = ctx.obj["client"] - console = Console() - - response = client.models.register( - model_id=model_id, - provider_id=provider_id, - provider_model_id=provider_model_id, - metadata=metadata, - ) - if response: - console.print(f"[green]Successfully registered model {model_id}[/green]") - - -@click.command(name="unregister", help="Unregister a model from distribution endpoint") -@click.help_option("-h", "--help") -@click.argument("model_id") -@click.pass_context -@handle_client_errors("unregister model") -def unregister_model(ctx, model_id: str): - client = ctx.obj["client"] - console = Console() - - response = client.models.unregister(model_id=model_id) - if response: - console.print(f"[green]Successfully deleted model {model_id}[/green]") - - -# Register subcommands -models.add_command(list_models) -models.add_command(get_model) -models.add_command(register_model) -models.add_command(unregister_model) diff --git a/src/llama_stack_client/lib/cli/post_training/__init__.py b/src/llama_stack_client/lib/cli/post_training/__init__.py deleted file mode 100644 index bbb17b3c..00000000 --- a/src/llama_stack_client/lib/cli/post_training/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .post_training import post_training - -__all__ = ["post_training"] diff --git a/src/llama_stack_client/lib/cli/post_training/post_training.py b/src/llama_stack_client/lib/cli/post_training/post_training.py deleted file mode 100644 index b9b353fb..00000000 --- a/src/llama_stack_client/lib/cli/post_training/post_training.py +++ /dev/null @@ -1,115 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from typing import Optional - -import click -from rich.console import Console - -from llama_stack_client.types.post_training_supervised_fine_tune_params import AlgorithmConfigParam, TrainingConfig - -from ..common.utils import handle_client_errors - - -@click.group() -@click.help_option("-h", "--help") -def post_training(): - """Post-training.""" - - -@click.command("supervised_fine_tune") -@click.help_option("-h", "--help") -@click.option("--job-uuid", required=True, help="Job UUID") -@click.option("--model", required=True, help="Model ID") -@click.option("--algorithm-config", required=True, help="Algorithm Config") -@click.option("--training-config", required=True, help="Training Config") -@click.option("--checkpoint-dir", required=False, help="Checkpoint Config", default=None) -@click.pass_context -@handle_client_errors("post_training supervised_fine_tune") -def supervised_fine_tune( - ctx, - job_uuid: str, - model: str, - algorithm_config: AlgorithmConfigParam, - training_config: TrainingConfig, - checkpoint_dir: Optional[str], -): - """Kick off a supervised fine tune job""" - client = ctx.obj["client"] - console = Console() - - post_training_job = client.post_training.supervised_fine_tune( - job_uuid=job_uuid, - model=model, - algorithm_config=algorithm_config, - training_config=training_config, - checkpoint_dir=checkpoint_dir, - # logger_config and hyperparam_search_config haven't been used yet - logger_config={}, - hyperparam_search_config={}, - ) - console.print(post_training_job.job_uuid) - - -@click.command("list") -@click.help_option("-h", "--help") -@click.pass_context -@handle_client_errors("post_training get_training_jobs") -def get_training_jobs(ctx): - """Show the list of available post training jobs""" - client = ctx.obj["client"] - console = Console() - - post_training_jobs = client.post_training.job.list() - console.print([post_training_job.job_uuid for post_training_job in post_training_jobs]) - - -@click.command("status") -@click.help_option("-h", "--help") -@click.option("--job-uuid", required=True, help="Job UUID") -@click.pass_context -@handle_client_errors("post_training get_training_job_status") -def get_training_job_status(ctx, job_uuid: str): - """Show the status of a specific post training job""" - client = ctx.obj["client"] - console = Console() - - job_status_reponse = client.post_training.job.status(job_uuid=job_uuid) - console.print(job_status_reponse) - - -@click.command("artifacts") -@click.help_option("-h", "--help") -@click.option("--job-uuid", required=True, help="Job UUID") -@click.pass_context -@handle_client_errors("post_training get_training_job_artifacts") -def get_training_job_artifacts(ctx, job_uuid: str): - """Get the training artifacts of a specific post training job""" - client = ctx.obj["client"] - console = Console() - - job_artifacts = client.post_training.job.artifacts(job_uuid=job_uuid) - console.print(job_artifacts) - - -@click.command("cancel") -@click.help_option("-h", "--help") -@click.option("--job-uuid", required=True, help="Job UUID") -@click.pass_context -@handle_client_errors("post_training cancel_training_job") -def cancel_training_job(ctx, job_uuid: str): - """Cancel the training job""" - client = ctx.obj["client"] - - client.post_training.job.cancel(job_uuid=job_uuid) - - -# Register subcommands -post_training.add_command(supervised_fine_tune) -post_training.add_command(get_training_jobs) -post_training.add_command(get_training_job_status) -post_training.add_command(get_training_job_artifacts) 
-post_training.add_command(cancel_training_job) diff --git a/src/llama_stack_client/lib/cli/providers/__init__.py b/src/llama_stack_client/lib/cli/providers/__init__.py deleted file mode 100644 index 2e632915..00000000 --- a/src/llama_stack_client/lib/cli/providers/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .providers import providers - -__all__ = ["providers"] diff --git a/src/llama_stack_client/lib/cli/providers/inspect.py b/src/llama_stack_client/lib/cli/providers/inspect.py deleted file mode 100644 index fc03d00d..00000000 --- a/src/llama_stack_client/lib/cli/providers/inspect.py +++ /dev/null @@ -1,27 +0,0 @@ -import click -import yaml -from rich.console import Console - -from ..common.utils import handle_client_errors - - -@click.command(name="inspect") -@click.argument("provider_id") -@click.pass_context -@handle_client_errors("inspect providers") -def inspect_provider(ctx, provider_id): - """Show available providers on distribution endpoint""" - client = ctx.obj["client"] - console = Console() - - providers_response = client.providers.retrieve(provider_id=provider_id) - - if not providers_response: - click.secho("Provider not found", fg="red") - raise click.exceptions.Exit(1) - - console.print(f"provider_id={providers_response.provider_id}") - console.print(f"provider_type={providers_response.provider_type}") - console.print("config:") - for line in yaml.dump(providers_response.config, indent=2).split("\n"): - console.print(line) diff --git a/src/llama_stack_client/lib/cli/providers/list.py b/src/llama_stack_client/lib/cli/providers/list.py deleted file mode 100644 index 692860e3..00000000 --- a/src/llama_stack_client/lib/cli/providers/list.py +++ /dev/null @@ -1,26 +0,0 @@ -import click -from rich.console import Console -from rich.table import Table - -from ..common.utils import handle_client_errors - - -@click.command("list") -@click.help_option("-h", "--help") -@click.pass_context -@handle_client_errors("list providers") -def list_providers(ctx): - """Show available providers on distribution endpoint""" - client = ctx.obj["client"] - console = Console() - headers = ["API", "Provider ID", "Provider Type"] - - providers_response = client.providers.list() - table = Table() - for header in headers: - table.add_column(header) - - for response in providers_response: - table.add_row(response.api, response.provider_id, response.provider_type) - - console.print(table) diff --git a/src/llama_stack_client/lib/cli/providers/providers.py b/src/llama_stack_client/lib/cli/providers/providers.py deleted file mode 100644 index bd07628d..00000000 --- a/src/llama_stack_client/lib/cli/providers/providers.py +++ /dev/null @@ -1,15 +0,0 @@ -import click - -from .list import list_providers -from .inspect import inspect_provider - - -@click.group() -@click.help_option("-h", "--help") -def providers(): - """Manage API providers.""" - - -# Register subcommands -providers.add_command(list_providers) -providers.add_command(inspect_provider) diff --git a/src/llama_stack_client/lib/cli/scoring_functions/__init__.py b/src/llama_stack_client/lib/cli/scoring_functions/__init__.py deleted file mode 100644 index 9699df68..00000000 --- a/src/llama_stack_client/lib/cli/scoring_functions/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from .scoring_functions import scoring_functions - -__all__ = ["scoring_functions"] diff --git a/src/llama_stack_client/lib/cli/scoring_functions/list.py b/src/llama_stack_client/lib/cli/scoring_functions/list.py deleted file mode 100644 index b4bb3b70..00000000 --- a/src/llama_stack_client/lib/cli/scoring_functions/list.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -import click -from rich.console import Console -from rich.table import Table - -from ..common.utils import handle_client_errors - - -@click.command("list") -@click.help_option("-h", "--help") -@click.pass_context -@handle_client_errors("list scoring functions") -def list_scoring_functions(ctx): - """Show available scoring functions on distribution endpoint""" - - client = ctx.obj["client"] - console = Console() - headers = [ - "identifier", - "provider_id", - "description", - "type", - ] - - scoring_functions_list_response = client.scoring_functions.list() - if scoring_functions_list_response: - table = Table() - for header in headers: - table.add_column(header) - - for item in scoring_functions_list_response: - table.add_row(*[str(getattr(item, header)) for header in headers]) - console.print(table) diff --git a/src/llama_stack_client/lib/cli/scoring_functions/scoring_functions.py b/src/llama_stack_client/lib/cli/scoring_functions/scoring_functions.py deleted file mode 100644 index ba7b58eb..00000000 --- a/src/llama_stack_client/lib/cli/scoring_functions/scoring_functions.py +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -import json -from typing import Optional - -import click -import yaml - -from .list import list_scoring_functions - - -@click.group() -@click.help_option("-h", "--help") -def scoring_functions(): - """Manage scoring functions.""" - - -@scoring_functions.command() -@click.help_option("-h", "--help") -@click.option("--scoring-fn-id", required=True, help="Id of the scoring function") -@click.option("--description", required=True, help="Description of the scoring function") -@click.option("--return-type", type=str, required=True, help="Return type of the scoring function") -@click.option("--provider-id", type=str, help="Provider ID for the scoring function", default=None) -@click.option("--provider-scoring-fn-id", type=str, help="Provider's scoring function ID", default=None) -@click.option("--params", type=str, help="Parameters for the scoring function in JSON format", default=None) -@click.pass_context -def register( - ctx, - scoring_fn_id: str, - description: str, - return_type: str, - provider_id: Optional[str], - provider_scoring_fn_id: Optional[str], - params: Optional[str], -): - """Register a new scoring function""" - client = ctx.obj["client"] - - if params: - try: - params = json.loads(params) - except json.JSONDecodeError as err: - raise click.BadParameter("Parameters must be valid JSON") from err - - response = client.scoring_functions.register( - scoring_fn_id=scoring_fn_id, - description=description, - return_type=json.loads(return_type), - provider_id=provider_id, - provider_scoring_fn_id=provider_scoring_fn_id, - params=params, - ) - if response: - click.echo(yaml.dump(response.dict())) - - -# Register subcommands -scoring_functions.add_command(list_scoring_functions) -scoring_functions.add_command(register) diff --git a/src/llama_stack_client/lib/cli/shields/__init__.py b/src/llama_stack_client/lib/cli/shields/__init__.py deleted file mode 100644 index 45f397c4..00000000 --- a/src/llama_stack_client/lib/cli/shields/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .shields import shields - -__all__ = ["shields"] diff --git a/src/llama_stack_client/lib/cli/shields/shields.py b/src/llama_stack_client/lib/cli/shields/shields.py deleted file mode 100644 index 5a3177f9..00000000 --- a/src/llama_stack_client/lib/cli/shields/shields.py +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from typing import Optional - -import click -import yaml -from rich.console import Console -from rich.table import Table - -from ..common.utils import handle_client_errors - - -@click.group() -@click.help_option("-h", "--help") -def shields(): - """Manage safety shield services.""" - - -@click.command("list") -@click.help_option("-h", "--help") -@click.pass_context -@handle_client_errors("list shields") -def list(ctx): - """Show available safety shields on distribution endpoint""" - client = ctx.obj["client"] - console = Console() - - shields_list_response = client.shields.list() - headers = [ - "identifier", - "provider_alias", - "params", - "provider_id", - ] - - if shields_list_response: - table = Table( - show_lines=True, # Add lines between rows for better readability - padding=(0, 1), # Add horizontal padding - expand=True, # Allow table to use full width - ) - - table.add_column("identifier", style="bold cyan", no_wrap=True, overflow="fold") - table.add_column("provider_alias", style="yellow", no_wrap=True, overflow="fold") - table.add_column("params", style="magenta", max_width=30, overflow="fold") - table.add_column("provider_id", style="green", max_width=20) - - for item in shields_list_response: - table.add_row( - item.identifier, - item.provider_resource_id, - str(item.params or ""), - item.provider_id, - ) - - console.print(table) - - -@shields.command() -@click.help_option("-h", "--help") -@click.option("--shield-id", required=True, help="Id of the shield") -@click.option("--provider-id", help="Provider ID for the shield", default=None) -@click.option("--provider-shield-id", help="Provider's shield ID", default=None) -@click.option( - "--params", - type=str, - help="JSON configuration parameters for the shield", - default=None, -) -@click.pass_context -@handle_client_errors("register shield") -def register( - ctx, - shield_id: str, - provider_id: Optional[str], - provider_shield_id: Optional[str], - params: Optional[str], -): - """Register a new safety shield""" - client = ctx.obj["client"] - - response = client.shields.register( - shield_id=shield_id, - params=params, - provider_id=provider_id, - provider_shield_id=provider_shield_id, - ) - if response: - click.echo(yaml.dump(response.dict())) - - -# Register subcommands -shields.add_command(list) -shields.add_command(register) diff --git a/src/llama_stack_client/lib/cli/toolgroups/__init__.py b/src/llama_stack_client/lib/cli/toolgroups/__init__.py deleted file mode 100644 index 912d911b..00000000 --- a/src/llama_stack_client/lib/cli/toolgroups/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .toolgroups import toolgroups - -__all__ = ["toolgroups"] diff --git a/src/llama_stack_client/lib/cli/toolgroups/toolgroups.py b/src/llama_stack_client/lib/cli/toolgroups/toolgroups.py deleted file mode 100644 index 1e3d921d..00000000 --- a/src/llama_stack_client/lib/cli/toolgroups/toolgroups.py +++ /dev/null @@ -1,130 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from typing import Optional - -import click -from rich.console import Console -from rich.table import Table - -from ..common.utils import handle_client_errors -from ....types import toolgroup_register_params -from ...._types import NOT_GIVEN, NotGiven - - -@click.group() -@click.help_option("-h", "--help") -def toolgroups(): - """Manage available tool groups.""" - - -@click.command(name="list", help="Show available llama toolgroups at distribution endpoint") -@click.help_option("-h", "--help") -@click.pass_context -@handle_client_errors("list toolgroups") -def list_toolgroups(ctx): - client = ctx.obj["client"] - console = Console() - - headers = ["identifier", "provider_id", "args", "mcp_endpoint"] - response = client.toolgroups.list() - if response: - table = Table() - for header in headers: - table.add_column(header) - - for item in response: - row = [str(getattr(item, header)) for header in headers] - table.add_row(*row) - console.print(table) - - -@click.command(name="get") -@click.help_option("-h", "--help") -@click.argument("toolgroup_id") -@click.pass_context -@handle_client_errors("get toolgroup details") -def get_toolgroup(ctx, toolgroup_id: str): - """Show available llama toolgroups at distribution endpoint""" - client = ctx.obj["client"] - console = Console() - - toolgroups_get_response = client.tools.list() - # filter response to only include provided toolgroup_id - toolgroups_get_response = [ - toolgroup for toolgroup in toolgroups_get_response if toolgroup.toolgroup_id == toolgroup_id - ] - if len(toolgroups_get_response) == 0: - console.print( - f"Toolgroup {toolgroup_id} is not found at distribution endpoint. " - "Please ensure endpoint is serving specified toolgroup.", - style="bold red", - ) - return - - headers = sorted(toolgroups_get_response[0].__dict__.keys()) - table = Table() - for header in headers: - table.add_column(header) - - for toolgroup in toolgroups_get_response: - row = [str(getattr(toolgroup, header)) for header in headers] - table.add_row(*row) - console.print(table) - - -@click.command(name="register", help="Register a new toolgroup at distribution endpoint") -@click.help_option("-h", "--help") -@click.argument("toolgroup_id") -@click.option("--provider-id", help="Provider ID for the toolgroup", default=None) -@click.option("--mcp-endpoint", help="JSON mcp_config for the toolgroup", default=None) -@click.option("--args", help="JSON args for the toolgroup", default=None) -@click.pass_context -@handle_client_errors("register toolgroup") -def register_toolgroup( - ctx, - toolgroup_id: str, - provider_id: Optional[str], - mcp_endpoint: Optional[str], - args: Optional[str], -): - """Register a new toolgroup at distribution endpoint""" - client = ctx.obj["client"] - console = Console() - - _mcp_endpoint: toolgroup_register_params.McpEndpoint | NotGiven = NOT_GIVEN - if mcp_endpoint: - _mcp_endpoint = toolgroup_register_params.McpEndpoint(uri=mcp_endpoint) - - response = client.toolgroups.register( - toolgroup_id=toolgroup_id, - provider_id=provider_id, - args=args, - mcp_endpoint=_mcp_endpoint, - ) - if response: - console.print(f"[green]Successfully registered toolgroup {toolgroup_id}[/green]") - - -@click.command(name="unregister", help="Unregister a toolgroup from distribution endpoint") -@click.help_option("-h", "--help") -@click.argument("toolgroup_id") -@click.pass_context -@handle_client_errors("unregister toolgroup") -def unregister_toolgroup(ctx, toolgroup_id: str): - client = ctx.obj["client"] - console = Console() - - response = 
client.toolgroups.unregister(toolgroup_id=toolgroup_id) - if response: - console.print(f"[green]Successfully deleted toolgroup {toolgroup_id}[/green]") - - -# Register subcommands -toolgroups.add_command(list_toolgroups) -toolgroups.add_command(get_toolgroup) -toolgroups.add_command(register_toolgroup) -toolgroups.add_command(unregister_toolgroup) diff --git a/src/llama_stack_client/lib/cli/vector_dbs/__init__.py b/src/llama_stack_client/lib/cli/vector_dbs/__init__.py deleted file mode 100644 index 62e1cd65..00000000 --- a/src/llama_stack_client/lib/cli/vector_dbs/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .vector_dbs import vector_dbs - -__all__ = ["vector_dbs"] diff --git a/src/llama_stack_client/lib/cli/vector_dbs/vector_dbs.py b/src/llama_stack_client/lib/cli/vector_dbs/vector_dbs.py deleted file mode 100644 index cb196942..00000000 --- a/src/llama_stack_client/lib/cli/vector_dbs/vector_dbs.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from typing import Optional - -import click -import yaml -from rich.console import Console -from rich.table import Table - -from ..common.utils import handle_client_errors - - -@click.group() -@click.help_option("-h", "--help") -def vector_dbs(): - """Manage vector databases.""" - - -@click.command("list") -@click.help_option("-h", "--help") -@click.pass_context -@handle_client_errors("list vector dbs") -def list(ctx): - """Show available vector dbs on distribution endpoint""" - - client = ctx.obj["client"] - console = Console() - vector_dbs_list_response = client.vector_dbs.list() - - if vector_dbs_list_response: - table = Table() - # Add our specific columns - table.add_column("identifier") - table.add_column("provider_id") - table.add_column("provider_resource_id") - table.add_column("vector_db_type") - table.add_column("params") - - for item in vector_dbs_list_response: - # Create a dict of all attributes - item_dict = item.__dict__ - - # Extract our main columns - identifier = str(item_dict.pop("identifier", "")) - provider_id = str(item_dict.pop("provider_id", "")) - provider_resource_id = str(item_dict.pop("provider_resource_id", "")) - vector_db_type = str(item_dict.pop("vector_db_type", "")) - # Convert remaining attributes to YAML string for params column - params = yaml.dump(item_dict, default_flow_style=False) - - table.add_row(identifier, provider_id, provider_resource_id, vector_db_type, params) - - console.print(table) - - -@vector_dbs.command() -@click.help_option("-h", "--help") -@click.argument("vector-db-id") -@click.option("--provider-id", help="Provider ID for the vector db", default=None) -@click.option("--provider-vector-db-id", help="Provider's vector db ID", default=None) -@click.option( - "--embedding-model", - type=str, - help="Embedding model (for vector type)", - default="all-MiniLM-L6-v2", -) -@click.option( - "--embedding-dimension", - type=int, - help="Embedding dimension (for vector type)", - default=384, -) -@click.pass_context -@handle_client_errors("register vector db") -def register( - ctx, - vector_db_id: str, - provider_id: Optional[str], - provider_vector_db_id: Optional[str], - embedding_model: Optional[str], - 
embedding_dimension: Optional[int], -): - """Create a new vector db""" - client = ctx.obj["client"] - - response = client.vector_dbs.register( - vector_db_id=vector_db_id, - provider_id=provider_id, - provider_vector_db_id=provider_vector_db_id, - embedding_model=embedding_model, - embedding_dimension=embedding_dimension, - ) - if response: - click.echo(yaml.dump(response.dict())) - - -@vector_dbs.command() -@click.help_option("-h", "--help") -@click.argument("vector-db-id") -@click.pass_context -@handle_client_errors("delete vector db") -def unregister(ctx, vector_db_id: str): - """Delete a vector db""" - client = ctx.obj["client"] - client.vector_dbs.unregister(vector_db_id=vector_db_id) - click.echo(f"Vector db '{vector_db_id}' deleted successfully") - - -# Register subcommands -vector_dbs.add_command(list) -vector_dbs.add_command(register) -vector_dbs.add_command(unregister) diff --git a/src/llama_stack_client/lib/inference/__init__.py b/src/llama_stack_client/lib/inference/__init__.py deleted file mode 100644 index 756f351d..00000000 --- a/src/llama_stack_client/lib/inference/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. diff --git a/src/llama_stack_client/lib/inference/event_logger.py b/src/llama_stack_client/lib/inference/event_logger.py deleted file mode 100644 index 14b46372..00000000 --- a/src/llama_stack_client/lib/inference/event_logger.py +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
-from typing import Generator -from termcolor import cprint -from llama_stack_client.types import ChatCompletionResponseStreamChunk, ChatCompletionChunk - - -class InferenceStreamPrintableEvent: - def __init__( - self, - content: str = "", - end: str = "\n", - color="white", - ): - self.content = content - self.color = color - self.end = "\n" if end is None else end - - def print(self, flush=True): - cprint(f"{self.content}", color=self.color, end=self.end, flush=flush) - - -class InferenceStreamLogEventPrinter: - def __init__(self): - self.is_thinking = False - - def yield_printable_events( - self, chunk: ChatCompletionResponseStreamChunk | ChatCompletionChunk - ) -> Generator[InferenceStreamPrintableEvent, None, None]: - # Check if the chunk has event attribute (ChatCompletionResponseStreamChunk) - if hasattr(chunk, "event"): - yield from self._handle_inference_stream_chunk(chunk) - # Check if the chunk has choices attribute (ChatCompletionChunk) - elif hasattr(chunk, "choices") and len(chunk.choices) > 0: - yield from self._handle_chat_completion_chunk(chunk) - - def _handle_inference_stream_chunk( - self, chunk: ChatCompletionResponseStreamChunk - ) -> Generator[InferenceStreamPrintableEvent, None, None]: - event = chunk.event - if event.event_type == "start": - yield InferenceStreamPrintableEvent("Assistant> ", color="cyan", end="") - elif event.event_type == "progress": - if event.delta.type == "reasoning": - if not self.is_thinking: - yield InferenceStreamPrintableEvent(" ", color="magenta", end="") - self.is_thinking = True - yield InferenceStreamPrintableEvent(event.delta.reasoning, color="magenta", end="") - else: - if self.is_thinking: - yield InferenceStreamPrintableEvent("", color="magenta", end="") - self.is_thinking = False - yield InferenceStreamPrintableEvent(event.delta.text, color="yellow", end="") - elif event.event_type == "complete": - yield InferenceStreamPrintableEvent("") - - def _handle_chat_completion_chunk( - self, chunk: ChatCompletionChunk - ) -> Generator[InferenceStreamPrintableEvent, None, None]: - choice = chunk.choices[0] - delta = choice.delta - if delta: - if delta.role: - yield InferenceStreamPrintableEvent(f"{delta.role}> ", color="cyan", end="") - if delta.content: - yield InferenceStreamPrintableEvent(delta.content, color="yellow", end="") - if choice.finish_reason: - if choice.finish_reason == "length": - yield InferenceStreamPrintableEvent("", color="red", end="") - yield InferenceStreamPrintableEvent() - - -class EventLogger: - def log(self, event_generator): - printer = InferenceStreamLogEventPrinter() - for chunk in event_generator: - yield from printer.yield_printable_events(chunk) diff --git a/src/llama_stack_client/lib/inference/utils.py b/src/llama_stack_client/lib/inference/utils.py deleted file mode 100644 index 24ed7cd1..00000000 --- a/src/llama_stack_client/lib/inference/utils.py +++ /dev/null @@ -1,21 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -import pathlib -import base64 - - -class MessageAttachment: - # https://developer.mozilla.org/en-US/docs/Glossary/Base64 - @classmethod - def base64(cls, file_path: str) -> str: - path = pathlib.Path(file_path) - return base64.b64encode(path.read_bytes()).decode("utf-8") - - # https://developer.mozilla.org/en-US/docs/Web/URI/Schemes/data - @classmethod - def data_url(cls, media_type: str, file_path: str) -> str: - return f"data:{media_type};base64,{cls.base64(file_path)}" diff --git a/src/llama_stack_client/lib/inline/inline.py b/src/llama_stack_client/lib/inline/inline.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/llama_stack_client/lib/stream_printer.py b/src/llama_stack_client/lib/stream_printer.py deleted file mode 100644 index a08d9663..00000000 --- a/src/llama_stack_client/lib/stream_printer.py +++ /dev/null @@ -1,24 +0,0 @@ -from .agents.event_logger import TurnStreamEventPrinter -from .inference.event_logger import InferenceStreamLogEventPrinter - - -class EventStreamPrinter: - @classmethod - def gen(cls, event_generator): - inference_printer = None - turn_printer = None - for chunk in event_generator: - if not hasattr(chunk, "event"): - raise ValueError(f"Unexpected chunk without event: {chunk}") - - event = chunk.event - if hasattr(event, "event_type"): - if not inference_printer: - inference_printer = InferenceStreamLogEventPrinter() - yield from inference_printer.yield_printable_events(chunk) - elif hasattr(event, "payload") and hasattr(event.payload, "event_type"): - if not turn_printer: - turn_printer = TurnStreamEventPrinter() - yield from turn_printer.yield_printable_events(chunk) - else: - raise ValueError(f"Unsupported event: {event}") diff --git a/src/llama_stack_client/lib/tools/mcp_oauth.py b/src/llama_stack_client/lib/tools/mcp_oauth.py deleted file mode 100644 index a3c03416..00000000 --- a/src/llama_stack_client/lib/tools/mcp_oauth.py +++ /dev/null @@ -1,297 +0,0 @@ -import asyncio -import base64 -import hashlib -import logging -import os -import socket -import threading -import time -import urllib.parse -import uuid -from http.server import BaseHTTPRequestHandler, HTTPServer - -import fire -import requests - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -class McpOAuthHelper: - """A simpler helper for OAuth2 authentication with MCP servers with OAuth discovery.""" - - def __init__(self, server_url): - self.server_url = server_url - self.server_base_url = get_base_url(server_url) - self.access_token = None - - # For PKCE (Proof Key for Code Exchange) - self.code_verifier = None - self.code_challenge = None - - # OAuth client registration - self.client_id = None - self.client_secret = None - self.registered_redirect_uris = [] - - # Callback server - self.callback_port = find_available_port(8000, 8100) - self.redirect_uri = f"http://localhost:{self.callback_port}/callback" - self.auth_code = None - self.auth_error = None - self.http_server = None - self.server_thread = None - - # Software statement for DCR - self.software_statement = { - "software_id": "simple-mcp-client", - "software_version": "1.0.0", - "software_name": "Simple MCP Client Example", - "software_description": "A simple MCP client for demonstration purposes", - "software_uri": "https://github.com/example/simple-mcp-client", - "redirect_uris": [self.redirect_uri], - "client_name": "Simple MCP Client", - "client_uri": "https://example.com/mcp-client", - "token_endpoint_auth_method": "none", # Public client - } - - def 
discover_auth_endpoints(self): - """ - Discover the OAuth server metadata according to RFC8414. - MCP servers MUST support this discovery mechanism. - """ - well_known_url = f"{self.server_base_url}/.well-known/oauth-authorization-server" - response = requests.get(well_known_url) - if response.status_code == 200: - metadata = response.json() - logger.info("✅ Successfully discovered OAuth metadata") - return metadata - - raise Exception(f"OAuth metadata discovery failed with status: {response.status_code}") - - def register_client(self, registration_endpoint): - headers = {"Content-Type": "application/json"} - - registration_request = { - "client_name": self.software_statement["client_name"], - "redirect_uris": [self.redirect_uri], - "token_endpoint_auth_method": "none", # Public client - "grant_types": ["authorization_code"], - "response_types": ["code"], - "scope": "openid", - "software_id": self.software_statement["software_id"], - "software_version": self.software_statement["software_version"], - } - - response = requests.post(registration_endpoint, headers=headers, json=registration_request) - - if response.status_code in (201, 200): - registration_data = response.json() - self.client_id = registration_data.get("client_id") - self.client_secret = registration_data.get("client_secret") - self.registered_redirect_uris = registration_data.get("redirect_uris", [self.redirect_uri]) - - logger.info(f"Client ID: {self.client_id}") - return registration_data - - raise Exception(f"Client registration failed: {response.status_code}") - - def generate_pkce_values(self): - """Generate PKCE code verifier and challenge.""" - # Generate a random code verifier - code_verifier = base64.urlsafe_b64encode(os.urandom(32)).decode("utf-8").rstrip("=") - - # Generate the code challenge using SHA-256 - code_challenge_digest = hashlib.sha256(code_verifier.encode("utf-8")).digest() - code_challenge = base64.urlsafe_b64encode(code_challenge_digest).decode("utf-8").rstrip("=") - - self.code_verifier = code_verifier - self.code_challenge = code_challenge - - return code_verifier, code_challenge - - def stop_server(self): - time.sleep(1) - if self.http_server: - self.http_server.shutdown() - - def start_callback_server(self): - def auth_callback(auth_code: str | None, error: str | None): - logger.info(f"Authorization callback received: auth_code={auth_code}, error={error}") - self.auth_code = auth_code - self.auth_error = error - threading.Thread(target=self.stop_server).start() - - self.http_server = CallbackServer(("localhost", self.callback_port), auth_callback) - - self.server_thread = threading.Thread(target=self.http_server.serve_forever) - self.server_thread.daemon = True - self.server_thread.start() - - logger.info(f"🌐 Callback server started on port {self.callback_port}") - - def exchange_code_for_token(self, auth_code, token_endpoint): - logger.info("Exchanging authorization code for access token...") - - data = { - "grant_type": "authorization_code", - "code": auth_code, - "redirect_uri": self.redirect_uri, - "client_id": self.client_id, - "code_verifier": self.code_verifier, - } - if self.client_secret: - data["client_secret"] = self.client_secret - - headers = {"Content-Type": "application/x-www-form-urlencoded"} - - response = requests.post(token_endpoint, data=data, headers=headers) - if response.status_code == 200: - token_data = response.json() - self.access_token = token_data.get("access_token") - logger.info(f"✅ Successfully obtained access token: {self.access_token}") - return self.access_token 
- - raise Exception(f"Failed to exchange code for token: {response.status_code}") - - def initiate_auth_flow(self): - auth_metadata = self.discover_auth_endpoints() - registration_endpoint = auth_metadata.get("registration_endpoint") - if registration_endpoint and not self.client_id: - self.register_client(registration_endpoint) - - self.generate_pkce_values() - - self.start_callback_server() - - auth_url = auth_metadata.get("authorization_endpoint") - if not auth_url: - raise Exception("No authorization endpoint in metadata") - - token_endpoint = auth_metadata.get("token_endpoint") - if not token_endpoint: - raise Exception("No token endpoint in metadata") - - params = { - "client_id": self.client_id, - "redirect_uri": self.redirect_uri, - "response_type": "code", - "state": str(uuid.uuid4()), # Random state - "code_challenge": self.code_challenge, - "code_challenge_method": "S256", - "scope": "openid", # Add appropriate scopes for Asana - } - - full_auth_url = f"{auth_url}?{urllib.parse.urlencode(params)}" - logger.info(f"Opening browser to authorize URL: {full_auth_url}") - logger.info("Flow will continue after you log in") - - import webbrowser - - webbrowser.open(full_auth_url) - self.server_thread.join(60) # Wait up to 1 minute - - if self.auth_code: - return self.exchange_code_for_token(self.auth_code, token_endpoint) - elif self.auth_error: - logger.error(f"Authorization failed: {self.auth_error}") - return None - else: - logger.error("Timed out waiting for authorization") - return None - - -def get_base_url(url): - parsed_url = urllib.parse.urlparse(url) - return f"{parsed_url.scheme}://{parsed_url.netloc}" - - -def find_available_port(start_port, end_port): - """Find an available port within a range.""" - for port in range(start_port, end_port + 1): - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - try: - s.bind(("localhost", port)) - return port - except socket.error: - continue - raise RuntimeError(f"No available ports in range {start_port}-{end_port}") - - -class CallbackServer(HTTPServer): - class OAuthCallbackHandler(BaseHTTPRequestHandler): - def do_GET(self): - parsed_path = urllib.parse.urlparse(self.path) - query_params = urllib.parse.parse_qs(parsed_path.query) - - if parsed_path.path == "/callback": - auth_code = query_params.get("code", [None])[0] - error = query_params.get("error", [None])[0] - - self.send_response(200) - self.send_header("Content-type", "text/html") - self.end_headers() - - if error: - self.wfile.write(b"Authorization Failed") - self.wfile.write(f"

<html><body><h1>Authorization Failed</h1><p>Error: {error}</p></body></html>".encode()) - self.server.auth_code_callback(None, error) - elif auth_code: - self.wfile.write(b"Authorization Successful") - self.wfile.write( - b"<html><body><h1>Authorization Successful</h1><p>You can close this window now.</p></body></html>" - ) - # Call the callback with the auth code - self.server.auth_code_callback(auth_code, None) - else: - self.wfile.write(b"Authorization Failed") - self.wfile.write( - b"<html><body><h1>Authorization Failed</h1><p>No authorization code received.</p></body></html>
" - ) - self.server.auth_code_callback(None, "No authorization code received") - else: - self.send_response(404) - self.end_headers() - - def log_message(self, format, *args): - """Override to suppress HTTP server logs.""" - return - - def __init__(self, server_address, auth_code_callback): - self.auth_code_callback = auth_code_callback - super().__init__(server_address, self.OAuthCallbackHandler) - - -def get_oauth_token_for_mcp_server(url: str) -> str | None: - helper = McpOAuthHelper(url) - return helper.initiate_auth_flow() - - -async def run_main(url: str): - from mcp import ClientSession - from mcp.client.sse import sse_client - - token = get_oauth_token_for_mcp_server(url) - if not token: - return - - headers = { - "Authorization": f"Bearer {token}", - } - - async with sse_client(url, headers=headers) as streams: - async with ClientSession(*streams) as session: - await session.initialize() - result = await session.list_tools() - - logger.info(f"Tools: {len(result.tools)}, showing first 5:") - for t in result.tools[:5]: - logger.info(f"{t.name}: {t.description}") - - -def main(url: str): - asyncio.run(run_main(url)) - - -if __name__ == "__main__": - fire.Fire(main) diff --git a/src/llama_stack_client/pagination.py b/src/llama_stack_client/pagination.py deleted file mode 100644 index c2f7fe80..00000000 --- a/src/llama_stack_client/pagination.py +++ /dev/null @@ -1,50 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import List, Generic, TypeVar, Optional -from typing_extensions import override - -from ._base_client import BasePage, PageInfo, BaseSyncPage, BaseAsyncPage - -__all__ = ["SyncDatasetsIterrows", "AsyncDatasetsIterrows"] - -_T = TypeVar("_T") - - -class SyncDatasetsIterrows(BaseSyncPage[_T], BasePage[_T], Generic[_T]): - data: List[_T] - next_index: Optional[int] = None - - @override - def _get_page_items(self) -> List[_T]: - data = self.data - if not data: - return [] - return data - - @override - def next_page_info(self) -> Optional[PageInfo]: - next_index = self.next_index - if not next_index: - return None - - return PageInfo(params={"start_index": next_index}) - - -class AsyncDatasetsIterrows(BaseAsyncPage[_T], BasePage[_T], Generic[_T]): - data: List[_T] - next_index: Optional[int] = None - - @override - def _get_page_items(self) -> List[_T]: - data = self.data - if not data: - return [] - return data - - @override - def next_page_info(self) -> Optional[PageInfo]: - next_index = self.next_index - if not next_index: - return None - - return PageInfo(params={"start_index": next_index}) diff --git a/src/llama_stack_client/resources/__init__.py b/src/llama_stack_client/resources/__init__.py deleted file mode 100644 index 23f61be1..00000000 --- a/src/llama_stack_client/resources/__init__.py +++ /dev/null @@ -1,383 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from .chat import ( - ChatResource, - AsyncChatResource, - ChatResourceWithRawResponse, - AsyncChatResourceWithRawResponse, - ChatResourceWithStreamingResponse, - AsyncChatResourceWithStreamingResponse, -) -from .eval import ( - EvalResource, - AsyncEvalResource, - EvalResourceWithRawResponse, - AsyncEvalResourceWithRawResponse, - EvalResourceWithStreamingResponse, - AsyncEvalResourceWithStreamingResponse, -) -from .files import ( - FilesResource, - AsyncFilesResource, - FilesResourceWithRawResponse, - AsyncFilesResourceWithRawResponse, - FilesResourceWithStreamingResponse, - AsyncFilesResourceWithStreamingResponse, -) -from .tools import ( - ToolsResource, - AsyncToolsResource, - ToolsResourceWithRawResponse, - AsyncToolsResourceWithRawResponse, - ToolsResourceWithStreamingResponse, - AsyncToolsResourceWithStreamingResponse, -) -from .agents import ( - AgentsResource, - AsyncAgentsResource, - AgentsResourceWithRawResponse, - AsyncAgentsResourceWithRawResponse, - AgentsResourceWithStreamingResponse, - AsyncAgentsResourceWithStreamingResponse, -) -from .models import ( - ModelsResource, - AsyncModelsResource, - ModelsResourceWithRawResponse, - AsyncModelsResourceWithRawResponse, - ModelsResourceWithStreamingResponse, - AsyncModelsResourceWithStreamingResponse, -) -from .routes import ( - RoutesResource, - AsyncRoutesResource, - RoutesResourceWithRawResponse, - AsyncRoutesResourceWithRawResponse, - RoutesResourceWithStreamingResponse, - AsyncRoutesResourceWithStreamingResponse, -) -from .safety import ( - SafetyResource, - AsyncSafetyResource, - SafetyResourceWithRawResponse, - AsyncSafetyResourceWithRawResponse, - SafetyResourceWithStreamingResponse, - AsyncSafetyResourceWithStreamingResponse, -) -from .inspect import ( - InspectResource, - AsyncInspectResource, - InspectResourceWithRawResponse, - AsyncInspectResourceWithRawResponse, - InspectResourceWithStreamingResponse, - AsyncInspectResourceWithStreamingResponse, -) -from .scoring import ( - ScoringResource, - AsyncScoringResource, - ScoringResourceWithRawResponse, - AsyncScoringResourceWithRawResponse, - ScoringResourceWithStreamingResponse, - AsyncScoringResourceWithStreamingResponse, -) -from .shields import ( - ShieldsResource, - AsyncShieldsResource, - ShieldsResourceWithRawResponse, - AsyncShieldsResourceWithRawResponse, - ShieldsResourceWithStreamingResponse, - AsyncShieldsResourceWithStreamingResponse, -) -from .datasets import ( - DatasetsResource, - AsyncDatasetsResource, - DatasetsResourceWithRawResponse, - AsyncDatasetsResourceWithRawResponse, - DatasetsResourceWithStreamingResponse, - AsyncDatasetsResourceWithStreamingResponse, -) -from .inference import ( - InferenceResource, - AsyncInferenceResource, - InferenceResourceWithRawResponse, - AsyncInferenceResourceWithRawResponse, - InferenceResourceWithStreamingResponse, - AsyncInferenceResourceWithStreamingResponse, -) -from .providers import ( - ProvidersResource, - AsyncProvidersResource, - ProvidersResourceWithRawResponse, - AsyncProvidersResourceWithRawResponse, - ProvidersResourceWithStreamingResponse, - AsyncProvidersResourceWithStreamingResponse, -) -from .responses import ( - ResponsesResource, - AsyncResponsesResource, - ResponsesResourceWithRawResponse, - AsyncResponsesResourceWithRawResponse, - ResponsesResourceWithStreamingResponse, - AsyncResponsesResourceWithStreamingResponse, -) -from .telemetry import ( - TelemetryResource, - AsyncTelemetryResource, - TelemetryResourceWithRawResponse, - AsyncTelemetryResourceWithRawResponse, - 
TelemetryResourceWithStreamingResponse, - AsyncTelemetryResourceWithStreamingResponse, -) -from .vector_io import ( - VectorIoResource, - AsyncVectorIoResource, - VectorIoResourceWithRawResponse, - AsyncVectorIoResourceWithRawResponse, - VectorIoResourceWithStreamingResponse, - AsyncVectorIoResourceWithStreamingResponse, -) -from .benchmarks import ( - BenchmarksResource, - AsyncBenchmarksResource, - BenchmarksResourceWithRawResponse, - AsyncBenchmarksResourceWithRawResponse, - BenchmarksResourceWithStreamingResponse, - AsyncBenchmarksResourceWithStreamingResponse, -) -from .embeddings import ( - EmbeddingsResource, - AsyncEmbeddingsResource, - EmbeddingsResourceWithRawResponse, - AsyncEmbeddingsResourceWithRawResponse, - EmbeddingsResourceWithStreamingResponse, - AsyncEmbeddingsResourceWithStreamingResponse, -) -from .toolgroups import ( - ToolgroupsResource, - AsyncToolgroupsResource, - ToolgroupsResourceWithRawResponse, - AsyncToolgroupsResourceWithRawResponse, - ToolgroupsResourceWithStreamingResponse, - AsyncToolgroupsResourceWithStreamingResponse, -) -from .vector_dbs import ( - VectorDBsResource, - AsyncVectorDBsResource, - VectorDBsResourceWithRawResponse, - AsyncVectorDBsResourceWithRawResponse, - VectorDBsResourceWithStreamingResponse, - AsyncVectorDBsResourceWithStreamingResponse, -) -from .completions import ( - CompletionsResource, - AsyncCompletionsResource, - CompletionsResourceWithRawResponse, - AsyncCompletionsResourceWithRawResponse, - CompletionsResourceWithStreamingResponse, - AsyncCompletionsResourceWithStreamingResponse, -) -from .tool_runtime import ( - ToolRuntimeResource, - AsyncToolRuntimeResource, - ToolRuntimeResourceWithRawResponse, - AsyncToolRuntimeResourceWithRawResponse, - ToolRuntimeResourceWithStreamingResponse, - AsyncToolRuntimeResourceWithStreamingResponse, -) -from .post_training import ( - PostTrainingResource, - AsyncPostTrainingResource, - PostTrainingResourceWithRawResponse, - AsyncPostTrainingResourceWithRawResponse, - PostTrainingResourceWithStreamingResponse, - AsyncPostTrainingResourceWithStreamingResponse, -) -from .vector_stores import ( - VectorStoresResource, - AsyncVectorStoresResource, - VectorStoresResourceWithRawResponse, - AsyncVectorStoresResourceWithRawResponse, - VectorStoresResourceWithStreamingResponse, - AsyncVectorStoresResourceWithStreamingResponse, -) -from .scoring_functions import ( - ScoringFunctionsResource, - AsyncScoringFunctionsResource, - ScoringFunctionsResourceWithRawResponse, - AsyncScoringFunctionsResourceWithRawResponse, - ScoringFunctionsResourceWithStreamingResponse, - AsyncScoringFunctionsResourceWithStreamingResponse, -) -from .synthetic_data_generation import ( - SyntheticDataGenerationResource, - AsyncSyntheticDataGenerationResource, - SyntheticDataGenerationResourceWithRawResponse, - AsyncSyntheticDataGenerationResourceWithRawResponse, - SyntheticDataGenerationResourceWithStreamingResponse, - AsyncSyntheticDataGenerationResourceWithStreamingResponse, -) - -__all__ = [ - "ToolgroupsResource", - "AsyncToolgroupsResource", - "ToolgroupsResourceWithRawResponse", - "AsyncToolgroupsResourceWithRawResponse", - "ToolgroupsResourceWithStreamingResponse", - "AsyncToolgroupsResourceWithStreamingResponse", - "ToolsResource", - "AsyncToolsResource", - "ToolsResourceWithRawResponse", - "AsyncToolsResourceWithRawResponse", - "ToolsResourceWithStreamingResponse", - "AsyncToolsResourceWithStreamingResponse", - "ToolRuntimeResource", - "AsyncToolRuntimeResource", - "ToolRuntimeResourceWithRawResponse", - 
"AsyncToolRuntimeResourceWithRawResponse", - "ToolRuntimeResourceWithStreamingResponse", - "AsyncToolRuntimeResourceWithStreamingResponse", - "ResponsesResource", - "AsyncResponsesResource", - "ResponsesResourceWithRawResponse", - "AsyncResponsesResourceWithRawResponse", - "ResponsesResourceWithStreamingResponse", - "AsyncResponsesResourceWithStreamingResponse", - "AgentsResource", - "AsyncAgentsResource", - "AgentsResourceWithRawResponse", - "AsyncAgentsResourceWithRawResponse", - "AgentsResourceWithStreamingResponse", - "AsyncAgentsResourceWithStreamingResponse", - "DatasetsResource", - "AsyncDatasetsResource", - "DatasetsResourceWithRawResponse", - "AsyncDatasetsResourceWithRawResponse", - "DatasetsResourceWithStreamingResponse", - "AsyncDatasetsResourceWithStreamingResponse", - "EvalResource", - "AsyncEvalResource", - "EvalResourceWithRawResponse", - "AsyncEvalResourceWithRawResponse", - "EvalResourceWithStreamingResponse", - "AsyncEvalResourceWithStreamingResponse", - "InspectResource", - "AsyncInspectResource", - "InspectResourceWithRawResponse", - "AsyncInspectResourceWithRawResponse", - "InspectResourceWithStreamingResponse", - "AsyncInspectResourceWithStreamingResponse", - "InferenceResource", - "AsyncInferenceResource", - "InferenceResourceWithRawResponse", - "AsyncInferenceResourceWithRawResponse", - "InferenceResourceWithStreamingResponse", - "AsyncInferenceResourceWithStreamingResponse", - "EmbeddingsResource", - "AsyncEmbeddingsResource", - "EmbeddingsResourceWithRawResponse", - "AsyncEmbeddingsResourceWithRawResponse", - "EmbeddingsResourceWithStreamingResponse", - "AsyncEmbeddingsResourceWithStreamingResponse", - "ChatResource", - "AsyncChatResource", - "ChatResourceWithRawResponse", - "AsyncChatResourceWithRawResponse", - "ChatResourceWithStreamingResponse", - "AsyncChatResourceWithStreamingResponse", - "CompletionsResource", - "AsyncCompletionsResource", - "CompletionsResourceWithRawResponse", - "AsyncCompletionsResourceWithRawResponse", - "CompletionsResourceWithStreamingResponse", - "AsyncCompletionsResourceWithStreamingResponse", - "VectorIoResource", - "AsyncVectorIoResource", - "VectorIoResourceWithRawResponse", - "AsyncVectorIoResourceWithRawResponse", - "VectorIoResourceWithStreamingResponse", - "AsyncVectorIoResourceWithStreamingResponse", - "VectorDBsResource", - "AsyncVectorDBsResource", - "VectorDBsResourceWithRawResponse", - "AsyncVectorDBsResourceWithRawResponse", - "VectorDBsResourceWithStreamingResponse", - "AsyncVectorDBsResourceWithStreamingResponse", - "VectorStoresResource", - "AsyncVectorStoresResource", - "VectorStoresResourceWithRawResponse", - "AsyncVectorStoresResourceWithRawResponse", - "VectorStoresResourceWithStreamingResponse", - "AsyncVectorStoresResourceWithStreamingResponse", - "ModelsResource", - "AsyncModelsResource", - "ModelsResourceWithRawResponse", - "AsyncModelsResourceWithRawResponse", - "ModelsResourceWithStreamingResponse", - "AsyncModelsResourceWithStreamingResponse", - "PostTrainingResource", - "AsyncPostTrainingResource", - "PostTrainingResourceWithRawResponse", - "AsyncPostTrainingResourceWithRawResponse", - "PostTrainingResourceWithStreamingResponse", - "AsyncPostTrainingResourceWithStreamingResponse", - "ProvidersResource", - "AsyncProvidersResource", - "ProvidersResourceWithRawResponse", - "AsyncProvidersResourceWithRawResponse", - "ProvidersResourceWithStreamingResponse", - "AsyncProvidersResourceWithStreamingResponse", - "RoutesResource", - "AsyncRoutesResource", - "RoutesResourceWithRawResponse", - 
"AsyncRoutesResourceWithRawResponse", - "RoutesResourceWithStreamingResponse", - "AsyncRoutesResourceWithStreamingResponse", - "SafetyResource", - "AsyncSafetyResource", - "SafetyResourceWithRawResponse", - "AsyncSafetyResourceWithRawResponse", - "SafetyResourceWithStreamingResponse", - "AsyncSafetyResourceWithStreamingResponse", - "ShieldsResource", - "AsyncShieldsResource", - "ShieldsResourceWithRawResponse", - "AsyncShieldsResourceWithRawResponse", - "ShieldsResourceWithStreamingResponse", - "AsyncShieldsResourceWithStreamingResponse", - "SyntheticDataGenerationResource", - "AsyncSyntheticDataGenerationResource", - "SyntheticDataGenerationResourceWithRawResponse", - "AsyncSyntheticDataGenerationResourceWithRawResponse", - "SyntheticDataGenerationResourceWithStreamingResponse", - "AsyncSyntheticDataGenerationResourceWithStreamingResponse", - "TelemetryResource", - "AsyncTelemetryResource", - "TelemetryResourceWithRawResponse", - "AsyncTelemetryResourceWithRawResponse", - "TelemetryResourceWithStreamingResponse", - "AsyncTelemetryResourceWithStreamingResponse", - "ScoringResource", - "AsyncScoringResource", - "ScoringResourceWithRawResponse", - "AsyncScoringResourceWithRawResponse", - "ScoringResourceWithStreamingResponse", - "AsyncScoringResourceWithStreamingResponse", - "ScoringFunctionsResource", - "AsyncScoringFunctionsResource", - "ScoringFunctionsResourceWithRawResponse", - "AsyncScoringFunctionsResourceWithRawResponse", - "ScoringFunctionsResourceWithStreamingResponse", - "AsyncScoringFunctionsResourceWithStreamingResponse", - "BenchmarksResource", - "AsyncBenchmarksResource", - "BenchmarksResourceWithRawResponse", - "AsyncBenchmarksResourceWithRawResponse", - "BenchmarksResourceWithStreamingResponse", - "AsyncBenchmarksResourceWithStreamingResponse", - "FilesResource", - "AsyncFilesResource", - "FilesResourceWithRawResponse", - "AsyncFilesResourceWithRawResponse", - "FilesResourceWithStreamingResponse", - "AsyncFilesResourceWithStreamingResponse", -] diff --git a/src/llama_stack_client/resources/agents/__init__.py b/src/llama_stack_client/resources/agents/__init__.py deleted file mode 100644 index 17f0098f..00000000 --- a/src/llama_stack_client/resources/agents/__init__.py +++ /dev/null @@ -1,61 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from .turn import ( - TurnResource, - AsyncTurnResource, - TurnResourceWithRawResponse, - AsyncTurnResourceWithRawResponse, - TurnResourceWithStreamingResponse, - AsyncTurnResourceWithStreamingResponse, -) -from .steps import ( - StepsResource, - AsyncStepsResource, - StepsResourceWithRawResponse, - AsyncStepsResourceWithRawResponse, - StepsResourceWithStreamingResponse, - AsyncStepsResourceWithStreamingResponse, -) -from .agents import ( - AgentsResource, - AsyncAgentsResource, - AgentsResourceWithRawResponse, - AsyncAgentsResourceWithRawResponse, - AgentsResourceWithStreamingResponse, - AsyncAgentsResourceWithStreamingResponse, -) -from .session import ( - SessionResource, - AsyncSessionResource, - SessionResourceWithRawResponse, - AsyncSessionResourceWithRawResponse, - SessionResourceWithStreamingResponse, - AsyncSessionResourceWithStreamingResponse, -) - -__all__ = [ - "SessionResource", - "AsyncSessionResource", - "SessionResourceWithRawResponse", - "AsyncSessionResourceWithRawResponse", - "SessionResourceWithStreamingResponse", - "AsyncSessionResourceWithStreamingResponse", - "StepsResource", - "AsyncStepsResource", - "StepsResourceWithRawResponse", - "AsyncStepsResourceWithRawResponse", - "StepsResourceWithStreamingResponse", - "AsyncStepsResourceWithStreamingResponse", - "TurnResource", - "AsyncTurnResource", - "TurnResourceWithRawResponse", - "AsyncTurnResourceWithRawResponse", - "TurnResourceWithStreamingResponse", - "AsyncTurnResourceWithStreamingResponse", - "AgentsResource", - "AsyncAgentsResource", - "AgentsResourceWithRawResponse", - "AsyncAgentsResourceWithRawResponse", - "AgentsResourceWithStreamingResponse", - "AsyncAgentsResourceWithStreamingResponse", -] diff --git a/src/llama_stack_client/resources/agents/agents.py b/src/llama_stack_client/resources/agents/agents.py deleted file mode 100644 index 5b34cea8..00000000 --- a/src/llama_stack_client/resources/agents/agents.py +++ /dev/null @@ -1,344 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -import httpx - -from .turn import ( - TurnResource, - AsyncTurnResource, - TurnResourceWithRawResponse, - AsyncTurnResourceWithRawResponse, - TurnResourceWithStreamingResponse, - AsyncTurnResourceWithStreamingResponse, -) -from .steps import ( - StepsResource, - AsyncStepsResource, - StepsResourceWithRawResponse, - AsyncStepsResourceWithRawResponse, - StepsResourceWithStreamingResponse, - AsyncStepsResourceWithStreamingResponse, -) -from ...types import agent_create_params -from .session import ( - SessionResource, - AsyncSessionResource, - SessionResourceWithRawResponse, - AsyncSessionResourceWithRawResponse, - SessionResourceWithStreamingResponse, - AsyncSessionResourceWithStreamingResponse, -) -from ..._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven -from ..._utils import maybe_transform, async_maybe_transform -from ..._compat import cached_property -from ..._resource import SyncAPIResource, AsyncAPIResource -from ..._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from ..._base_client import make_request_options -from ...types.agent_create_response import AgentCreateResponse -from ...types.shared_params.agent_config import AgentConfig - -__all__ = ["AgentsResource", "AsyncAgentsResource"] - - -class AgentsResource(SyncAPIResource): - @cached_property - def session(self) -> SessionResource: - return SessionResource(self._client) - - @cached_property - def steps(self) -> StepsResource: - return StepsResource(self._client) - - @cached_property - def turn(self) -> TurnResource: - return TurnResource(self._client) - - @cached_property - def with_raw_response(self) -> AgentsResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AgentsResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AgentsResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AgentsResourceWithStreamingResponse(self) - - def create( - self, - *, - agent_config: AgentConfig, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AgentCreateResponse: - """ - Create an agent with the given configuration. - - Args: - agent_config: The configuration for the agent. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return self._post( - "/v1/agents", - body=maybe_transform({"agent_config": agent_config}, agent_create_params.AgentCreateParams), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=AgentCreateResponse, - ) - - def delete( - self, - agent_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> None: - """ - Delete an agent by its ID and its associated sessions and turns. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not agent_id: - raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}") - extra_headers = {"Accept": "*/*", **(extra_headers or {})} - return self._delete( - f"/v1/agents/{agent_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=NoneType, - ) - - -class AsyncAgentsResource(AsyncAPIResource): - @cached_property - def session(self) -> AsyncSessionResource: - return AsyncSessionResource(self._client) - - @cached_property - def steps(self) -> AsyncStepsResource: - return AsyncStepsResource(self._client) - - @cached_property - def turn(self) -> AsyncTurnResource: - return AsyncTurnResource(self._client) - - @cached_property - def with_raw_response(self) -> AsyncAgentsResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncAgentsResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncAgentsResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncAgentsResourceWithStreamingResponse(self) - - async def create( - self, - *, - agent_config: AgentConfig, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AgentCreateResponse: - """ - Create an agent with the given configuration. - - Args: - agent_config: The configuration for the agent. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return await self._post( - "/v1/agents", - body=await async_maybe_transform({"agent_config": agent_config}, agent_create_params.AgentCreateParams), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=AgentCreateResponse, - ) - - async def delete( - self, - agent_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> None: - """ - Delete an agent by its ID and its associated sessions and turns. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not agent_id: - raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}") - extra_headers = {"Accept": "*/*", **(extra_headers or {})} - return await self._delete( - f"/v1/agents/{agent_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=NoneType, - ) - - -class AgentsResourceWithRawResponse: - def __init__(self, agents: AgentsResource) -> None: - self._agents = agents - - self.create = to_raw_response_wrapper( - agents.create, - ) - self.delete = to_raw_response_wrapper( - agents.delete, - ) - - @cached_property - def session(self) -> SessionResourceWithRawResponse: - return SessionResourceWithRawResponse(self._agents.session) - - @cached_property - def steps(self) -> StepsResourceWithRawResponse: - return StepsResourceWithRawResponse(self._agents.steps) - - @cached_property - def turn(self) -> TurnResourceWithRawResponse: - return TurnResourceWithRawResponse(self._agents.turn) - - -class AsyncAgentsResourceWithRawResponse: - def __init__(self, agents: AsyncAgentsResource) -> None: - self._agents = agents - - self.create = async_to_raw_response_wrapper( - agents.create, - ) - self.delete = async_to_raw_response_wrapper( - agents.delete, - ) - - @cached_property - def session(self) -> AsyncSessionResourceWithRawResponse: - return AsyncSessionResourceWithRawResponse(self._agents.session) - - @cached_property - def steps(self) -> AsyncStepsResourceWithRawResponse: - return AsyncStepsResourceWithRawResponse(self._agents.steps) - - @cached_property - def turn(self) -> AsyncTurnResourceWithRawResponse: - return AsyncTurnResourceWithRawResponse(self._agents.turn) - - -class AgentsResourceWithStreamingResponse: - def __init__(self, agents: AgentsResource) -> None: - self._agents = agents - - self.create = to_streamed_response_wrapper( - agents.create, - ) - self.delete = to_streamed_response_wrapper( - agents.delete, - ) - - @cached_property - def session(self) -> SessionResourceWithStreamingResponse: - return SessionResourceWithStreamingResponse(self._agents.session) - - @cached_property - def steps(self) -> 
StepsResourceWithStreamingResponse: - return StepsResourceWithStreamingResponse(self._agents.steps) - - @cached_property - def turn(self) -> TurnResourceWithStreamingResponse: - return TurnResourceWithStreamingResponse(self._agents.turn) - - -class AsyncAgentsResourceWithStreamingResponse: - def __init__(self, agents: AsyncAgentsResource) -> None: - self._agents = agents - - self.create = async_to_streamed_response_wrapper( - agents.create, - ) - self.delete = async_to_streamed_response_wrapper( - agents.delete, - ) - - @cached_property - def session(self) -> AsyncSessionResourceWithStreamingResponse: - return AsyncSessionResourceWithStreamingResponse(self._agents.session) - - @cached_property - def steps(self) -> AsyncStepsResourceWithStreamingResponse: - return AsyncStepsResourceWithStreamingResponse(self._agents.steps) - - @cached_property - def turn(self) -> AsyncTurnResourceWithStreamingResponse: - return AsyncTurnResourceWithStreamingResponse(self._agents.turn) diff --git a/src/llama_stack_client/resources/agents/session.py b/src/llama_stack_client/resources/agents/session.py deleted file mode 100644 index ccefeb0b..00000000 --- a/src/llama_stack_client/resources/agents/session.py +++ /dev/null @@ -1,362 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import List - -import httpx - -from ..._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven -from ..._utils import maybe_transform, async_maybe_transform -from ..._compat import cached_property -from ..._resource import SyncAPIResource, AsyncAPIResource -from ..._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from ..._base_client import make_request_options -from ...types.agents import session_create_params, session_retrieve_params -from ...types.agents.session import Session -from ...types.agents.session_create_response import SessionCreateResponse - -__all__ = ["SessionResource", "AsyncSessionResource"] - - -class SessionResource(SyncAPIResource): - @cached_property - def with_raw_response(self) -> SessionResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return SessionResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> SessionResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return SessionResourceWithStreamingResponse(self) - - def create( - self, - agent_id: str, - *, - session_name: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> SessionCreateResponse: - """ - Create a new session for an agent. - - Args: - session_name: The name of the session to create. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not agent_id: - raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}") - return self._post( - f"/v1/agents/{agent_id}/session", - body=maybe_transform({"session_name": session_name}, session_create_params.SessionCreateParams), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=SessionCreateResponse, - ) - - def retrieve( - self, - session_id: str, - *, - agent_id: str, - turn_ids: List[str] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Session: - """ - Retrieve an agent session by its ID. - - Args: - turn_ids: (Optional) List of turn IDs to filter the session by. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not agent_id: - raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}") - if not session_id: - raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}") - return self._get( - f"/v1/agents/{agent_id}/session/{session_id}", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=maybe_transform({"turn_ids": turn_ids}, session_retrieve_params.SessionRetrieveParams), - ), - cast_to=Session, - ) - - def delete( - self, - session_id: str, - *, - agent_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> None: - """ - Delete an agent session by its ID and its associated turns. 
- - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not agent_id: - raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}") - if not session_id: - raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}") - extra_headers = {"Accept": "*/*", **(extra_headers or {})} - return self._delete( - f"/v1/agents/{agent_id}/session/{session_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=NoneType, - ) - - -class AsyncSessionResource(AsyncAPIResource): - @cached_property - def with_raw_response(self) -> AsyncSessionResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncSessionResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncSessionResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncSessionResourceWithStreamingResponse(self) - - async def create( - self, - agent_id: str, - *, - session_name: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> SessionCreateResponse: - """ - Create a new session for an agent. - - Args: - session_name: The name of the session to create. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not agent_id: - raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}") - return await self._post( - f"/v1/agents/{agent_id}/session", - body=await async_maybe_transform({"session_name": session_name}, session_create_params.SessionCreateParams), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=SessionCreateResponse, - ) - - async def retrieve( - self, - session_id: str, - *, - agent_id: str, - turn_ids: List[str] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Session: - """ - Retrieve an agent session by its ID. 
- - Args: - turn_ids: (Optional) List of turn IDs to filter the session by. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not agent_id: - raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}") - if not session_id: - raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}") - return await self._get( - f"/v1/agents/{agent_id}/session/{session_id}", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=await async_maybe_transform( - {"turn_ids": turn_ids}, session_retrieve_params.SessionRetrieveParams - ), - ), - cast_to=Session, - ) - - async def delete( - self, - session_id: str, - *, - agent_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> None: - """ - Delete an agent session by its ID and its associated turns. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not agent_id: - raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}") - if not session_id: - raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}") - extra_headers = {"Accept": "*/*", **(extra_headers or {})} - return await self._delete( - f"/v1/agents/{agent_id}/session/{session_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=NoneType, - ) - - -class SessionResourceWithRawResponse: - def __init__(self, session: SessionResource) -> None: - self._session = session - - self.create = to_raw_response_wrapper( - session.create, - ) - self.retrieve = to_raw_response_wrapper( - session.retrieve, - ) - self.delete = to_raw_response_wrapper( - session.delete, - ) - - -class AsyncSessionResourceWithRawResponse: - def __init__(self, session: AsyncSessionResource) -> None: - self._session = session - - self.create = async_to_raw_response_wrapper( - session.create, - ) - self.retrieve = async_to_raw_response_wrapper( - session.retrieve, - ) - self.delete = async_to_raw_response_wrapper( - session.delete, - ) - - -class SessionResourceWithStreamingResponse: - def __init__(self, session: SessionResource) -> None: - self._session = session - - self.create = to_streamed_response_wrapper( - session.create, - ) - self.retrieve = to_streamed_response_wrapper( - session.retrieve, - ) - self.delete = to_streamed_response_wrapper( - session.delete, - ) - - -class AsyncSessionResourceWithStreamingResponse: - def __init__(self, session: AsyncSessionResource) -> None: - self._session = session - - self.create = async_to_streamed_response_wrapper( - session.create, - ) - self.retrieve = async_to_streamed_response_wrapper( - session.retrieve, - ) - 
self.delete = async_to_streamed_response_wrapper( - session.delete, - ) diff --git a/src/llama_stack_client/resources/agents/steps.py b/src/llama_stack_client/resources/agents/steps.py deleted file mode 100644 index 78f9a88e..00000000 --- a/src/llama_stack_client/resources/agents/steps.py +++ /dev/null @@ -1,181 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -import httpx - -from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ..._compat import cached_property -from ..._resource import SyncAPIResource, AsyncAPIResource -from ..._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from ..._base_client import make_request_options -from ...types.agents.step_retrieve_response import StepRetrieveResponse - -__all__ = ["StepsResource", "AsyncStepsResource"] - - -class StepsResource(SyncAPIResource): - @cached_property - def with_raw_response(self) -> StepsResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return StepsResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> StepsResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return StepsResourceWithStreamingResponse(self) - - def retrieve( - self, - step_id: str, - *, - agent_id: str, - session_id: str, - turn_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> StepRetrieveResponse: - """ - Retrieve an agent step by its ID. 
- - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not agent_id: - raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}") - if not session_id: - raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}") - if not turn_id: - raise ValueError(f"Expected a non-empty value for `turn_id` but received {turn_id!r}") - if not step_id: - raise ValueError(f"Expected a non-empty value for `step_id` but received {step_id!r}") - return self._get( - f"/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=StepRetrieveResponse, - ) - - -class AsyncStepsResource(AsyncAPIResource): - @cached_property - def with_raw_response(self) -> AsyncStepsResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncStepsResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncStepsResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncStepsResourceWithStreamingResponse(self) - - async def retrieve( - self, - step_id: str, - *, - agent_id: str, - session_id: str, - turn_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> StepRetrieveResponse: - """ - Retrieve an agent step by its ID. 
- - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not agent_id: - raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}") - if not session_id: - raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}") - if not turn_id: - raise ValueError(f"Expected a non-empty value for `turn_id` but received {turn_id!r}") - if not step_id: - raise ValueError(f"Expected a non-empty value for `step_id` but received {step_id!r}") - return await self._get( - f"/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=StepRetrieveResponse, - ) - - -class StepsResourceWithRawResponse: - def __init__(self, steps: StepsResource) -> None: - self._steps = steps - - self.retrieve = to_raw_response_wrapper( - steps.retrieve, - ) - - -class AsyncStepsResourceWithRawResponse: - def __init__(self, steps: AsyncStepsResource) -> None: - self._steps = steps - - self.retrieve = async_to_raw_response_wrapper( - steps.retrieve, - ) - - -class StepsResourceWithStreamingResponse: - def __init__(self, steps: StepsResource) -> None: - self._steps = steps - - self.retrieve = to_streamed_response_wrapper( - steps.retrieve, - ) - - -class AsyncStepsResourceWithStreamingResponse: - def __init__(self, steps: AsyncStepsResource) -> None: - self._steps = steps - - self.retrieve = async_to_streamed_response_wrapper( - steps.retrieve, - ) diff --git a/src/llama_stack_client/resources/agents/turn.py b/src/llama_stack_client/resources/agents/turn.py deleted file mode 100644 index b98b593b..00000000 --- a/src/llama_stack_client/resources/agents/turn.py +++ /dev/null @@ -1,875 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import List, Iterable -from typing_extensions import Literal, overload - -import httpx - -from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ..._utils import required_args, maybe_transform, async_maybe_transform -from ..._compat import cached_property -from ..._resource import SyncAPIResource, AsyncAPIResource -from ..._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from ..._streaming import Stream, AsyncStream -from ..._base_client import make_request_options -from ...types.agents import turn_create_params, turn_resume_params -from ...types.agents.turn import Turn -from ...types.tool_response_param import ToolResponseParam -from ...types.agents.agent_turn_response_stream_chunk import AgentTurnResponseStreamChunk - -__all__ = ["TurnResource", "AsyncTurnResource"] - - -class TurnResource(SyncAPIResource): - @cached_property - def with_raw_response(self) -> TurnResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. 
- - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return TurnResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> TurnResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return TurnResourceWithStreamingResponse(self) - - @overload - def create( - self, - session_id: str, - *, - agent_id: str, - messages: Iterable[turn_create_params.Message], - documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN, - stream: Literal[False] | NotGiven = NOT_GIVEN, - tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN, - toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Turn: - """ - Create a new turn for an agent. - - Args: - messages: List of messages to start the turn with. - - documents: (Optional) List of documents to create the turn with. - - stream: (Optional) If True, generate an SSE event stream of the response. Defaults to - False. - - tool_config: (Optional) The tool configuration to create the turn with, will be used to - override the agent's tool_config. - - toolgroups: (Optional) List of toolgroups to create the turn with, will be used in addition - to the agent's config toolgroups for the request. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - def create( - self, - session_id: str, - *, - agent_id: str, - messages: Iterable[turn_create_params.Message], - stream: Literal[True], - documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN, - tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN, - toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Stream[AgentTurnResponseStreamChunk]: - """ - Create a new turn for an agent. - - Args: - messages: List of messages to start the turn with. - - stream: (Optional) If True, generate an SSE event stream of the response. Defaults to - False. - - documents: (Optional) List of documents to create the turn with. - - tool_config: (Optional) The tool configuration to create the turn with, will be used to - override the agent's tool_config. - - toolgroups: (Optional) List of toolgroups to create the turn with, will be used in addition - to the agent's config toolgroups for the request. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - def create( - self, - session_id: str, - *, - agent_id: str, - messages: Iterable[turn_create_params.Message], - stream: bool, - documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN, - tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN, - toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Turn | Stream[AgentTurnResponseStreamChunk]: - """ - Create a new turn for an agent. - - Args: - messages: List of messages to start the turn with. - - stream: (Optional) If True, generate an SSE event stream of the response. Defaults to - False. - - documents: (Optional) List of documents to create the turn with. - - tool_config: (Optional) The tool configuration to create the turn with, will be used to - override the agent's tool_config. - - toolgroups: (Optional) List of toolgroups to create the turn with, will be used in addition - to the agent's config toolgroups for the request. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @required_args(["agent_id", "messages"], ["agent_id", "messages", "stream"]) - def create( - self, - session_id: str, - *, - agent_id: str, - messages: Iterable[turn_create_params.Message], - documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN, - stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, - tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN, - toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Turn | Stream[AgentTurnResponseStreamChunk]: - if not agent_id: - raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}") - if not session_id: - raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}") - return self._post( - f"/v1/agents/{agent_id}/session/{session_id}/turn", - body=maybe_transform( - { - "messages": messages, - "documents": documents, - "stream": stream, - "tool_config": tool_config, - "toolgroups": toolgroups, - }, - turn_create_params.TurnCreateParamsStreaming - if stream - else turn_create_params.TurnCreateParamsNonStreaming, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Turn, - stream=stream or False, - stream_cls=Stream[AgentTurnResponseStreamChunk], - ) - - def retrieve( - self, - turn_id: str, - *, - agent_id: str, - session_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Turn: - """ - Retrieve an agent turn by its ID. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not agent_id: - raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}") - if not session_id: - raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}") - if not turn_id: - raise ValueError(f"Expected a non-empty value for `turn_id` but received {turn_id!r}") - return self._get( - f"/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Turn, - ) - - @overload - def resume( - self, - turn_id: str, - *, - agent_id: str, - session_id: str, - tool_responses: Iterable[ToolResponseParam], - stream: Literal[False] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Turn: - """Resume an agent turn with executed tool call responses. - - When a Turn has the - status `awaiting_input` due to pending input from client side tool calls, this - endpoint can be used to submit the outputs from the tool calls once they are - ready. - - Args: - tool_responses: The tool call responses to resume the turn with. - - stream: Whether to stream the response. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - def resume( - self, - turn_id: str, - *, - agent_id: str, - session_id: str, - stream: Literal[True], - tool_responses: Iterable[ToolResponseParam], - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Stream[AgentTurnResponseStreamChunk]: - """Resume an agent turn with executed tool call responses. - - When a Turn has the - status `awaiting_input` due to pending input from client side tool calls, this - endpoint can be used to submit the outputs from the tool calls once they are - ready. - - Args: - stream: Whether to stream the response. - - tool_responses: The tool call responses to resume the turn with. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - def resume( - self, - turn_id: str, - *, - agent_id: str, - session_id: str, - stream: bool, - tool_responses: Iterable[ToolResponseParam], - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Turn | Stream[AgentTurnResponseStreamChunk]: - """Resume an agent turn with executed tool call responses. - - When a Turn has the - status `awaiting_input` due to pending input from client side tool calls, this - endpoint can be used to submit the outputs from the tool calls once they are - ready. - - Args: - stream: Whether to stream the response. - - tool_responses: The tool call responses to resume the turn with. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @required_args(["agent_id", "session_id", "tool_responses"], ["agent_id", "session_id", "stream", "tool_responses"]) - def resume( - self, - turn_id: str, - *, - agent_id: str, - session_id: str, - tool_responses: Iterable[ToolResponseParam], - stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Turn | Stream[AgentTurnResponseStreamChunk]: - if not agent_id: - raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}") - if not session_id: - raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}") - if not turn_id: - raise ValueError(f"Expected a non-empty value for `turn_id` but received {turn_id!r}") - return self._post( - f"/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume", - body=maybe_transform( - { - "tool_responses": tool_responses, - "stream": stream, - }, - turn_resume_params.TurnResumeParamsStreaming - if stream - else turn_resume_params.TurnResumeParamsNonStreaming, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Turn, - stream=stream or False, - stream_cls=Stream[AgentTurnResponseStreamChunk], - ) - - -class AsyncTurnResource(AsyncAPIResource): - @cached_property - def with_raw_response(self) -> AsyncTurnResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncTurnResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncTurnResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncTurnResourceWithStreamingResponse(self) - - @overload - async def create( - self, - session_id: str, - *, - agent_id: str, - messages: Iterable[turn_create_params.Message], - documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN, - stream: Literal[False] | NotGiven = NOT_GIVEN, - tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN, - toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Turn: - """ - Create a new turn for an agent. - - Args: - messages: List of messages to start the turn with. - - documents: (Optional) List of documents to create the turn with. - - stream: (Optional) If True, generate an SSE event stream of the response. Defaults to - False. - - tool_config: (Optional) The tool configuration to create the turn with, will be used to - override the agent's tool_config. - - toolgroups: (Optional) List of toolgroups to create the turn with, will be used in addition - to the agent's config toolgroups for the request. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... 
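For orientation, a minimal sketch of how the agent, session, and turn resources removed above were typically chained together from the synchronous client. The LlamaStackClient name, its constructor arguments, the message dict shape, and the response attribute names (agent_id, session_id, turn_id) are assumptions for illustration; only the method names and signatures come from the code in this diff.

from llama_stack_client import LlamaStackClient  # assumed export of the deleted _client.py

client = LlamaStackClient(base_url="http://localhost:8321")  # hypothetical endpoint
agent = client.agents.create(
    agent_config={"model": "example-model", "instructions": "Answer briefly."},  # assumed AgentConfig fields
)
session = client.agents.session.create(agent.agent_id, session_name="demo")
turn = client.agents.turn.create(
    session.session_id,
    agent_id=agent.agent_id,
    messages=[{"role": "user", "content": "Hello"}],  # assumed message shape
)
print(client.agents.turn.retrieve(turn.turn_id, agent_id=agent.agent_id, session_id=session.session_id))
# client.agents.steps.retrieve(step_id, agent_id=..., session_id=..., turn_id=...) followed
# the same positional-ID-plus-keyword pattern for individual turn steps.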
- - @overload - async def create( - self, - session_id: str, - *, - agent_id: str, - messages: Iterable[turn_create_params.Message], - stream: Literal[True], - documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN, - tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN, - toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncStream[AgentTurnResponseStreamChunk]: - """ - Create a new turn for an agent. - - Args: - messages: List of messages to start the turn with. - - stream: (Optional) If True, generate an SSE event stream of the response. Defaults to - False. - - documents: (Optional) List of documents to create the turn with. - - tool_config: (Optional) The tool configuration to create the turn with, will be used to - override the agent's tool_config. - - toolgroups: (Optional) List of toolgroups to create the turn with, will be used in addition - to the agent's config toolgroups for the request. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - async def create( - self, - session_id: str, - *, - agent_id: str, - messages: Iterable[turn_create_params.Message], - stream: bool, - documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN, - tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN, - toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Turn | AsyncStream[AgentTurnResponseStreamChunk]: - """ - Create a new turn for an agent. - - Args: - messages: List of messages to start the turn with. - - stream: (Optional) If True, generate an SSE event stream of the response. Defaults to - False. - - documents: (Optional) List of documents to create the turn with. - - tool_config: (Optional) The tool configuration to create the turn with, will be used to - override the agent's tool_config. - - toolgroups: (Optional) List of toolgroups to create the turn with, will be used in addition - to the agent's config toolgroups for the request. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... 
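A sketch of the streaming path these overloads describe, on the async surface. The AsyncLlamaStackClient name and the chunk's event attribute are assumptions for illustration; stream=True returning an AsyncStream of AgentTurnResponseStreamChunk is taken from the signatures above.

import asyncio

from llama_stack_client import AsyncLlamaStackClient  # assumed export of the deleted _client.py

async def stream_turn(agent_id: str, session_id: str) -> None:
    client = AsyncLlamaStackClient(base_url="http://localhost:8321")  # hypothetical endpoint
    stream = await client.agents.turn.create(
        session_id,
        agent_id=agent_id,
        messages=[{"role": "user", "content": "Summarize our conversation."}],
        stream=True,  # switches the return type from Turn to AsyncStream[AgentTurnResponseStreamChunk]
    )
    async for chunk in stream:
        print(chunk.event)  # assumed chunk attribute; each chunk corresponds to one SSE event

asyncio.run(stream_turn("agent-123", "session-456"))  # placeholder IDs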
- - @required_args(["agent_id", "messages"], ["agent_id", "messages", "stream"]) - async def create( - self, - session_id: str, - *, - agent_id: str, - messages: Iterable[turn_create_params.Message], - documents: Iterable[turn_create_params.Document] | NotGiven = NOT_GIVEN, - stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, - tool_config: turn_create_params.ToolConfig | NotGiven = NOT_GIVEN, - toolgroups: List[turn_create_params.Toolgroup] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Turn | AsyncStream[AgentTurnResponseStreamChunk]: - if not agent_id: - raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}") - if not session_id: - raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}") - return await self._post( - f"/v1/agents/{agent_id}/session/{session_id}/turn", - body=await async_maybe_transform( - { - "messages": messages, - "documents": documents, - "stream": stream, - "tool_config": tool_config, - "toolgroups": toolgroups, - }, - turn_create_params.TurnCreateParamsStreaming - if stream - else turn_create_params.TurnCreateParamsNonStreaming, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Turn, - stream=stream or False, - stream_cls=AsyncStream[AgentTurnResponseStreamChunk], - ) - - async def retrieve( - self, - turn_id: str, - *, - agent_id: str, - session_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Turn: - """ - Retrieve an agent turn by its ID. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not agent_id: - raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}") - if not session_id: - raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}") - if not turn_id: - raise ValueError(f"Expected a non-empty value for `turn_id` but received {turn_id!r}") - return await self._get( - f"/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Turn, - ) - - @overload - async def resume( - self, - turn_id: str, - *, - agent_id: str, - session_id: str, - tool_responses: Iterable[ToolResponseParam], - stream: Literal[False] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
- # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Turn: - """Resume an agent turn with executed tool call responses. - - When a Turn has the - status `awaiting_input` due to pending input from client side tool calls, this - endpoint can be used to submit the outputs from the tool calls once they are - ready. - - Args: - tool_responses: The tool call responses to resume the turn with. - - stream: Whether to stream the response. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - async def resume( - self, - turn_id: str, - *, - agent_id: str, - session_id: str, - stream: Literal[True], - tool_responses: Iterable[ToolResponseParam], - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncStream[AgentTurnResponseStreamChunk]: - """Resume an agent turn with executed tool call responses. - - When a Turn has the - status `awaiting_input` due to pending input from client side tool calls, this - endpoint can be used to submit the outputs from the tool calls once they are - ready. - - Args: - stream: Whether to stream the response. - - tool_responses: The tool call responses to resume the turn with. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - async def resume( - self, - turn_id: str, - *, - agent_id: str, - session_id: str, - stream: bool, - tool_responses: Iterable[ToolResponseParam], - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Turn | AsyncStream[AgentTurnResponseStreamChunk]: - """Resume an agent turn with executed tool call responses. - - When a Turn has the - status `awaiting_input` due to pending input from client side tool calls, this - endpoint can be used to submit the outputs from the tool calls once they are - ready. - - Args: - stream: Whether to stream the response. - - tool_responses: The tool call responses to resume the turn with. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... 
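# A minimal sketch of resuming a turn that is paused with status
# `awaiting_input`, as the resume overloads above document. Assumptions: the
# resource path (client.agents.turn), the ToolResponseParam field names
# (call_id, tool_name, content), and all IDs are illustrative placeholders;
# only the method signature comes from this file.
import asyncio

from llama_stack_client import AsyncLlamaStackClient


async def resume_pending_turn(turn_id: str, agent_id: str, session_id: str) -> None:
    client = AsyncLlamaStackClient()

    # Submit the output of a client-side tool call and stream the continuation.
    stream = await client.agents.turn.resume(
        turn_id,
        agent_id=agent_id,
        session_id=session_id,
        tool_responses=[
            {
                "call_id": "call-1",            # assumed field names
                "tool_name": "get_weather",
                "content": '{"temperature_c": 21}',
            }
        ],
        stream=True,
    )
    async for chunk in stream:
        print(chunk)


asyncio.run(resume_pending_turn("turn-1", "agent-abc", "session-123"))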
- - @required_args(["agent_id", "session_id", "tool_responses"], ["agent_id", "session_id", "stream", "tool_responses"]) - async def resume( - self, - turn_id: str, - *, - agent_id: str, - session_id: str, - tool_responses: Iterable[ToolResponseParam], - stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Turn | AsyncStream[AgentTurnResponseStreamChunk]: - if not agent_id: - raise ValueError(f"Expected a non-empty value for `agent_id` but received {agent_id!r}") - if not session_id: - raise ValueError(f"Expected a non-empty value for `session_id` but received {session_id!r}") - if not turn_id: - raise ValueError(f"Expected a non-empty value for `turn_id` but received {turn_id!r}") - return await self._post( - f"/v1/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume", - body=await async_maybe_transform( - { - "tool_responses": tool_responses, - "stream": stream, - }, - turn_resume_params.TurnResumeParamsStreaming - if stream - else turn_resume_params.TurnResumeParamsNonStreaming, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Turn, - stream=stream or False, - stream_cls=AsyncStream[AgentTurnResponseStreamChunk], - ) - - -class TurnResourceWithRawResponse: - def __init__(self, turn: TurnResource) -> None: - self._turn = turn - - self.create = to_raw_response_wrapper( - turn.create, - ) - self.retrieve = to_raw_response_wrapper( - turn.retrieve, - ) - self.resume = to_raw_response_wrapper( - turn.resume, - ) - - -class AsyncTurnResourceWithRawResponse: - def __init__(self, turn: AsyncTurnResource) -> None: - self._turn = turn - - self.create = async_to_raw_response_wrapper( - turn.create, - ) - self.retrieve = async_to_raw_response_wrapper( - turn.retrieve, - ) - self.resume = async_to_raw_response_wrapper( - turn.resume, - ) - - -class TurnResourceWithStreamingResponse: - def __init__(self, turn: TurnResource) -> None: - self._turn = turn - - self.create = to_streamed_response_wrapper( - turn.create, - ) - self.retrieve = to_streamed_response_wrapper( - turn.retrieve, - ) - self.resume = to_streamed_response_wrapper( - turn.resume, - ) - - -class AsyncTurnResourceWithStreamingResponse: - def __init__(self, turn: AsyncTurnResource) -> None: - self._turn = turn - - self.create = async_to_streamed_response_wrapper( - turn.create, - ) - self.retrieve = async_to_streamed_response_wrapper( - turn.retrieve, - ) - self.resume = async_to_streamed_response_wrapper( - turn.resume, - ) diff --git a/src/llama_stack_client/resources/benchmarks.py b/src/llama_stack_client/resources/benchmarks.py deleted file mode 100644 index 7b92833b..00000000 --- a/src/llama_stack_client/resources/benchmarks.py +++ /dev/null @@ -1,359 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -from typing import Dict, List, Type, Union, Iterable, cast - -import httpx - -from ..types import benchmark_register_params -from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven -from .._utils import maybe_transform, async_maybe_transform -from .._compat import cached_property -from .._resource import SyncAPIResource, AsyncAPIResource -from .._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from .._wrappers import DataWrapper -from .._base_client import make_request_options -from ..types.benchmark import Benchmark -from ..types.benchmark_list_response import BenchmarkListResponse - -__all__ = ["BenchmarksResource", "AsyncBenchmarksResource"] - - -class BenchmarksResource(SyncAPIResource): - @cached_property - def with_raw_response(self) -> BenchmarksResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return BenchmarksResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> BenchmarksResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return BenchmarksResourceWithStreamingResponse(self) - - def retrieve( - self, - benchmark_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Benchmark: - """ - Get a benchmark by its ID. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not benchmark_id: - raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") - return self._get( - f"/v1/eval/benchmarks/{benchmark_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Benchmark, - ) - - def list( - self, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> BenchmarkListResponse: - """List all benchmarks.""" - return self._get( - "/v1/eval/benchmarks", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - post_parser=DataWrapper[BenchmarkListResponse]._unwrapper, - ), - cast_to=cast(Type[BenchmarkListResponse], DataWrapper[BenchmarkListResponse]), - ) - - def register( - self, - *, - benchmark_id: str, - dataset_id: str, - scoring_functions: List[str], - metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - provider_benchmark_id: str | NotGiven = NOT_GIVEN, - provider_id: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> None: - """ - Register a benchmark. - - Args: - benchmark_id: The ID of the benchmark to register. - - dataset_id: The ID of the dataset to use for the benchmark. - - scoring_functions: The scoring functions to use for the benchmark. - - metadata: The metadata to use for the benchmark. - - provider_benchmark_id: The ID of the provider benchmark to use for the benchmark. - - provider_id: The ID of the provider to use for the benchmark. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"Accept": "*/*", **(extra_headers or {})} - return self._post( - "/v1/eval/benchmarks", - body=maybe_transform( - { - "benchmark_id": benchmark_id, - "dataset_id": dataset_id, - "scoring_functions": scoring_functions, - "metadata": metadata, - "provider_benchmark_id": provider_benchmark_id, - "provider_id": provider_id, - }, - benchmark_register_params.BenchmarkRegisterParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=NoneType, - ) - - -class AsyncBenchmarksResource(AsyncAPIResource): - @cached_property - def with_raw_response(self) -> AsyncBenchmarksResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncBenchmarksResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncBenchmarksResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncBenchmarksResourceWithStreamingResponse(self) - - async def retrieve( - self, - benchmark_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
- # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Benchmark: - """ - Get a benchmark by its ID. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not benchmark_id: - raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") - return await self._get( - f"/v1/eval/benchmarks/{benchmark_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Benchmark, - ) - - async def list( - self, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> BenchmarkListResponse: - """List all benchmarks.""" - return await self._get( - "/v1/eval/benchmarks", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - post_parser=DataWrapper[BenchmarkListResponse]._unwrapper, - ), - cast_to=cast(Type[BenchmarkListResponse], DataWrapper[BenchmarkListResponse]), - ) - - async def register( - self, - *, - benchmark_id: str, - dataset_id: str, - scoring_functions: List[str], - metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - provider_benchmark_id: str | NotGiven = NOT_GIVEN, - provider_id: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> None: - """ - Register a benchmark. - - Args: - benchmark_id: The ID of the benchmark to register. - - dataset_id: The ID of the dataset to use for the benchmark. - - scoring_functions: The scoring functions to use for the benchmark. - - metadata: The metadata to use for the benchmark. - - provider_benchmark_id: The ID of the provider benchmark to use for the benchmark. - - provider_id: The ID of the provider to use for the benchmark. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"Accept": "*/*", **(extra_headers or {})} - return await self._post( - "/v1/eval/benchmarks", - body=await async_maybe_transform( - { - "benchmark_id": benchmark_id, - "dataset_id": dataset_id, - "scoring_functions": scoring_functions, - "metadata": metadata, - "provider_benchmark_id": provider_benchmark_id, - "provider_id": provider_id, - }, - benchmark_register_params.BenchmarkRegisterParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=NoneType, - ) - - -class BenchmarksResourceWithRawResponse: - def __init__(self, benchmarks: BenchmarksResource) -> None: - self._benchmarks = benchmarks - - self.retrieve = to_raw_response_wrapper( - benchmarks.retrieve, - ) - self.list = to_raw_response_wrapper( - benchmarks.list, - ) - self.register = to_raw_response_wrapper( - benchmarks.register, - ) - - -class AsyncBenchmarksResourceWithRawResponse: - def __init__(self, benchmarks: AsyncBenchmarksResource) -> None: - self._benchmarks = benchmarks - - self.retrieve = async_to_raw_response_wrapper( - benchmarks.retrieve, - ) - self.list = async_to_raw_response_wrapper( - benchmarks.list, - ) - self.register = async_to_raw_response_wrapper( - benchmarks.register, - ) - - -class BenchmarksResourceWithStreamingResponse: - def __init__(self, benchmarks: BenchmarksResource) -> None: - self._benchmarks = benchmarks - - self.retrieve = to_streamed_response_wrapper( - benchmarks.retrieve, - ) - self.list = to_streamed_response_wrapper( - benchmarks.list, - ) - self.register = to_streamed_response_wrapper( - benchmarks.register, - ) - - -class AsyncBenchmarksResourceWithStreamingResponse: - def __init__(self, benchmarks: AsyncBenchmarksResource) -> None: - self._benchmarks = benchmarks - - self.retrieve = async_to_streamed_response_wrapper( - benchmarks.retrieve, - ) - self.list = async_to_streamed_response_wrapper( - benchmarks.list, - ) - self.register = async_to_streamed_response_wrapper( - benchmarks.register, - ) diff --git a/src/llama_stack_client/resources/chat/__init__.py b/src/llama_stack_client/resources/chat/__init__.py deleted file mode 100644 index ec960eb4..00000000 --- a/src/llama_stack_client/resources/chat/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
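# A minimal sketch for the benchmarks resource deleted above. Assumptions: the
# synchronous client class (LlamaStackClient), the attribute name
# (client.benchmarks), and every identifier value are placeholders for
# illustration; the method names and parameters mirror the signatures in this
# file (register returns None, list returns a BenchmarkListResponse).
from llama_stack_client import LlamaStackClient

client = LlamaStackClient()

# Register a benchmark against an existing dataset and scoring functions.
client.benchmarks.register(
    benchmark_id="mmlu-mini",                  # placeholder identifiers
    dataset_id="mmlu-mini-dataset",
    scoring_functions=["basic::regex_match"],
    metadata={"split": "validation"},
)

# List all registered benchmarks, then fetch one by its ID.
benchmarks = client.benchmarks.list()
print(benchmarks)

benchmark = client.benchmarks.retrieve("mmlu-mini")
print(benchmark)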
- -from .chat import ( - ChatResource, - AsyncChatResource, - ChatResourceWithRawResponse, - AsyncChatResourceWithRawResponse, - ChatResourceWithStreamingResponse, - AsyncChatResourceWithStreamingResponse, -) -from .completions import ( - CompletionsResource, - AsyncCompletionsResource, - CompletionsResourceWithRawResponse, - AsyncCompletionsResourceWithRawResponse, - CompletionsResourceWithStreamingResponse, - AsyncCompletionsResourceWithStreamingResponse, -) - -__all__ = [ - "CompletionsResource", - "AsyncCompletionsResource", - "CompletionsResourceWithRawResponse", - "AsyncCompletionsResourceWithRawResponse", - "CompletionsResourceWithStreamingResponse", - "AsyncCompletionsResourceWithStreamingResponse", - "ChatResource", - "AsyncChatResource", - "ChatResourceWithRawResponse", - "AsyncChatResourceWithRawResponse", - "ChatResourceWithStreamingResponse", - "AsyncChatResourceWithStreamingResponse", -] diff --git a/src/llama_stack_client/resources/chat/chat.py b/src/llama_stack_client/resources/chat/chat.py deleted file mode 100644 index 3e3715c1..00000000 --- a/src/llama_stack_client/resources/chat/chat.py +++ /dev/null @@ -1,102 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from ..._compat import cached_property -from ..._resource import SyncAPIResource, AsyncAPIResource -from .completions import ( - CompletionsResource, - AsyncCompletionsResource, - CompletionsResourceWithRawResponse, - AsyncCompletionsResourceWithRawResponse, - CompletionsResourceWithStreamingResponse, - AsyncCompletionsResourceWithStreamingResponse, -) - -__all__ = ["ChatResource", "AsyncChatResource"] - - -class ChatResource(SyncAPIResource): - @cached_property - def completions(self) -> CompletionsResource: - return CompletionsResource(self._client) - - @cached_property - def with_raw_response(self) -> ChatResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return ChatResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> ChatResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return ChatResourceWithStreamingResponse(self) - - -class AsyncChatResource(AsyncAPIResource): - @cached_property - def completions(self) -> AsyncCompletionsResource: - return AsyncCompletionsResource(self._client) - - @cached_property - def with_raw_response(self) -> AsyncChatResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncChatResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncChatResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
- - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncChatResourceWithStreamingResponse(self) - - -class ChatResourceWithRawResponse: - def __init__(self, chat: ChatResource) -> None: - self._chat = chat - - @cached_property - def completions(self) -> CompletionsResourceWithRawResponse: - return CompletionsResourceWithRawResponse(self._chat.completions) - - -class AsyncChatResourceWithRawResponse: - def __init__(self, chat: AsyncChatResource) -> None: - self._chat = chat - - @cached_property - def completions(self) -> AsyncCompletionsResourceWithRawResponse: - return AsyncCompletionsResourceWithRawResponse(self._chat.completions) - - -class ChatResourceWithStreamingResponse: - def __init__(self, chat: ChatResource) -> None: - self._chat = chat - - @cached_property - def completions(self) -> CompletionsResourceWithStreamingResponse: - return CompletionsResourceWithStreamingResponse(self._chat.completions) - - -class AsyncChatResourceWithStreamingResponse: - def __init__(self, chat: AsyncChatResource) -> None: - self._chat = chat - - @cached_property - def completions(self) -> AsyncCompletionsResourceWithStreamingResponse: - return AsyncCompletionsResourceWithStreamingResponse(self._chat.completions) diff --git a/src/llama_stack_client/resources/chat/completions.py b/src/llama_stack_client/resources/chat/completions.py deleted file mode 100644 index 1355f97a..00000000 --- a/src/llama_stack_client/resources/chat/completions.py +++ /dev/null @@ -1,1048 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Any, Dict, List, Union, Iterable, cast -from typing_extensions import Literal, overload - -import httpx - -from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ..._utils import required_args, maybe_transform, async_maybe_transform -from ..._compat import cached_property -from ..._resource import SyncAPIResource, AsyncAPIResource -from ..._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from ..._streaming import Stream, AsyncStream -from ...types.chat import completion_list_params, completion_create_params -from ..._base_client import make_request_options -from ...types.chat_completion_chunk import ChatCompletionChunk -from ...types.chat.completion_list_response import CompletionListResponse -from ...types.chat.completion_create_response import CompletionCreateResponse -from ...types.chat.completion_retrieve_response import CompletionRetrieveResponse - -__all__ = ["CompletionsResource", "AsyncCompletionsResource"] - - -class CompletionsResource(SyncAPIResource): - @cached_property - def with_raw_response(self) -> CompletionsResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return CompletionsResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> CompletionsResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
- - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return CompletionsResourceWithStreamingResponse(self) - - @overload - def create( - self, - *, - messages: Iterable[completion_create_params.Message], - model: str, - frequency_penalty: float | NotGiven = NOT_GIVEN, - function_call: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - | NotGiven = NOT_GIVEN, - functions: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_completion_tokens: int | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - parallel_tool_calls: bool | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, - stream: Literal[False] | NotGiven = NOT_GIVEN, - stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - | NotGiven = NOT_GIVEN, - tools: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] | NotGiven = NOT_GIVEN, - top_logprobs: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> CompletionCreateResponse: - """ - Generate an OpenAI-compatible chat completion for the given messages using the - specified model. - - Args: - messages: List of messages in the conversation. - - model: The identifier of the model to use. The model must be registered with Llama - Stack and available via the /models endpoint. - - frequency_penalty: (Optional) The penalty for repeated tokens. - - function_call: (Optional) The function call to use. - - functions: (Optional) List of functions to use. - - logit_bias: (Optional) The logit bias to use. - - logprobs: (Optional) The log probabilities to use. - - max_completion_tokens: (Optional) The maximum number of tokens to generate. - - max_tokens: (Optional) The maximum number of tokens to generate. - - n: (Optional) The number of completions to generate. - - parallel_tool_calls: (Optional) Whether to parallelize tool calls. - - presence_penalty: (Optional) The penalty for repeated tokens. - - response_format: (Optional) The response format to use. - - seed: (Optional) The seed to use. - - stop: (Optional) The stop tokens to use. - - stream: (Optional) Whether to stream the response. - - stream_options: (Optional) The stream options to use. - - temperature: (Optional) The temperature to use. - - tool_choice: (Optional) The tool choice to use. - - tools: (Optional) The tools to use. - - top_logprobs: (Optional) The top log probabilities to use. - - top_p: (Optional) The top p to use. - - user: (Optional) The user to use. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - def create( - self, - *, - messages: Iterable[completion_create_params.Message], - model: str, - stream: Literal[True], - frequency_penalty: float | NotGiven = NOT_GIVEN, - function_call: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - | NotGiven = NOT_GIVEN, - functions: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_completion_tokens: int | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - parallel_tool_calls: bool | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, - stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - | NotGiven = NOT_GIVEN, - tools: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] | NotGiven = NOT_GIVEN, - top_logprobs: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Stream[ChatCompletionChunk]: - """ - Generate an OpenAI-compatible chat completion for the given messages using the - specified model. - - Args: - messages: List of messages in the conversation. - - model: The identifier of the model to use. The model must be registered with Llama - Stack and available via the /models endpoint. - - stream: (Optional) Whether to stream the response. - - frequency_penalty: (Optional) The penalty for repeated tokens. - - function_call: (Optional) The function call to use. - - functions: (Optional) List of functions to use. - - logit_bias: (Optional) The logit bias to use. - - logprobs: (Optional) The log probabilities to use. - - max_completion_tokens: (Optional) The maximum number of tokens to generate. - - max_tokens: (Optional) The maximum number of tokens to generate. - - n: (Optional) The number of completions to generate. - - parallel_tool_calls: (Optional) Whether to parallelize tool calls. - - presence_penalty: (Optional) The penalty for repeated tokens. - - response_format: (Optional) The response format to use. - - seed: (Optional) The seed to use. - - stop: (Optional) The stop tokens to use. - - stream_options: (Optional) The stream options to use. - - temperature: (Optional) The temperature to use. - - tool_choice: (Optional) The tool choice to use. - - tools: (Optional) The tools to use. - - top_logprobs: (Optional) The top log probabilities to use. - - top_p: (Optional) The top p to use. 
- - user: (Optional) The user to use. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - def create( - self, - *, - messages: Iterable[completion_create_params.Message], - model: str, - stream: bool, - frequency_penalty: float | NotGiven = NOT_GIVEN, - function_call: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - | NotGiven = NOT_GIVEN, - functions: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_completion_tokens: int | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - parallel_tool_calls: bool | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, - stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - | NotGiven = NOT_GIVEN, - tools: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] | NotGiven = NOT_GIVEN, - top_logprobs: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> CompletionCreateResponse | Stream[ChatCompletionChunk]: - """ - Generate an OpenAI-compatible chat completion for the given messages using the - specified model. - - Args: - messages: List of messages in the conversation. - - model: The identifier of the model to use. The model must be registered with Llama - Stack and available via the /models endpoint. - - stream: (Optional) Whether to stream the response. - - frequency_penalty: (Optional) The penalty for repeated tokens. - - function_call: (Optional) The function call to use. - - functions: (Optional) List of functions to use. - - logit_bias: (Optional) The logit bias to use. - - logprobs: (Optional) The log probabilities to use. - - max_completion_tokens: (Optional) The maximum number of tokens to generate. - - max_tokens: (Optional) The maximum number of tokens to generate. - - n: (Optional) The number of completions to generate. - - parallel_tool_calls: (Optional) Whether to parallelize tool calls. - - presence_penalty: (Optional) The penalty for repeated tokens. - - response_format: (Optional) The response format to use. - - seed: (Optional) The seed to use. - - stop: (Optional) The stop tokens to use. - - stream_options: (Optional) The stream options to use. - - temperature: (Optional) The temperature to use. - - tool_choice: (Optional) The tool choice to use. - - tools: (Optional) The tools to use. 
- - top_logprobs: (Optional) The top log probabilities to use. - - top_p: (Optional) The top p to use. - - user: (Optional) The user to use. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @required_args(["messages", "model"], ["messages", "model", "stream"]) - def create( - self, - *, - messages: Iterable[completion_create_params.Message], - model: str, - frequency_penalty: float | NotGiven = NOT_GIVEN, - function_call: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - | NotGiven = NOT_GIVEN, - functions: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_completion_tokens: int | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - parallel_tool_calls: bool | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, - stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, - stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - | NotGiven = NOT_GIVEN, - tools: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] | NotGiven = NOT_GIVEN, - top_logprobs: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> CompletionCreateResponse | Stream[ChatCompletionChunk]: - return self._post( - "/v1/openai/v1/chat/completions", - body=maybe_transform( - { - "messages": messages, - "model": model, - "frequency_penalty": frequency_penalty, - "function_call": function_call, - "functions": functions, - "logit_bias": logit_bias, - "logprobs": logprobs, - "max_completion_tokens": max_completion_tokens, - "max_tokens": max_tokens, - "n": n, - "parallel_tool_calls": parallel_tool_calls, - "presence_penalty": presence_penalty, - "response_format": response_format, - "seed": seed, - "stop": stop, - "stream": stream, - "stream_options": stream_options, - "temperature": temperature, - "tool_choice": tool_choice, - "tools": tools, - "top_logprobs": top_logprobs, - "top_p": top_p, - "user": user, - }, - completion_create_params.CompletionCreateParamsStreaming - if stream - else completion_create_params.CompletionCreateParamsNonStreaming, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=cast( - Any, CompletionCreateResponse - ), # Union types cannot be passed in as arguments in the type system - stream=stream or False, - stream_cls=Stream[ChatCompletionChunk], - ) - - def retrieve( - self, - completion_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> CompletionRetrieveResponse: - """ - Describe a chat completion by its ID. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not completion_id: - raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") - return self._get( - f"/v1/openai/v1/chat/completions/{completion_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=CompletionRetrieveResponse, - ) - - def list( - self, - *, - after: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> CompletionListResponse: - """ - List all chat completions. - - Args: - after: The ID of the last chat completion to return. - - limit: The maximum number of chat completions to return. - - model: The model to filter by. - - order: The order to sort the chat completions by: "asc" or "desc". Defaults to "desc". 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return self._get( - "/v1/openai/v1/chat/completions", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=maybe_transform( - { - "after": after, - "limit": limit, - "model": model, - "order": order, - }, - completion_list_params.CompletionListParams, - ), - ), - cast_to=CompletionListResponse, - ) - - -class AsyncCompletionsResource(AsyncAPIResource): - @cached_property - def with_raw_response(self) -> AsyncCompletionsResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncCompletionsResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncCompletionsResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncCompletionsResourceWithStreamingResponse(self) - - @overload - async def create( - self, - *, - messages: Iterable[completion_create_params.Message], - model: str, - frequency_penalty: float | NotGiven = NOT_GIVEN, - function_call: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - | NotGiven = NOT_GIVEN, - functions: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_completion_tokens: int | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - parallel_tool_calls: bool | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, - stream: Literal[False] | NotGiven = NOT_GIVEN, - stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - | NotGiven = NOT_GIVEN, - tools: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] | NotGiven = NOT_GIVEN, - top_logprobs: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> CompletionCreateResponse: - """ - Generate an OpenAI-compatible chat completion for the given messages using the - specified model. 
- - Args: - messages: List of messages in the conversation. - - model: The identifier of the model to use. The model must be registered with Llama - Stack and available via the /models endpoint. - - frequency_penalty: (Optional) The penalty for repeated tokens. - - function_call: (Optional) The function call to use. - - functions: (Optional) List of functions to use. - - logit_bias: (Optional) The logit bias to use. - - logprobs: (Optional) The log probabilities to use. - - max_completion_tokens: (Optional) The maximum number of tokens to generate. - - max_tokens: (Optional) The maximum number of tokens to generate. - - n: (Optional) The number of completions to generate. - - parallel_tool_calls: (Optional) Whether to parallelize tool calls. - - presence_penalty: (Optional) The penalty for repeated tokens. - - response_format: (Optional) The response format to use. - - seed: (Optional) The seed to use. - - stop: (Optional) The stop tokens to use. - - stream: (Optional) Whether to stream the response. - - stream_options: (Optional) The stream options to use. - - temperature: (Optional) The temperature to use. - - tool_choice: (Optional) The tool choice to use. - - tools: (Optional) The tools to use. - - top_logprobs: (Optional) The top log probabilities to use. - - top_p: (Optional) The top p to use. - - user: (Optional) The user to use. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - async def create( - self, - *, - messages: Iterable[completion_create_params.Message], - model: str, - stream: Literal[True], - frequency_penalty: float | NotGiven = NOT_GIVEN, - function_call: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - | NotGiven = NOT_GIVEN, - functions: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_completion_tokens: int | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - parallel_tool_calls: bool | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, - stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - | NotGiven = NOT_GIVEN, - tools: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] | NotGiven = NOT_GIVEN, - top_logprobs: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncStream[ChatCompletionChunk]: - """ - Generate an OpenAI-compatible chat completion for the given messages using the - specified model. - - Args: - messages: List of messages in the conversation. - - model: The identifier of the model to use. The model must be registered with Llama - Stack and available via the /models endpoint. - - stream: (Optional) Whether to stream the response. - - frequency_penalty: (Optional) The penalty for repeated tokens. - - function_call: (Optional) The function call to use. - - functions: (Optional) List of functions to use. - - logit_bias: (Optional) The logit bias to use. - - logprobs: (Optional) The log probabilities to use. - - max_completion_tokens: (Optional) The maximum number of tokens to generate. - - max_tokens: (Optional) The maximum number of tokens to generate. - - n: (Optional) The number of completions to generate. - - parallel_tool_calls: (Optional) Whether to parallelize tool calls. - - presence_penalty: (Optional) The penalty for repeated tokens. - - response_format: (Optional) The response format to use. - - seed: (Optional) The seed to use. - - stop: (Optional) The stop tokens to use. - - stream_options: (Optional) The stream options to use. - - temperature: (Optional) The temperature to use. - - tool_choice: (Optional) The tool choice to use. - - tools: (Optional) The tools to use. - - top_logprobs: (Optional) The top log probabilities to use. - - top_p: (Optional) The top p to use. - - user: (Optional) The user to use. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - async def create( - self, - *, - messages: Iterable[completion_create_params.Message], - model: str, - stream: bool, - frequency_penalty: float | NotGiven = NOT_GIVEN, - function_call: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - | NotGiven = NOT_GIVEN, - functions: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_completion_tokens: int | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - parallel_tool_calls: bool | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, - stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - | NotGiven = NOT_GIVEN, - tools: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] | NotGiven = NOT_GIVEN, - top_logprobs: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
- # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> CompletionCreateResponse | AsyncStream[ChatCompletionChunk]: - """ - Generate an OpenAI-compatible chat completion for the given messages using the - specified model. - - Args: - messages: List of messages in the conversation. - - model: The identifier of the model to use. The model must be registered with Llama - Stack and available via the /models endpoint. - - stream: (Optional) Whether to stream the response. - - frequency_penalty: (Optional) The penalty for repeated tokens. - - function_call: (Optional) The function call to use. - - functions: (Optional) List of functions to use. - - logit_bias: (Optional) The logit bias to use. - - logprobs: (Optional) The log probabilities to use. - - max_completion_tokens: (Optional) The maximum number of tokens to generate. - - max_tokens: (Optional) The maximum number of tokens to generate. - - n: (Optional) The number of completions to generate. - - parallel_tool_calls: (Optional) Whether to parallelize tool calls. - - presence_penalty: (Optional) The penalty for repeated tokens. - - response_format: (Optional) The response format to use. - - seed: (Optional) The seed to use. - - stop: (Optional) The stop tokens to use. - - stream_options: (Optional) The stream options to use. - - temperature: (Optional) The temperature to use. - - tool_choice: (Optional) The tool choice to use. - - tools: (Optional) The tools to use. - - top_logprobs: (Optional) The top log probabilities to use. - - top_p: (Optional) The top p to use. - - user: (Optional) The user to use. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... 
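# A minimal sketch of the async OpenAI-compatible chat completion call whose
# overloads are declared above. Assumptions: the model identifier is a
# placeholder (it must be registered with the Llama Stack server and visible
# via /models), and the message dict follows the OpenAI-compatible shape the
# endpoint documents; the keyword arguments mirror the signatures in this file.
import asyncio

from llama_stack_client import AsyncLlamaStackClient


async def main() -> None:
    client = AsyncLlamaStackClient()

    # Non-streaming: returns a CompletionCreateResponse.
    response = await client.chat.completions.create(
        model="meta-llama/Llama-3.2-3B-Instruct",  # placeholder model id
        messages=[{"role": "user", "content": "Write a haiku about the ocean."}],
        temperature=0.7,
    )
    print(response)

    # Streaming: stream=True yields ChatCompletionChunk events.
    stream = await client.chat.completions.create(
        model="meta-llama/Llama-3.2-3B-Instruct",
        messages=[{"role": "user", "content": "Write a haiku about the ocean."}],
        stream=True,
    )
    async for chunk in stream:
        print(chunk)


asyncio.run(main())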
- - @required_args(["messages", "model"], ["messages", "model", "stream"]) - async def create( - self, - *, - messages: Iterable[completion_create_params.Message], - model: str, - frequency_penalty: float | NotGiven = NOT_GIVEN, - function_call: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - | NotGiven = NOT_GIVEN, - functions: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_completion_tokens: int | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - parallel_tool_calls: bool | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, - stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, - stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - tool_choice: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - | NotGiven = NOT_GIVEN, - tools: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] | NotGiven = NOT_GIVEN, - top_logprobs: int | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> CompletionCreateResponse | AsyncStream[ChatCompletionChunk]: - return await self._post( - "/v1/openai/v1/chat/completions", - body=await async_maybe_transform( - { - "messages": messages, - "model": model, - "frequency_penalty": frequency_penalty, - "function_call": function_call, - "functions": functions, - "logit_bias": logit_bias, - "logprobs": logprobs, - "max_completion_tokens": max_completion_tokens, - "max_tokens": max_tokens, - "n": n, - "parallel_tool_calls": parallel_tool_calls, - "presence_penalty": presence_penalty, - "response_format": response_format, - "seed": seed, - "stop": stop, - "stream": stream, - "stream_options": stream_options, - "temperature": temperature, - "tool_choice": tool_choice, - "tools": tools, - "top_logprobs": top_logprobs, - "top_p": top_p, - "user": user, - }, - completion_create_params.CompletionCreateParamsStreaming - if stream - else completion_create_params.CompletionCreateParamsNonStreaming, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=cast( - Any, CompletionCreateResponse - ), # Union types cannot be passed in as arguments in the type system - stream=stream or False, - stream_cls=AsyncStream[ChatCompletionChunk], - ) - - async def retrieve( - self, - completion_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> CompletionRetrieveResponse: - """ - Describe a chat completion by its ID. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not completion_id: - raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") - return await self._get( - f"/v1/openai/v1/chat/completions/{completion_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=CompletionRetrieveResponse, - ) - - async def list( - self, - *, - after: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> CompletionListResponse: - """ - List all chat completions. - - Args: - after: The ID of the last chat completion to return. - - limit: The maximum number of chat completions to return. - - model: The model to filter by. - - order: The order to sort the chat completions by: "asc" or "desc". Defaults to "desc". 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return await self._get( - "/v1/openai/v1/chat/completions", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=await async_maybe_transform( - { - "after": after, - "limit": limit, - "model": model, - "order": order, - }, - completion_list_params.CompletionListParams, - ), - ), - cast_to=CompletionListResponse, - ) - - -class CompletionsResourceWithRawResponse: - def __init__(self, completions: CompletionsResource) -> None: - self._completions = completions - - self.create = to_raw_response_wrapper( - completions.create, - ) - self.retrieve = to_raw_response_wrapper( - completions.retrieve, - ) - self.list = to_raw_response_wrapper( - completions.list, - ) - - -class AsyncCompletionsResourceWithRawResponse: - def __init__(self, completions: AsyncCompletionsResource) -> None: - self._completions = completions - - self.create = async_to_raw_response_wrapper( - completions.create, - ) - self.retrieve = async_to_raw_response_wrapper( - completions.retrieve, - ) - self.list = async_to_raw_response_wrapper( - completions.list, - ) - - -class CompletionsResourceWithStreamingResponse: - def __init__(self, completions: CompletionsResource) -> None: - self._completions = completions - - self.create = to_streamed_response_wrapper( - completions.create, - ) - self.retrieve = to_streamed_response_wrapper( - completions.retrieve, - ) - self.list = to_streamed_response_wrapper( - completions.list, - ) - - -class AsyncCompletionsResourceWithStreamingResponse: - def __init__(self, completions: AsyncCompletionsResource) -> None: - self._completions = completions - - self.create = async_to_streamed_response_wrapper( - completions.create, - ) - self.retrieve = async_to_streamed_response_wrapper( - completions.retrieve, - ) - self.list = async_to_streamed_response_wrapper( - completions.list, - ) diff --git a/src/llama_stack_client/resources/completions.py b/src/llama_stack_client/resources/completions.py deleted file mode 100644 index 23554ccb..00000000 --- a/src/llama_stack_client/resources/completions.py +++ /dev/null @@ -1,737 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
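A minimal sketch of the retrieve and list helpers documented above, under the same assumptions (client name, base_url, and the completion id shown are illustrative, not taken from the diff):

import asyncio

from llama_stack_client import AsyncLlamaStackClient


async def show_history() -> None:
    client = AsyncLlamaStackClient(base_url="http://localhost:8321")  # illustrative base_url
    # list() returns CompletionListResponse; limit/order mirror the documented query params
    page = await client.chat.completions.list(limit=5, order="desc")
    print(page)
    # retrieve() looks a single chat completion up by its id (the id here is a placeholder)
    detail = await client.chat.completions.retrieve("chatcmpl-123")
    print(detail)


asyncio.run(show_history())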
- -from __future__ import annotations - -from typing import Dict, List, Union, Iterable -from typing_extensions import Literal, overload - -import httpx - -from ..types import completion_create_params -from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._utils import required_args, maybe_transform, async_maybe_transform -from .._compat import cached_property -from .._resource import SyncAPIResource, AsyncAPIResource -from .._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from .._streaming import Stream, AsyncStream -from .._base_client import make_request_options -from ..types.completion_create_response import CompletionCreateResponse - -__all__ = ["CompletionsResource", "AsyncCompletionsResource"] - - -class CompletionsResource(SyncAPIResource): - @cached_property - def with_raw_response(self) -> CompletionsResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return CompletionsResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> CompletionsResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return CompletionsResourceWithStreamingResponse(self) - - @overload - def create( - self, - *, - model: str, - prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]], - best_of: int | NotGiven = NOT_GIVEN, - echo: bool | NotGiven = NOT_GIVEN, - frequency_penalty: float | NotGiven = NOT_GIVEN, - guided_choice: List[str] | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - prompt_logprobs: int | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, - stream: Literal[False] | NotGiven = NOT_GIVEN, - stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - suffix: str | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> CompletionCreateResponse: - """ - Generate an OpenAI-compatible completion for the given prompt using the - specified model. - - Args: - model: The identifier of the model to use. The model must be registered with Llama - Stack and available via the /models endpoint. - - prompt: The prompt to generate a completion for. - - best_of: (Optional) The number of completions to generate. - - echo: (Optional) Whether to echo the prompt. - - frequency_penalty: (Optional) The penalty for repeated tokens. 
- - logit_bias: (Optional) The logit bias to use. - - logprobs: (Optional) The log probabilities to use. - - max_tokens: (Optional) The maximum number of tokens to generate. - - n: (Optional) The number of completions to generate. - - presence_penalty: (Optional) The penalty for repeated tokens. - - seed: (Optional) The seed to use. - - stop: (Optional) The stop tokens to use. - - stream: (Optional) Whether to stream the response. - - stream_options: (Optional) The stream options to use. - - suffix: (Optional) The suffix that should be appended to the completion. - - temperature: (Optional) The temperature to use. - - top_p: (Optional) The top p to use. - - user: (Optional) The user to use. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - def create( - self, - *, - model: str, - prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]], - stream: Literal[True], - best_of: int | NotGiven = NOT_GIVEN, - echo: bool | NotGiven = NOT_GIVEN, - frequency_penalty: float | NotGiven = NOT_GIVEN, - guided_choice: List[str] | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - prompt_logprobs: int | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, - stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - suffix: str | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Stream[CompletionCreateResponse]: - """ - Generate an OpenAI-compatible completion for the given prompt using the - specified model. - - Args: - model: The identifier of the model to use. The model must be registered with Llama - Stack and available via the /models endpoint. - - prompt: The prompt to generate a completion for. - - stream: (Optional) Whether to stream the response. - - best_of: (Optional) The number of completions to generate. - - echo: (Optional) Whether to echo the prompt. - - frequency_penalty: (Optional) The penalty for repeated tokens. - - logit_bias: (Optional) The logit bias to use. - - logprobs: (Optional) The log probabilities to use. - - max_tokens: (Optional) The maximum number of tokens to generate. - - n: (Optional) The number of completions to generate. - - presence_penalty: (Optional) The penalty for repeated tokens. - - seed: (Optional) The seed to use. - - stop: (Optional) The stop tokens to use. - - stream_options: (Optional) The stream options to use. - - suffix: (Optional) The suffix that should be appended to the completion. - - temperature: (Optional) The temperature to use. - - top_p: (Optional) The top p to use. - - user: (Optional) The user to use. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - def create( - self, - *, - model: str, - prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]], - stream: bool, - best_of: int | NotGiven = NOT_GIVEN, - echo: bool | NotGiven = NOT_GIVEN, - frequency_penalty: float | NotGiven = NOT_GIVEN, - guided_choice: List[str] | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - prompt_logprobs: int | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, - stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - suffix: str | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> CompletionCreateResponse | Stream[CompletionCreateResponse]: - """ - Generate an OpenAI-compatible completion for the given prompt using the - specified model. - - Args: - model: The identifier of the model to use. The model must be registered with Llama - Stack and available via the /models endpoint. - - prompt: The prompt to generate a completion for. - - stream: (Optional) Whether to stream the response. - - best_of: (Optional) The number of completions to generate. - - echo: (Optional) Whether to echo the prompt. - - frequency_penalty: (Optional) The penalty for repeated tokens. - - logit_bias: (Optional) The logit bias to use. - - logprobs: (Optional) The log probabilities to use. - - max_tokens: (Optional) The maximum number of tokens to generate. - - n: (Optional) The number of completions to generate. - - presence_penalty: (Optional) The penalty for repeated tokens. - - seed: (Optional) The seed to use. - - stop: (Optional) The stop tokens to use. - - stream_options: (Optional) The stream options to use. - - suffix: (Optional) The suffix that should be appended to the completion. - - temperature: (Optional) The temperature to use. - - top_p: (Optional) The top p to use. - - user: (Optional) The user to use. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... 
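A minimal sketch of the non-streaming sync overload documented above, assuming the pre-removal LlamaStackClient surface and a placeholder model id:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # illustrative base_url
# non-streaming call; returns CompletionCreateResponse as documented above
resp = client.completions.create(
    model="meta-llama/Llama-3.1-8B-Instruct",  # assumed model id; must be registered via /models
    prompt="Write a haiku about the sea.",
    max_tokens=64,
    temperature=0.7,
)
print(resp)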
- - @required_args(["model", "prompt"], ["model", "prompt", "stream"]) - def create( - self, - *, - model: str, - prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]], - best_of: int | NotGiven = NOT_GIVEN, - echo: bool | NotGiven = NOT_GIVEN, - frequency_penalty: float | NotGiven = NOT_GIVEN, - guided_choice: List[str] | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - prompt_logprobs: int | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, - stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, - stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - suffix: str | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> CompletionCreateResponse | Stream[CompletionCreateResponse]: - return self._post( - "/v1/openai/v1/completions", - body=maybe_transform( - { - "model": model, - "prompt": prompt, - "best_of": best_of, - "echo": echo, - "frequency_penalty": frequency_penalty, - "guided_choice": guided_choice, - "logit_bias": logit_bias, - "logprobs": logprobs, - "max_tokens": max_tokens, - "n": n, - "presence_penalty": presence_penalty, - "prompt_logprobs": prompt_logprobs, - "seed": seed, - "stop": stop, - "stream": stream, - "stream_options": stream_options, - "suffix": suffix, - "temperature": temperature, - "top_p": top_p, - "user": user, - }, - completion_create_params.CompletionCreateParamsStreaming - if stream - else completion_create_params.CompletionCreateParamsNonStreaming, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=CompletionCreateResponse, - stream=stream or False, - stream_cls=Stream[CompletionCreateResponse], - ) - - -class AsyncCompletionsResource(AsyncAPIResource): - @cached_property - def with_raw_response(self) -> AsyncCompletionsResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncCompletionsResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncCompletionsResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
- - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncCompletionsResourceWithStreamingResponse(self) - - @overload - async def create( - self, - *, - model: str, - prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]], - best_of: int | NotGiven = NOT_GIVEN, - echo: bool | NotGiven = NOT_GIVEN, - frequency_penalty: float | NotGiven = NOT_GIVEN, - guided_choice: List[str] | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - prompt_logprobs: int | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, - stream: Literal[False] | NotGiven = NOT_GIVEN, - stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - suffix: str | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> CompletionCreateResponse: - """ - Generate an OpenAI-compatible completion for the given prompt using the - specified model. - - Args: - model: The identifier of the model to use. The model must be registered with Llama - Stack and available via the /models endpoint. - - prompt: The prompt to generate a completion for. - - best_of: (Optional) The number of completions to generate. - - echo: (Optional) Whether to echo the prompt. - - frequency_penalty: (Optional) The penalty for repeated tokens. - - logit_bias: (Optional) The logit bias to use. - - logprobs: (Optional) The log probabilities to use. - - max_tokens: (Optional) The maximum number of tokens to generate. - - n: (Optional) The number of completions to generate. - - presence_penalty: (Optional) The penalty for repeated tokens. - - seed: (Optional) The seed to use. - - stop: (Optional) The stop tokens to use. - - stream: (Optional) Whether to stream the response. - - stream_options: (Optional) The stream options to use. - - suffix: (Optional) The suffix that should be appended to the completion. - - temperature: (Optional) The temperature to use. - - top_p: (Optional) The top p to use. - - user: (Optional) The user to use. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... 
- - @overload - async def create( - self, - *, - model: str, - prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]], - stream: Literal[True], - best_of: int | NotGiven = NOT_GIVEN, - echo: bool | NotGiven = NOT_GIVEN, - frequency_penalty: float | NotGiven = NOT_GIVEN, - guided_choice: List[str] | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - prompt_logprobs: int | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, - stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - suffix: str | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncStream[CompletionCreateResponse]: - """ - Generate an OpenAI-compatible completion for the given prompt using the - specified model. - - Args: - model: The identifier of the model to use. The model must be registered with Llama - Stack and available via the /models endpoint. - - prompt: The prompt to generate a completion for. - - stream: (Optional) Whether to stream the response. - - best_of: (Optional) The number of completions to generate. - - echo: (Optional) Whether to echo the prompt. - - frequency_penalty: (Optional) The penalty for repeated tokens. - - logit_bias: (Optional) The logit bias to use. - - logprobs: (Optional) The log probabilities to use. - - max_tokens: (Optional) The maximum number of tokens to generate. - - n: (Optional) The number of completions to generate. - - presence_penalty: (Optional) The penalty for repeated tokens. - - seed: (Optional) The seed to use. - - stop: (Optional) The stop tokens to use. - - stream_options: (Optional) The stream options to use. - - suffix: (Optional) The suffix that should be appended to the completion. - - temperature: (Optional) The temperature to use. - - top_p: (Optional) The top p to use. - - user: (Optional) The user to use. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... 
- - @overload - async def create( - self, - *, - model: str, - prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]], - stream: bool, - best_of: int | NotGiven = NOT_GIVEN, - echo: bool | NotGiven = NOT_GIVEN, - frequency_penalty: float | NotGiven = NOT_GIVEN, - guided_choice: List[str] | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - prompt_logprobs: int | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, - stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - suffix: str | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> CompletionCreateResponse | AsyncStream[CompletionCreateResponse]: - """ - Generate an OpenAI-compatible completion for the given prompt using the - specified model. - - Args: - model: The identifier of the model to use. The model must be registered with Llama - Stack and available via the /models endpoint. - - prompt: The prompt to generate a completion for. - - stream: (Optional) Whether to stream the response. - - best_of: (Optional) The number of completions to generate. - - echo: (Optional) Whether to echo the prompt. - - frequency_penalty: (Optional) The penalty for repeated tokens. - - logit_bias: (Optional) The logit bias to use. - - logprobs: (Optional) The log probabilities to use. - - max_tokens: (Optional) The maximum number of tokens to generate. - - n: (Optional) The number of completions to generate. - - presence_penalty: (Optional) The penalty for repeated tokens. - - seed: (Optional) The seed to use. - - stop: (Optional) The stop tokens to use. - - stream_options: (Optional) The stream options to use. - - suffix: (Optional) The suffix that should be appended to the completion. - - temperature: (Optional) The temperature to use. - - top_p: (Optional) The top p to use. - - user: (Optional) The user to use. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... 
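A minimal sketch of the async streaming overload documented above; the client name, base_url, and model id are again assumptions rather than values from the diff:

import asyncio

from llama_stack_client import AsyncLlamaStackClient


async def stream_completion() -> None:
    client = AsyncLlamaStackClient(base_url="http://localhost:8321")  # illustrative base_url
    # stream=True returns AsyncStream[CompletionCreateResponse] per the overload above
    stream = await client.completions.create(
        model="meta-llama/Llama-3.1-8B-Instruct",  # assumed model id
        prompt="Count to five.",
        stream=True,
    )
    async for event in stream:
        print(event)


asyncio.run(stream_completion())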
- - @required_args(["model", "prompt"], ["model", "prompt", "stream"]) - async def create( - self, - *, - model: str, - prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]], - best_of: int | NotGiven = NOT_GIVEN, - echo: bool | NotGiven = NOT_GIVEN, - frequency_penalty: float | NotGiven = NOT_GIVEN, - guided_choice: List[str] | NotGiven = NOT_GIVEN, - logit_bias: Dict[str, float] | NotGiven = NOT_GIVEN, - logprobs: bool | NotGiven = NOT_GIVEN, - max_tokens: int | NotGiven = NOT_GIVEN, - n: int | NotGiven = NOT_GIVEN, - presence_penalty: float | NotGiven = NOT_GIVEN, - prompt_logprobs: int | NotGiven = NOT_GIVEN, - seed: int | NotGiven = NOT_GIVEN, - stop: Union[str, List[str]] | NotGiven = NOT_GIVEN, - stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, - stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - suffix: str | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - top_p: float | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> CompletionCreateResponse | AsyncStream[CompletionCreateResponse]: - return await self._post( - "/v1/openai/v1/completions", - body=await async_maybe_transform( - { - "model": model, - "prompt": prompt, - "best_of": best_of, - "echo": echo, - "frequency_penalty": frequency_penalty, - "guided_choice": guided_choice, - "logit_bias": logit_bias, - "logprobs": logprobs, - "max_tokens": max_tokens, - "n": n, - "presence_penalty": presence_penalty, - "prompt_logprobs": prompt_logprobs, - "seed": seed, - "stop": stop, - "stream": stream, - "stream_options": stream_options, - "suffix": suffix, - "temperature": temperature, - "top_p": top_p, - "user": user, - }, - completion_create_params.CompletionCreateParamsStreaming - if stream - else completion_create_params.CompletionCreateParamsNonStreaming, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=CompletionCreateResponse, - stream=stream or False, - stream_cls=AsyncStream[CompletionCreateResponse], - ) - - -class CompletionsResourceWithRawResponse: - def __init__(self, completions: CompletionsResource) -> None: - self._completions = completions - - self.create = to_raw_response_wrapper( - completions.create, - ) - - -class AsyncCompletionsResourceWithRawResponse: - def __init__(self, completions: AsyncCompletionsResource) -> None: - self._completions = completions - - self.create = async_to_raw_response_wrapper( - completions.create, - ) - - -class CompletionsResourceWithStreamingResponse: - def __init__(self, completions: CompletionsResource) -> None: - self._completions = completions - - self.create = to_streamed_response_wrapper( - completions.create, - ) - - -class AsyncCompletionsResourceWithStreamingResponse: - def __init__(self, completions: AsyncCompletionsResource) -> None: - self._completions = completions - - self.create = async_to_streamed_response_wrapper( - completions.create, - ) diff --git a/src/llama_stack_client/resources/datasets.py b/src/llama_stack_client/resources/datasets.py deleted file mode 
100644 index 45dcaeba..00000000 --- a/src/llama_stack_client/resources/datasets.py +++ /dev/null @@ -1,588 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Dict, Type, Union, Iterable, cast -from typing_extensions import Literal - -import httpx - -from ..types import dataset_iterrows_params, dataset_register_params -from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven -from .._utils import maybe_transform, async_maybe_transform -from .._compat import cached_property -from .._resource import SyncAPIResource, AsyncAPIResource -from .._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from .._wrappers import DataWrapper -from .._base_client import make_request_options -from ..types.dataset_list_response import DatasetListResponse -from ..types.dataset_iterrows_response import DatasetIterrowsResponse -from ..types.dataset_register_response import DatasetRegisterResponse -from ..types.dataset_retrieve_response import DatasetRetrieveResponse - -__all__ = ["DatasetsResource", "AsyncDatasetsResource"] - - -class DatasetsResource(SyncAPIResource): - @cached_property - def with_raw_response(self) -> DatasetsResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return DatasetsResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> DatasetsResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return DatasetsResourceWithStreamingResponse(self) - - def retrieve( - self, - dataset_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> DatasetRetrieveResponse: - """ - Get a dataset by its ID. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not dataset_id: - raise ValueError(f"Expected a non-empty value for `dataset_id` but received {dataset_id!r}") - return self._get( - f"/v1/datasets/{dataset_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=DatasetRetrieveResponse, - ) - - def list( - self, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> DatasetListResponse: - """List all datasets.""" - return self._get( - "/v1/datasets", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - post_parser=DataWrapper[DatasetListResponse]._unwrapper, - ), - cast_to=cast(Type[DatasetListResponse], DataWrapper[DatasetListResponse]), - ) - - def iterrows( - self, - dataset_id: str, - *, - limit: int | NotGiven = NOT_GIVEN, - start_index: int | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> DatasetIterrowsResponse: - """Get a paginated list of rows from a dataset. - - Uses offset-based pagination where: - - - start_index: The starting index (0-based). If None, starts from beginning. - - limit: Number of items to return. If None or -1, returns all items. - - The response includes: - - - data: List of items for the current page. - - has_more: Whether there are more items available after this set. - - Args: - limit: The number of rows to get. - - start_index: Index into dataset for the first row to get. Get all rows if None. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not dataset_id: - raise ValueError(f"Expected a non-empty value for `dataset_id` but received {dataset_id!r}") - return self._get( - f"/v1/datasetio/iterrows/{dataset_id}", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=maybe_transform( - { - "limit": limit, - "start_index": start_index, - }, - dataset_iterrows_params.DatasetIterrowsParams, - ), - ), - cast_to=DatasetIterrowsResponse, - ) - - def register( - self, - *, - purpose: Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"], - source: dataset_register_params.Source, - dataset_id: str | NotGiven = NOT_GIVEN, - metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> DatasetRegisterResponse: - """Register a new dataset. - - Args: - purpose: The purpose of the dataset. - - One of: - "post-training/messages": The dataset - contains a messages column with list of messages for post-training. { - "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": - "assistant", "content": "Hello, world!"}, ] } - "eval/question-answer": The - dataset contains a question column and an answer column for evaluation. 
{ - "question": "What is the capital of France?", "answer": "Paris" } - - "eval/messages-answer": The dataset contains a messages column with list of - messages and an answer column for evaluation. { "messages": [ {"role": "user", - "content": "Hello, my name is John Doe."}, {"role": "assistant", "content": - "Hello, John Doe. How can I help you today?"}, {"role": "user", "content": - "What's my name?"}, ], "answer": "John Doe" } - - source: The data source of the dataset. Ensure that the data source schema is compatible - with the purpose of the dataset. Examples: - { "type": "uri", "uri": - "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri": - "lsfs://mydata.jsonl" } - { "type": "uri", "uri": - "data:csv;base64,{base64_content}" } - { "type": "uri", "uri": - "huggingface://llamastack/simpleqa?split=train" } - { "type": "rows", "rows": [ - { "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": - "assistant", "content": "Hello, world!"}, ] } ] } - - dataset_id: The ID of the dataset. If not provided, an ID will be generated. - - metadata: The metadata for the dataset. - E.g. {"description": "My dataset"}. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return self._post( - "/v1/datasets", - body=maybe_transform( - { - "purpose": purpose, - "source": source, - "dataset_id": dataset_id, - "metadata": metadata, - }, - dataset_register_params.DatasetRegisterParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=DatasetRegisterResponse, - ) - - def unregister( - self, - dataset_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> None: - """ - Unregister a dataset by its ID. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not dataset_id: - raise ValueError(f"Expected a non-empty value for `dataset_id` but received {dataset_id!r}") - extra_headers = {"Accept": "*/*", **(extra_headers or {})} - return self._delete( - f"/v1/datasets/{dataset_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=NoneType, - ) - - -class AsyncDatasetsResource(AsyncAPIResource): - @cached_property - def with_raw_response(self) -> AsyncDatasetsResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. 
- - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncDatasetsResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncDatasetsResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncDatasetsResourceWithStreamingResponse(self) - - async def retrieve( - self, - dataset_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> DatasetRetrieveResponse: - """ - Get a dataset by its ID. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not dataset_id: - raise ValueError(f"Expected a non-empty value for `dataset_id` but received {dataset_id!r}") - return await self._get( - f"/v1/datasets/{dataset_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=DatasetRetrieveResponse, - ) - - async def list( - self, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> DatasetListResponse: - """List all datasets.""" - return await self._get( - "/v1/datasets", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - post_parser=DataWrapper[DatasetListResponse]._unwrapper, - ), - cast_to=cast(Type[DatasetListResponse], DataWrapper[DatasetListResponse]), - ) - - async def iterrows( - self, - dataset_id: str, - *, - limit: int | NotGiven = NOT_GIVEN, - start_index: int | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> DatasetIterrowsResponse: - """Get a paginated list of rows from a dataset. - - Uses offset-based pagination where: - - - start_index: The starting index (0-based). If None, starts from beginning. - - limit: Number of items to return. If None or -1, returns all items. - - The response includes: - - - data: List of items for the current page. - - has_more: Whether there are more items available after this set. - - Args: - limit: The number of rows to get. 
- - start_index: Index into dataset for the first row to get. Get all rows if None. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not dataset_id: - raise ValueError(f"Expected a non-empty value for `dataset_id` but received {dataset_id!r}") - return await self._get( - f"/v1/datasetio/iterrows/{dataset_id}", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=await async_maybe_transform( - { - "limit": limit, - "start_index": start_index, - }, - dataset_iterrows_params.DatasetIterrowsParams, - ), - ), - cast_to=DatasetIterrowsResponse, - ) - - async def register( - self, - *, - purpose: Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"], - source: dataset_register_params.Source, - dataset_id: str | NotGiven = NOT_GIVEN, - metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> DatasetRegisterResponse: - """Register a new dataset. - - Args: - purpose: The purpose of the dataset. - - One of: - "post-training/messages": The dataset - contains a messages column with list of messages for post-training. { - "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": - "assistant", "content": "Hello, world!"}, ] } - "eval/question-answer": The - dataset contains a question column and an answer column for evaluation. { - "question": "What is the capital of France?", "answer": "Paris" } - - "eval/messages-answer": The dataset contains a messages column with list of - messages and an answer column for evaluation. { "messages": [ {"role": "user", - "content": "Hello, my name is John Doe."}, {"role": "assistant", "content": - "Hello, John Doe. How can I help you today?"}, {"role": "user", "content": - "What's my name?"}, ], "answer": "John Doe" } - - source: The data source of the dataset. Ensure that the data source schema is compatible - with the purpose of the dataset. Examples: - { "type": "uri", "uri": - "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri": - "lsfs://mydata.jsonl" } - { "type": "uri", "uri": - "data:csv;base64,{base64_content}" } - { "type": "uri", "uri": - "huggingface://llamastack/simpleqa?split=train" } - { "type": "rows", "rows": [ - { "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": - "assistant", "content": "Hello, world!"}, ] } ] } - - dataset_id: The ID of the dataset. If not provided, an ID will be generated. - - metadata: The metadata for the dataset. - E.g. {"description": "My dataset"}. 
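A minimal sketch tying together register, iterrows, and unregister as documented above; the dataset id, source URI, and client surface are illustrative assumptions:

import asyncio

from llama_stack_client import AsyncLlamaStackClient


async def register_and_read() -> None:
    client = AsyncLlamaStackClient(base_url="http://localhost:8321")  # illustrative base_url
    # register an eval dataset from a URI source, following the purpose/source shapes described above
    await client.datasets.register(
        purpose="eval/question-answer",
        source={"type": "uri", "uri": "https://mywebsite.com/mydata.jsonl"},  # placeholder URI
        dataset_id="my-eval-dataset",  # explicit id so we can refer to it below
        metadata={"description": "My dataset"},
    )
    # page through rows with the offset-based pagination documented for iterrows
    rows = await client.datasets.iterrows("my-eval-dataset", start_index=0, limit=10)
    print(rows.data, rows.has_more)
    # clean up by unregistering the dataset
    await client.datasets.unregister("my-eval-dataset")


asyncio.run(register_and_read())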
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return await self._post( - "/v1/datasets", - body=await async_maybe_transform( - { - "purpose": purpose, - "source": source, - "dataset_id": dataset_id, - "metadata": metadata, - }, - dataset_register_params.DatasetRegisterParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=DatasetRegisterResponse, - ) - - async def unregister( - self, - dataset_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> None: - """ - Unregister a dataset by its ID. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not dataset_id: - raise ValueError(f"Expected a non-empty value for `dataset_id` but received {dataset_id!r}") - extra_headers = {"Accept": "*/*", **(extra_headers or {})} - return await self._delete( - f"/v1/datasets/{dataset_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=NoneType, - ) - - -class DatasetsResourceWithRawResponse: - def __init__(self, datasets: DatasetsResource) -> None: - self._datasets = datasets - - self.retrieve = to_raw_response_wrapper( - datasets.retrieve, - ) - self.list = to_raw_response_wrapper( - datasets.list, - ) - self.iterrows = to_raw_response_wrapper( - datasets.iterrows, - ) - self.register = to_raw_response_wrapper( - datasets.register, - ) - self.unregister = to_raw_response_wrapper( - datasets.unregister, - ) - - -class AsyncDatasetsResourceWithRawResponse: - def __init__(self, datasets: AsyncDatasetsResource) -> None: - self._datasets = datasets - - self.retrieve = async_to_raw_response_wrapper( - datasets.retrieve, - ) - self.list = async_to_raw_response_wrapper( - datasets.list, - ) - self.iterrows = async_to_raw_response_wrapper( - datasets.iterrows, - ) - self.register = async_to_raw_response_wrapper( - datasets.register, - ) - self.unregister = async_to_raw_response_wrapper( - datasets.unregister, - ) - - -class DatasetsResourceWithStreamingResponse: - def __init__(self, datasets: DatasetsResource) -> None: - self._datasets = datasets - - self.retrieve = to_streamed_response_wrapper( - datasets.retrieve, - ) - self.list = to_streamed_response_wrapper( - datasets.list, - ) - self.iterrows = to_streamed_response_wrapper( - datasets.iterrows, - ) - self.register = to_streamed_response_wrapper( - datasets.register, - ) - self.unregister = to_streamed_response_wrapper( - datasets.unregister, - ) - - -class AsyncDatasetsResourceWithStreamingResponse: - def __init__(self, datasets: AsyncDatasetsResource) -> None: - self._datasets = datasets - - self.retrieve = async_to_streamed_response_wrapper( - datasets.retrieve, - ) - self.list = 
async_to_streamed_response_wrapper( - datasets.list, - ) - self.iterrows = async_to_streamed_response_wrapper( - datasets.iterrows, - ) - self.register = async_to_streamed_response_wrapper( - datasets.register, - ) - self.unregister = async_to_streamed_response_wrapper( - datasets.unregister, - ) diff --git a/src/llama_stack_client/resources/embeddings.py b/src/llama_stack_client/resources/embeddings.py deleted file mode 100644 index 144ebbf2..00000000 --- a/src/llama_stack_client/resources/embeddings.py +++ /dev/null @@ -1,223 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import List, Union - -import httpx - -from ..types import embedding_create_params -from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._utils import maybe_transform, async_maybe_transform -from .._compat import cached_property -from .._resource import SyncAPIResource, AsyncAPIResource -from .._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from .._base_client import make_request_options -from ..types.create_embeddings_response import CreateEmbeddingsResponse - -__all__ = ["EmbeddingsResource", "AsyncEmbeddingsResource"] - - -class EmbeddingsResource(SyncAPIResource): - @cached_property - def with_raw_response(self) -> EmbeddingsResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return EmbeddingsResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> EmbeddingsResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return EmbeddingsResourceWithStreamingResponse(self) - - def create( - self, - *, - input: Union[str, List[str]], - model: str, - dimensions: int | NotGiven = NOT_GIVEN, - encoding_format: str | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> CreateEmbeddingsResponse: - """ - Generate OpenAI-compatible embeddings for the given input using the specified - model. - - Args: - input: Input text to embed, encoded as a string or array of strings. To embed multiple - inputs in a single request, pass an array of strings. - - model: The identifier of the model to use. The model must be an embedding model - registered with Llama Stack and available via the /models endpoint. - - dimensions: (Optional) The number of dimensions the resulting output embeddings should have. - Only supported in text-embedding-3 and later models. - - encoding_format: (Optional) The format to return the embeddings in. Can be either "float" or - "base64". Defaults to "float". 
- - user: (Optional) A unique identifier representing your end-user, which can help OpenAI - to monitor and detect abuse. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return self._post( - "/v1/openai/v1/embeddings", - body=maybe_transform( - { - "input": input, - "model": model, - "dimensions": dimensions, - "encoding_format": encoding_format, - "user": user, - }, - embedding_create_params.EmbeddingCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=CreateEmbeddingsResponse, - ) - - -class AsyncEmbeddingsResource(AsyncAPIResource): - @cached_property - def with_raw_response(self) -> AsyncEmbeddingsResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncEmbeddingsResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncEmbeddingsResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncEmbeddingsResourceWithStreamingResponse(self) - - async def create( - self, - *, - input: Union[str, List[str]], - model: str, - dimensions: int | NotGiven = NOT_GIVEN, - encoding_format: str | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> CreateEmbeddingsResponse: - """ - Generate OpenAI-compatible embeddings for the given input using the specified - model. - - Args: - input: Input text to embed, encoded as a string or array of strings. To embed multiple - inputs in a single request, pass an array of strings. - - model: The identifier of the model to use. The model must be an embedding model - registered with Llama Stack and available via the /models endpoint. - - dimensions: (Optional) The number of dimensions the resulting output embeddings should have. - Only supported in text-embedding-3 and later models. - - encoding_format: (Optional) The format to return the embeddings in. Can be either "float" or - "base64". Defaults to "float". - - user: (Optional) A unique identifier representing your end-user, which can help OpenAI - to monitor and detect abuse. 
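A minimal sketch of the async embeddings call documented above; the embedding model id is a placeholder and must be registered with the stack:

import asyncio

from llama_stack_client import AsyncLlamaStackClient


async def embed() -> None:
    client = AsyncLlamaStackClient(base_url="http://localhost:8321")  # illustrative base_url
    # batch-embed two strings with a registered embedding model (model id assumed)
    result = await client.embeddings.create(
        model="all-MiniLM-L6-v2",
        input=["first sentence", "second sentence"],
        encoding_format="float",
    )
    print(result)


asyncio.run(embed())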
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return await self._post( - "/v1/openai/v1/embeddings", - body=await async_maybe_transform( - { - "input": input, - "model": model, - "dimensions": dimensions, - "encoding_format": encoding_format, - "user": user, - }, - embedding_create_params.EmbeddingCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=CreateEmbeddingsResponse, - ) - - -class EmbeddingsResourceWithRawResponse: - def __init__(self, embeddings: EmbeddingsResource) -> None: - self._embeddings = embeddings - - self.create = to_raw_response_wrapper( - embeddings.create, - ) - - -class AsyncEmbeddingsResourceWithRawResponse: - def __init__(self, embeddings: AsyncEmbeddingsResource) -> None: - self._embeddings = embeddings - - self.create = async_to_raw_response_wrapper( - embeddings.create, - ) - - -class EmbeddingsResourceWithStreamingResponse: - def __init__(self, embeddings: EmbeddingsResource) -> None: - self._embeddings = embeddings - - self.create = to_streamed_response_wrapper( - embeddings.create, - ) - - -class AsyncEmbeddingsResourceWithStreamingResponse: - def __init__(self, embeddings: AsyncEmbeddingsResource) -> None: - self._embeddings = embeddings - - self.create = async_to_streamed_response_wrapper( - embeddings.create, - ) diff --git a/src/llama_stack_client/resources/eval/__init__.py b/src/llama_stack_client/resources/eval/__init__.py deleted file mode 100644 index f6473395..00000000 --- a/src/llama_stack_client/resources/eval/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from .eval import ( - EvalResource, - AsyncEvalResource, - EvalResourceWithRawResponse, - AsyncEvalResourceWithRawResponse, - EvalResourceWithStreamingResponse, - AsyncEvalResourceWithStreamingResponse, -) -from .jobs import ( - JobsResource, - AsyncJobsResource, - JobsResourceWithRawResponse, - AsyncJobsResourceWithRawResponse, - JobsResourceWithStreamingResponse, - AsyncJobsResourceWithStreamingResponse, -) - -__all__ = [ - "JobsResource", - "AsyncJobsResource", - "JobsResourceWithRawResponse", - "AsyncJobsResourceWithRawResponse", - "JobsResourceWithStreamingResponse", - "AsyncJobsResourceWithStreamingResponse", - "EvalResource", - "AsyncEvalResource", - "EvalResourceWithRawResponse", - "AsyncEvalResourceWithRawResponse", - "EvalResourceWithStreamingResponse", - "AsyncEvalResourceWithStreamingResponse", -] diff --git a/src/llama_stack_client/resources/eval/eval.py b/src/llama_stack_client/resources/eval/eval.py deleted file mode 100644 index 006f1717..00000000 --- a/src/llama_stack_client/resources/eval/eval.py +++ /dev/null @@ -1,530 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
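For reference, a minimal usage sketch of the OpenAI-compatible embeddings resource removed above. The client class name, import path, base_url, and the `embeddings` accessor name are assumptions inferred from the module layout, and only parameters present in the deleted create() signature are used:

from llama_stack_client import LlamaStackClient  # assumed import path for the removed SDK

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed constructor and port

# POSTs to /v1/openai/v1/embeddings; input may be a single string or a list of strings.
response = client.embeddings.create(
    input=["The quick brown fox"],
    model="example-embedding-model",  # placeholder id; must be a registered embedding model
    encoding_format="float",          # optional; "float" (default) or "base64"
)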
- -from __future__ import annotations - -from typing import Dict, List, Union, Iterable - -import httpx - -from .jobs import ( - JobsResource, - AsyncJobsResource, - JobsResourceWithRawResponse, - AsyncJobsResourceWithRawResponse, - JobsResourceWithStreamingResponse, - AsyncJobsResourceWithStreamingResponse, -) -from ...types import ( - eval_run_eval_params, - eval_evaluate_rows_params, - eval_run_eval_alpha_params, - eval_evaluate_rows_alpha_params, -) -from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ..._utils import maybe_transform, async_maybe_transform -from ..._compat import cached_property -from ..._resource import SyncAPIResource, AsyncAPIResource -from ..._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from ...types.job import Job -from ..._base_client import make_request_options -from ...types.evaluate_response import EvaluateResponse -from ...types.benchmark_config_param import BenchmarkConfigParam - -__all__ = ["EvalResource", "AsyncEvalResource"] - - -class EvalResource(SyncAPIResource): - @cached_property - def jobs(self) -> JobsResource: - return JobsResource(self._client) - - @cached_property - def with_raw_response(self) -> EvalResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return EvalResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> EvalResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return EvalResourceWithStreamingResponse(self) - - def evaluate_rows( - self, - benchmark_id: str, - *, - benchmark_config: BenchmarkConfigParam, - input_rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]], - scoring_functions: List[str], - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> EvaluateResponse: - """ - Evaluate a list of rows on a benchmark. - - Args: - benchmark_config: The configuration for the benchmark. - - input_rows: The rows to evaluate. - - scoring_functions: The scoring functions to use for the evaluation. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not benchmark_id: - raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") - return self._post( - f"/v1/eval/benchmarks/{benchmark_id}/evaluations", - body=maybe_transform( - { - "benchmark_config": benchmark_config, - "input_rows": input_rows, - "scoring_functions": scoring_functions, - }, - eval_evaluate_rows_params.EvalEvaluateRowsParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=EvaluateResponse, - ) - - def evaluate_rows_alpha( - self, - benchmark_id: str, - *, - benchmark_config: BenchmarkConfigParam, - input_rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]], - scoring_functions: List[str], - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> EvaluateResponse: - """ - Evaluate a list of rows on a benchmark. - - Args: - benchmark_config: The configuration for the benchmark. - - input_rows: The rows to evaluate. - - scoring_functions: The scoring functions to use for the evaluation. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not benchmark_id: - raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") - return self._post( - f"/v1/eval/benchmarks/{benchmark_id}/evaluations", - body=maybe_transform( - { - "benchmark_config": benchmark_config, - "input_rows": input_rows, - "scoring_functions": scoring_functions, - }, - eval_evaluate_rows_alpha_params.EvalEvaluateRowsAlphaParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=EvaluateResponse, - ) - - def run_eval( - self, - benchmark_id: str, - *, - benchmark_config: BenchmarkConfigParam, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Job: - """ - Run an evaluation on a benchmark. - - Args: - benchmark_config: The configuration for the benchmark. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not benchmark_id: - raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") - return self._post( - f"/v1/eval/benchmarks/{benchmark_id}/jobs", - body=maybe_transform({"benchmark_config": benchmark_config}, eval_run_eval_params.EvalRunEvalParams), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Job, - ) - - def run_eval_alpha( - self, - benchmark_id: str, - *, - benchmark_config: BenchmarkConfigParam, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Job: - """ - Run an evaluation on a benchmark. - - Args: - benchmark_config: The configuration for the benchmark. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not benchmark_id: - raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") - return self._post( - f"/v1/eval/benchmarks/{benchmark_id}/jobs", - body=maybe_transform( - {"benchmark_config": benchmark_config}, eval_run_eval_alpha_params.EvalRunEvalAlphaParams - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Job, - ) - - -class AsyncEvalResource(AsyncAPIResource): - @cached_property - def jobs(self) -> AsyncJobsResource: - return AsyncJobsResource(self._client) - - @cached_property - def with_raw_response(self) -> AsyncEvalResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncEvalResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncEvalResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncEvalResourceWithStreamingResponse(self) - - async def evaluate_rows( - self, - benchmark_id: str, - *, - benchmark_config: BenchmarkConfigParam, - input_rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]], - scoring_functions: List[str], - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> EvaluateResponse: - """ - Evaluate a list of rows on a benchmark. - - Args: - benchmark_config: The configuration for the benchmark. - - input_rows: The rows to evaluate. - - scoring_functions: The scoring functions to use for the evaluation. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not benchmark_id: - raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") - return await self._post( - f"/v1/eval/benchmarks/{benchmark_id}/evaluations", - body=await async_maybe_transform( - { - "benchmark_config": benchmark_config, - "input_rows": input_rows, - "scoring_functions": scoring_functions, - }, - eval_evaluate_rows_params.EvalEvaluateRowsParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=EvaluateResponse, - ) - - async def evaluate_rows_alpha( - self, - benchmark_id: str, - *, - benchmark_config: BenchmarkConfigParam, - input_rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]], - scoring_functions: List[str], - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> EvaluateResponse: - """ - Evaluate a list of rows on a benchmark. - - Args: - benchmark_config: The configuration for the benchmark. - - input_rows: The rows to evaluate. - - scoring_functions: The scoring functions to use for the evaluation. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not benchmark_id: - raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") - return await self._post( - f"/v1/eval/benchmarks/{benchmark_id}/evaluations", - body=await async_maybe_transform( - { - "benchmark_config": benchmark_config, - "input_rows": input_rows, - "scoring_functions": scoring_functions, - }, - eval_evaluate_rows_alpha_params.EvalEvaluateRowsAlphaParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=EvaluateResponse, - ) - - async def run_eval( - self, - benchmark_id: str, - *, - benchmark_config: BenchmarkConfigParam, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Job: - """ - Run an evaluation on a benchmark. 
- - Args: - benchmark_config: The configuration for the benchmark. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not benchmark_id: - raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") - return await self._post( - f"/v1/eval/benchmarks/{benchmark_id}/jobs", - body=await async_maybe_transform( - {"benchmark_config": benchmark_config}, eval_run_eval_params.EvalRunEvalParams - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Job, - ) - - async def run_eval_alpha( - self, - benchmark_id: str, - *, - benchmark_config: BenchmarkConfigParam, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Job: - """ - Run an evaluation on a benchmark. - - Args: - benchmark_config: The configuration for the benchmark. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not benchmark_id: - raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") - return await self._post( - f"/v1/eval/benchmarks/{benchmark_id}/jobs", - body=await async_maybe_transform( - {"benchmark_config": benchmark_config}, eval_run_eval_alpha_params.EvalRunEvalAlphaParams - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Job, - ) - - -class EvalResourceWithRawResponse: - def __init__(self, eval: EvalResource) -> None: - self._eval = eval - - self.evaluate_rows = to_raw_response_wrapper( - eval.evaluate_rows, - ) - self.evaluate_rows_alpha = to_raw_response_wrapper( - eval.evaluate_rows_alpha, - ) - self.run_eval = to_raw_response_wrapper( - eval.run_eval, - ) - self.run_eval_alpha = to_raw_response_wrapper( - eval.run_eval_alpha, - ) - - @cached_property - def jobs(self) -> JobsResourceWithRawResponse: - return JobsResourceWithRawResponse(self._eval.jobs) - - -class AsyncEvalResourceWithRawResponse: - def __init__(self, eval: AsyncEvalResource) -> None: - self._eval = eval - - self.evaluate_rows = async_to_raw_response_wrapper( - eval.evaluate_rows, - ) - self.evaluate_rows_alpha = async_to_raw_response_wrapper( - eval.evaluate_rows_alpha, - ) - self.run_eval = async_to_raw_response_wrapper( - eval.run_eval, - ) - self.run_eval_alpha = async_to_raw_response_wrapper( - eval.run_eval_alpha, - ) - - @cached_property - def jobs(self) -> AsyncJobsResourceWithRawResponse: - return AsyncJobsResourceWithRawResponse(self._eval.jobs) - - -class EvalResourceWithStreamingResponse: - def __init__(self, eval: EvalResource) -> None: - self._eval = eval - - self.evaluate_rows = to_streamed_response_wrapper( - eval.evaluate_rows, - ) - self.evaluate_rows_alpha = to_streamed_response_wrapper( - eval.evaluate_rows_alpha, - ) 
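As a usage sketch of the eval resource removed above: run_eval() submits an asynchronous Job against a benchmark, while evaluate_rows() scores rows inline. The client setup, benchmark id, benchmark_config payload, and scoring function name below are illustrative placeholders, not values from this patch:

from llama_stack_client import LlamaStackClient  # assumed import path

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed

# Kick off an evaluation job for a registered benchmark.
job = client.eval.run_eval(
    benchmark_id="example-benchmark",         # placeholder benchmark identifier
    benchmark_config={"eval_candidate": {}},  # placeholder BenchmarkConfigParam payload
)

# Or score a handful of rows synchronously with explicit scoring functions.
result = client.eval.evaluate_rows(
    benchmark_id="example-benchmark",
    benchmark_config={"eval_candidate": {}},
    input_rows=[{"input_query": "2 + 2?", "expected_answer": "4"}],  # illustrative row
    scoring_functions=["example::equality"],  # placeholder scoring function id
)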
- self.run_eval = to_streamed_response_wrapper( - eval.run_eval, - ) - self.run_eval_alpha = to_streamed_response_wrapper( - eval.run_eval_alpha, - ) - - @cached_property - def jobs(self) -> JobsResourceWithStreamingResponse: - return JobsResourceWithStreamingResponse(self._eval.jobs) - - -class AsyncEvalResourceWithStreamingResponse: - def __init__(self, eval: AsyncEvalResource) -> None: - self._eval = eval - - self.evaluate_rows = async_to_streamed_response_wrapper( - eval.evaluate_rows, - ) - self.evaluate_rows_alpha = async_to_streamed_response_wrapper( - eval.evaluate_rows_alpha, - ) - self.run_eval = async_to_streamed_response_wrapper( - eval.run_eval, - ) - self.run_eval_alpha = async_to_streamed_response_wrapper( - eval.run_eval_alpha, - ) - - @cached_property - def jobs(self) -> AsyncJobsResourceWithStreamingResponse: - return AsyncJobsResourceWithStreamingResponse(self._eval.jobs) diff --git a/src/llama_stack_client/resources/files.py b/src/llama_stack_client/resources/files.py deleted file mode 100644 index 3eac6486..00000000 --- a/src/llama_stack_client/resources/files.py +++ /dev/null @@ -1,572 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Mapping, cast -from typing_extensions import Literal - -import httpx - -from ..types import file_list_params, file_create_params -from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes -from .._utils import extract_files, maybe_transform, deepcopy_minimal, async_maybe_transform -from .._compat import cached_property -from .._resource import SyncAPIResource, AsyncAPIResource -from .._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from ..types.file import File -from .._base_client import make_request_options -from ..types.list_files_response import ListFilesResponse -from ..types.delete_file_response import DeleteFileResponse - -__all__ = ["FilesResource", "AsyncFilesResource"] - - -class FilesResource(SyncAPIResource): - @cached_property - def with_raw_response(self) -> FilesResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return FilesResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> FilesResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return FilesResourceWithStreamingResponse(self) - - def create( - self, - *, - file: FileTypes, - purpose: Literal["assistants"], - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> File: - """Upload a file that can be used across various endpoints. 
- - The file upload should - be a multipart form request with: - - - file: The File object (not file name) to be uploaded. - - purpose: The intended purpose of the uploaded file. - - Args: - purpose: Valid purpose values for OpenAI Files API. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - body = deepcopy_minimal( - { - "file": file, - "purpose": purpose, - } - ) - files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) - # It should be noted that the actual Content-Type header that will be - # sent to the server will contain a `boundary` parameter, e.g. - # multipart/form-data; boundary=---abc-- - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} - return self._post( - "/v1/openai/v1/files", - body=maybe_transform(body, file_create_params.FileCreateParams), - files=files, - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=File, - ) - - def retrieve( - self, - file_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> File: - """ - Returns information about a specific file. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not file_id: - raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") - return self._get( - f"/v1/openai/v1/files/{file_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=File, - ) - - def list( - self, - *, - after: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, - purpose: Literal["assistants"] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ListFilesResponse: - """ - Returns a list of files that belong to the user's organization. - - Args: - after: A cursor for use in pagination. `after` is an object ID that defines your place - in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include after=obj_foo in order to - fetch the next page of the list. - - limit: A limit on the number of objects to be returned. Limit can range between 1 and - 10,000, and the default is 10,000. - - order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. 
- - purpose: Only return files with the given purpose. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return self._get( - "/v1/openai/v1/files", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=maybe_transform( - { - "after": after, - "limit": limit, - "order": order, - "purpose": purpose, - }, - file_list_params.FileListParams, - ), - ), - cast_to=ListFilesResponse, - ) - - def delete( - self, - file_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> DeleteFileResponse: - """ - Delete a file. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not file_id: - raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") - return self._delete( - f"/v1/openai/v1/files/{file_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=DeleteFileResponse, - ) - - def content( - self, - file_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> object: - """ - Returns the contents of the specified file. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not file_id: - raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") - return self._get( - f"/v1/openai/v1/files/{file_id}/content", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=object, - ) - - -class AsyncFilesResource(AsyncAPIResource): - @cached_property - def with_raw_response(self) -> AsyncFilesResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncFilesResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncFilesResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
- - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncFilesResourceWithStreamingResponse(self) - - async def create( - self, - *, - file: FileTypes, - purpose: Literal["assistants"], - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> File: - """Upload a file that can be used across various endpoints. - - The file upload should - be a multipart form request with: - - - file: The File object (not file name) to be uploaded. - - purpose: The intended purpose of the uploaded file. - - Args: - purpose: Valid purpose values for OpenAI Files API. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - body = deepcopy_minimal( - { - "file": file, - "purpose": purpose, - } - ) - files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) - # It should be noted that the actual Content-Type header that will be - # sent to the server will contain a `boundary` parameter, e.g. - # multipart/form-data; boundary=---abc-- - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} - return await self._post( - "/v1/openai/v1/files", - body=await async_maybe_transform(body, file_create_params.FileCreateParams), - files=files, - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=File, - ) - - async def retrieve( - self, - file_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> File: - """ - Returns information about a specific file. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not file_id: - raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") - return await self._get( - f"/v1/openai/v1/files/{file_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=File, - ) - - async def list( - self, - *, - after: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, - purpose: Literal["assistants"] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ListFilesResponse: - """ - Returns a list of files that belong to the user's organization. - - Args: - after: A cursor for use in pagination. `after` is an object ID that defines your place - in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include after=obj_foo in order to - fetch the next page of the list. - - limit: A limit on the number of objects to be returned. Limit can range between 1 and - 10,000, and the default is 10,000. - - order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. - - purpose: Only return files with the given purpose. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return await self._get( - "/v1/openai/v1/files", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=await async_maybe_transform( - { - "after": after, - "limit": limit, - "order": order, - "purpose": purpose, - }, - file_list_params.FileListParams, - ), - ), - cast_to=ListFilesResponse, - ) - - async def delete( - self, - file_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> DeleteFileResponse: - """ - Delete a file. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not file_id: - raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") - return await self._delete( - f"/v1/openai/v1/files/{file_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=DeleteFileResponse, - ) - - async def content( - self, - file_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> object: - """ - Returns the contents of the specified file. 
- - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not file_id: - raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") - return await self._get( - f"/v1/openai/v1/files/{file_id}/content", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=object, - ) - - -class FilesResourceWithRawResponse: - def __init__(self, files: FilesResource) -> None: - self._files = files - - self.create = to_raw_response_wrapper( - files.create, - ) - self.retrieve = to_raw_response_wrapper( - files.retrieve, - ) - self.list = to_raw_response_wrapper( - files.list, - ) - self.delete = to_raw_response_wrapper( - files.delete, - ) - self.content = to_raw_response_wrapper( - files.content, - ) - - -class AsyncFilesResourceWithRawResponse: - def __init__(self, files: AsyncFilesResource) -> None: - self._files = files - - self.create = async_to_raw_response_wrapper( - files.create, - ) - self.retrieve = async_to_raw_response_wrapper( - files.retrieve, - ) - self.list = async_to_raw_response_wrapper( - files.list, - ) - self.delete = async_to_raw_response_wrapper( - files.delete, - ) - self.content = async_to_raw_response_wrapper( - files.content, - ) - - -class FilesResourceWithStreamingResponse: - def __init__(self, files: FilesResource) -> None: - self._files = files - - self.create = to_streamed_response_wrapper( - files.create, - ) - self.retrieve = to_streamed_response_wrapper( - files.retrieve, - ) - self.list = to_streamed_response_wrapper( - files.list, - ) - self.delete = to_streamed_response_wrapper( - files.delete, - ) - self.content = to_streamed_response_wrapper( - files.content, - ) - - -class AsyncFilesResourceWithStreamingResponse: - def __init__(self, files: AsyncFilesResource) -> None: - self._files = files - - self.create = async_to_streamed_response_wrapper( - files.create, - ) - self.retrieve = async_to_streamed_response_wrapper( - files.retrieve, - ) - self.list = async_to_streamed_response_wrapper( - files.list, - ) - self.delete = async_to_streamed_response_wrapper( - files.delete, - ) - self.content = async_to_streamed_response_wrapper( - files.content, - ) diff --git a/src/llama_stack_client/resources/inference.py b/src/llama_stack_client/resources/inference.py deleted file mode 100644 index 84a8dd96..00000000 --- a/src/llama_stack_client/resources/inference.py +++ /dev/null @@ -1,1401 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
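Similarly, a sketch of how the files resource removed above was called. The client setup, local file path, and the `id` attribute on the returned File model are assumptions; the purpose value comes from the deleted Literal["assistants"] annotation:

from llama_stack_client import LlamaStackClient  # assumed import path

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed

# Upload: POST /v1/openai/v1/files as multipart/form-data.
with open("notes.txt", "rb") as fh:               # placeholder local file
    uploaded = client.files.create(file=fh, purpose="assistants")

# List, fetch metadata, download content, then delete.
page = client.files.list(limit=10, order="desc")
meta = client.files.retrieve(uploaded.id)         # assumes the File model exposes an `id` field
data = client.files.content(uploaded.id)
client.files.delete(uploaded.id)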
- -from __future__ import annotations - -from typing import List, Union, Iterable -from typing_extensions import Literal, overload - -import httpx - -from ..types import ( - inference_completion_params, - inference_embeddings_params, - inference_chat_completion_params, - inference_batch_completion_params, - inference_batch_chat_completion_params, -) -from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._utils import required_args, maybe_transform, async_maybe_transform -from .._compat import cached_property -from .._resource import SyncAPIResource, AsyncAPIResource -from .._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from .._streaming import Stream, AsyncStream -from .._base_client import make_request_options -from ..types.completion_response import CompletionResponse -from ..types.embeddings_response import EmbeddingsResponse -from ..types.shared_params.message import Message -from ..types.shared.batch_completion import BatchCompletion -from ..types.shared_params.response_format import ResponseFormat -from ..types.shared_params.sampling_params import SamplingParams -from ..types.shared.chat_completion_response import ChatCompletionResponse -from ..types.shared_params.interleaved_content import InterleavedContent -from ..types.chat_completion_response_stream_chunk import ChatCompletionResponseStreamChunk -from ..types.shared_params.interleaved_content_item import InterleavedContentItem -from ..types.inference_batch_chat_completion_response import InferenceBatchChatCompletionResponse - -__all__ = ["InferenceResource", "AsyncInferenceResource"] - - -class InferenceResource(SyncAPIResource): - @cached_property - def with_raw_response(self) -> InferenceResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return InferenceResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> InferenceResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return InferenceResourceWithStreamingResponse(self) - - def batch_chat_completion( - self, - *, - messages_batch: Iterable[Iterable[Message]], - model_id: str, - logprobs: inference_batch_chat_completion_params.Logprobs | NotGiven = NOT_GIVEN, - response_format: ResponseFormat | NotGiven = NOT_GIVEN, - sampling_params: SamplingParams | NotGiven = NOT_GIVEN, - tool_config: inference_batch_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN, - tools: Iterable[inference_batch_chat_completion_params.Tool] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> InferenceBatchChatCompletionResponse: - """ - Generate chat completions for a batch of messages using the specified model. 
- - Args: - messages_batch: The messages to generate completions for. - - model_id: The identifier of the model to use. The model must be registered with Llama - Stack and available via the /models endpoint. - - logprobs: (Optional) If specified, log probabilities for each token position will be - returned. - - response_format: (Optional) Grammar specification for guided (structured) decoding. - - sampling_params: (Optional) Parameters to control the sampling strategy. - - tool_config: (Optional) Configuration for tool use. - - tools: (Optional) List of tool definitions available to the model. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return self._post( - "/v1/inference/batch-chat-completion", - body=maybe_transform( - { - "messages_batch": messages_batch, - "model_id": model_id, - "logprobs": logprobs, - "response_format": response_format, - "sampling_params": sampling_params, - "tool_config": tool_config, - "tools": tools, - }, - inference_batch_chat_completion_params.InferenceBatchChatCompletionParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=InferenceBatchChatCompletionResponse, - ) - - def batch_completion( - self, - *, - content_batch: List[InterleavedContent], - model_id: str, - logprobs: inference_batch_completion_params.Logprobs | NotGiven = NOT_GIVEN, - response_format: ResponseFormat | NotGiven = NOT_GIVEN, - sampling_params: SamplingParams | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> BatchCompletion: - """ - Generate completions for a batch of content using the specified model. - - Args: - content_batch: The content to generate completions for. - - model_id: The identifier of the model to use. The model must be registered with Llama - Stack and available via the /models endpoint. - - logprobs: (Optional) If specified, log probabilities for each token position will be - returned. - - response_format: (Optional) Grammar specification for guided (structured) decoding. - - sampling_params: (Optional) Parameters to control the sampling strategy. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return self._post( - "/v1/inference/batch-completion", - body=maybe_transform( - { - "content_batch": content_batch, - "model_id": model_id, - "logprobs": logprobs, - "response_format": response_format, - "sampling_params": sampling_params, - }, - inference_batch_completion_params.InferenceBatchCompletionParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=BatchCompletion, - ) - - @overload - def chat_completion( - self, - *, - messages: Iterable[Message], - model_id: str, - logprobs: inference_chat_completion_params.Logprobs | NotGiven = NOT_GIVEN, - response_format: ResponseFormat | NotGiven = NOT_GIVEN, - sampling_params: SamplingParams | NotGiven = NOT_GIVEN, - stream: Literal[False] | NotGiven = NOT_GIVEN, - tool_choice: Literal["auto", "required", "none"] | NotGiven = NOT_GIVEN, - tool_config: inference_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN, - tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN, - tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletionResponse: - """ - Generate a chat completion for the given messages using the specified model. - - Args: - messages: List of messages in the conversation. - - model_id: The identifier of the model to use. The model must be registered with Llama - Stack and available via the /models endpoint. - - logprobs: (Optional) If specified, log probabilities for each token position will be - returned. - - response_format: (Optional) Grammar specification for guided (structured) decoding. There are two - options: - `ResponseFormat.json_schema`: The grammar is a JSON schema. Most - providers support this format. - `ResponseFormat.grammar`: The grammar is a BNF - grammar. This format is more flexible, but not all providers support it. - - sampling_params: Parameters to control the sampling strategy. - - stream: (Optional) If True, generate an SSE event stream of the response. Defaults to - False. - - tool_choice: (Optional) Whether tool use is required or automatic. Defaults to - ToolChoice.auto. .. deprecated:: Use tool_config instead. - - tool_config: (Optional) Configuration for tool use. - - tool_prompt_format: (Optional) Instructs the model how to format tool calls. By default, Llama Stack - will attempt to use a format that is best adapted to the model. - - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a - tag. - `ToolPromptFormat.python_list`: The tool calls - are output as Python syntax -- a list of function calls. .. deprecated:: Use - tool_config instead. - - tools: (Optional) List of tool definitions available to the model. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - def chat_completion( - self, - *, - messages: Iterable[Message], - model_id: str, - stream: Literal[True], - logprobs: inference_chat_completion_params.Logprobs | NotGiven = NOT_GIVEN, - response_format: ResponseFormat | NotGiven = NOT_GIVEN, - sampling_params: SamplingParams | NotGiven = NOT_GIVEN, - tool_choice: Literal["auto", "required", "none"] | NotGiven = NOT_GIVEN, - tool_config: inference_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN, - tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN, - tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Stream[ChatCompletionResponseStreamChunk]: - """ - Generate a chat completion for the given messages using the specified model. - - Args: - messages: List of messages in the conversation. - - model_id: The identifier of the model to use. The model must be registered with Llama - Stack and available via the /models endpoint. - - stream: (Optional) If True, generate an SSE event stream of the response. Defaults to - False. - - logprobs: (Optional) If specified, log probabilities for each token position will be - returned. - - response_format: (Optional) Grammar specification for guided (structured) decoding. There are two - options: - `ResponseFormat.json_schema`: The grammar is a JSON schema. Most - providers support this format. - `ResponseFormat.grammar`: The grammar is a BNF - grammar. This format is more flexible, but not all providers support it. - - sampling_params: Parameters to control the sampling strategy. - - tool_choice: (Optional) Whether tool use is required or automatic. Defaults to - ToolChoice.auto. .. deprecated:: Use tool_config instead. - - tool_config: (Optional) Configuration for tool use. - - tool_prompt_format: (Optional) Instructs the model how to format tool calls. By default, Llama Stack - will attempt to use a format that is best adapted to the model. - - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a - tag. - `ToolPromptFormat.python_list`: The tool calls - are output as Python syntax -- a list of function calls. .. deprecated:: Use - tool_config instead. - - tools: (Optional) List of tool definitions available to the model. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... 
- - @overload - def chat_completion( - self, - *, - messages: Iterable[Message], - model_id: str, - stream: bool, - logprobs: inference_chat_completion_params.Logprobs | NotGiven = NOT_GIVEN, - response_format: ResponseFormat | NotGiven = NOT_GIVEN, - sampling_params: SamplingParams | NotGiven = NOT_GIVEN, - tool_choice: Literal["auto", "required", "none"] | NotGiven = NOT_GIVEN, - tool_config: inference_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN, - tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN, - tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletionResponse | Stream[ChatCompletionResponseStreamChunk]: - """ - Generate a chat completion for the given messages using the specified model. - - Args: - messages: List of messages in the conversation. - - model_id: The identifier of the model to use. The model must be registered with Llama - Stack and available via the /models endpoint. - - stream: (Optional) If True, generate an SSE event stream of the response. Defaults to - False. - - logprobs: (Optional) If specified, log probabilities for each token position will be - returned. - - response_format: (Optional) Grammar specification for guided (structured) decoding. There are two - options: - `ResponseFormat.json_schema`: The grammar is a JSON schema. Most - providers support this format. - `ResponseFormat.grammar`: The grammar is a BNF - grammar. This format is more flexible, but not all providers support it. - - sampling_params: Parameters to control the sampling strategy. - - tool_choice: (Optional) Whether tool use is required or automatic. Defaults to - ToolChoice.auto. .. deprecated:: Use tool_config instead. - - tool_config: (Optional) Configuration for tool use. - - tool_prompt_format: (Optional) Instructs the model how to format tool calls. By default, Llama Stack - will attempt to use a format that is best adapted to the model. - - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a - tag. - `ToolPromptFormat.python_list`: The tool calls - are output as Python syntax -- a list of function calls. .. deprecated:: Use - tool_config instead. - - tools: (Optional) List of tool definitions available to the model. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... 
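The overloads above collapse into the single implementation that follows. As a sketch of both calling modes, non-streaming and SSE streaming, with the client setup and model id as assumptions, and messages passed as plain dicts on the assumption that Message accepts role/content mappings:

from llama_stack_client import LlamaStackClient  # assumed import path

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed

messages = [{"role": "user", "content": "Write a haiku about the sea."}]  # assumed Message shape

# Non-streaming: returns a ChatCompletionResponse.
response = client.inference.chat_completion(
    messages=messages,
    model_id="example-model",  # placeholder; must be registered via /models
)

# Streaming: stream=True sends Accept: text/event-stream and yields
# ChatCompletionResponseStreamChunk objects one at a time.
for chunk in client.inference.chat_completion(
    messages=messages,
    model_id="example-model",
    stream=True,
):
    ...  # handle each chunk as it arrives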
- - @required_args(["messages", "model_id"], ["messages", "model_id", "stream"]) - def chat_completion( - self, - *, - messages: Iterable[Message], - model_id: str, - logprobs: inference_chat_completion_params.Logprobs | NotGiven = NOT_GIVEN, - response_format: ResponseFormat | NotGiven = NOT_GIVEN, - sampling_params: SamplingParams | NotGiven = NOT_GIVEN, - stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, - tool_choice: Literal["auto", "required", "none"] | NotGiven = NOT_GIVEN, - tool_config: inference_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN, - tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN, - tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletionResponse | Stream[ChatCompletionResponseStreamChunk]: - if stream: - extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})} - return self._post( - "/v1/inference/chat-completion", - body=maybe_transform( - { - "messages": messages, - "model_id": model_id, - "logprobs": logprobs, - "response_format": response_format, - "sampling_params": sampling_params, - "stream": stream, - "tool_choice": tool_choice, - "tool_config": tool_config, - "tool_prompt_format": tool_prompt_format, - "tools": tools, - }, - inference_chat_completion_params.InferenceChatCompletionParamsStreaming - if stream - else inference_chat_completion_params.InferenceChatCompletionParamsNonStreaming, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=ChatCompletionResponse, - stream=stream or False, - stream_cls=Stream[ChatCompletionResponseStreamChunk], - ) - - @overload - def completion( - self, - *, - content: InterleavedContent, - model_id: str, - logprobs: inference_completion_params.Logprobs | NotGiven = NOT_GIVEN, - response_format: ResponseFormat | NotGiven = NOT_GIVEN, - sampling_params: SamplingParams | NotGiven = NOT_GIVEN, - stream: Literal[False] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> CompletionResponse: - """ - Generate a completion for the given content using the specified model. - - Args: - content: The content to generate a completion for. - - model_id: The identifier of the model to use. The model must be registered with Llama - Stack and available via the /models endpoint. - - logprobs: (Optional) If specified, log probabilities for each token position will be - returned. - - response_format: (Optional) Grammar specification for guided (structured) decoding. - - sampling_params: (Optional) Parameters to control the sampling strategy. - - stream: (Optional) If True, generate an SSE event stream of the response. Defaults to - False. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - def completion( - self, - *, - content: InterleavedContent, - model_id: str, - stream: Literal[True], - logprobs: inference_completion_params.Logprobs | NotGiven = NOT_GIVEN, - response_format: ResponseFormat | NotGiven = NOT_GIVEN, - sampling_params: SamplingParams | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Stream[CompletionResponse]: - """ - Generate a completion for the given content using the specified model. - - Args: - content: The content to generate a completion for. - - model_id: The identifier of the model to use. The model must be registered with Llama - Stack and available via the /models endpoint. - - stream: (Optional) If True, generate an SSE event stream of the response. Defaults to - False. - - logprobs: (Optional) If specified, log probabilities for each token position will be - returned. - - response_format: (Optional) Grammar specification for guided (structured) decoding. - - sampling_params: (Optional) Parameters to control the sampling strategy. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - def completion( - self, - *, - content: InterleavedContent, - model_id: str, - stream: bool, - logprobs: inference_completion_params.Logprobs | NotGiven = NOT_GIVEN, - response_format: ResponseFormat | NotGiven = NOT_GIVEN, - sampling_params: SamplingParams | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> CompletionResponse | Stream[CompletionResponse]: - """ - Generate a completion for the given content using the specified model. - - Args: - content: The content to generate a completion for. - - model_id: The identifier of the model to use. The model must be registered with Llama - Stack and available via the /models endpoint. - - stream: (Optional) If True, generate an SSE event stream of the response. Defaults to - False. - - logprobs: (Optional) If specified, log probabilities for each token position will be - returned. - - response_format: (Optional) Grammar specification for guided (structured) decoding. - - sampling_params: (Optional) Parameters to control the sampling strategy. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @required_args(["content", "model_id"], ["content", "model_id", "stream"]) - def completion( - self, - *, - content: InterleavedContent, - model_id: str, - logprobs: inference_completion_params.Logprobs | NotGiven = NOT_GIVEN, - response_format: ResponseFormat | NotGiven = NOT_GIVEN, - sampling_params: SamplingParams | NotGiven = NOT_GIVEN, - stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> CompletionResponse | Stream[CompletionResponse]: - if stream: - extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})} - return self._post( - "/v1/inference/completion", - body=maybe_transform( - { - "content": content, - "model_id": model_id, - "logprobs": logprobs, - "response_format": response_format, - "sampling_params": sampling_params, - "stream": stream, - }, - inference_completion_params.InferenceCompletionParamsStreaming - if stream - else inference_completion_params.InferenceCompletionParamsNonStreaming, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=CompletionResponse, - stream=stream or False, - stream_cls=Stream[CompletionResponse], - ) - - def embeddings( - self, - *, - contents: Union[List[str], Iterable[InterleavedContentItem]], - model_id: str, - output_dimension: int | NotGiven = NOT_GIVEN, - task_type: Literal["query", "document"] | NotGiven = NOT_GIVEN, - text_truncation: Literal["none", "start", "end"] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> EmbeddingsResponse: - """ - Generate embeddings for content pieces using the specified model. - - Args: - contents: List of contents to generate embeddings for. Each content can be a string or an - InterleavedContentItem (and hence can be multimodal). The behavior depends on - the model and provider. Some models may only support text. - - model_id: The identifier of the model to use. The model must be an embedding model - registered with Llama Stack and available via the /models endpoint. - - output_dimension: (Optional) Output dimensionality for the embeddings. Only supported by - Matryoshka models. - - task_type: (Optional) How is the embedding being used? This is only supported by asymmetric - embedding models. - - text_truncation: (Optional) Config for how to truncate text for embedding when text is longer - than the model's max sequence length. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return self._post( - "/v1/inference/embeddings", - body=maybe_transform( - { - "contents": contents, - "model_id": model_id, - "output_dimension": output_dimension, - "task_type": task_type, - "text_truncation": text_truncation, - }, - inference_embeddings_params.InferenceEmbeddingsParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=EmbeddingsResponse, - ) - - -class AsyncInferenceResource(AsyncAPIResource): - @cached_property - def with_raw_response(self) -> AsyncInferenceResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncInferenceResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncInferenceResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncInferenceResourceWithStreamingResponse(self) - - async def batch_chat_completion( - self, - *, - messages_batch: Iterable[Iterable[Message]], - model_id: str, - logprobs: inference_batch_chat_completion_params.Logprobs | NotGiven = NOT_GIVEN, - response_format: ResponseFormat | NotGiven = NOT_GIVEN, - sampling_params: SamplingParams | NotGiven = NOT_GIVEN, - tool_config: inference_batch_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN, - tools: Iterable[inference_batch_chat_completion_params.Tool] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> InferenceBatchChatCompletionResponse: - """ - Generate chat completions for a batch of messages using the specified model. - - Args: - messages_batch: The messages to generate completions for. - - model_id: The identifier of the model to use. The model must be registered with Llama - Stack and available via the /models endpoint. - - logprobs: (Optional) If specified, log probabilities for each token position will be - returned. - - response_format: (Optional) Grammar specification for guided (structured) decoding. - - sampling_params: (Optional) Parameters to control the sampling strategy. - - tool_config: (Optional) Configuration for tool use. - - tools: (Optional) List of tool definitions available to the model. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return await self._post( - "/v1/inference/batch-chat-completion", - body=await async_maybe_transform( - { - "messages_batch": messages_batch, - "model_id": model_id, - "logprobs": logprobs, - "response_format": response_format, - "sampling_params": sampling_params, - "tool_config": tool_config, - "tools": tools, - }, - inference_batch_chat_completion_params.InferenceBatchChatCompletionParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=InferenceBatchChatCompletionResponse, - ) - - async def batch_completion( - self, - *, - content_batch: List[InterleavedContent], - model_id: str, - logprobs: inference_batch_completion_params.Logprobs | NotGiven = NOT_GIVEN, - response_format: ResponseFormat | NotGiven = NOT_GIVEN, - sampling_params: SamplingParams | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> BatchCompletion: - """ - Generate completions for a batch of content using the specified model. - - Args: - content_batch: The content to generate completions for. - - model_id: The identifier of the model to use. The model must be registered with Llama - Stack and available via the /models endpoint. - - logprobs: (Optional) If specified, log probabilities for each token position will be - returned. - - response_format: (Optional) Grammar specification for guided (structured) decoding. - - sampling_params: (Optional) Parameters to control the sampling strategy. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return await self._post( - "/v1/inference/batch-completion", - body=await async_maybe_transform( - { - "content_batch": content_batch, - "model_id": model_id, - "logprobs": logprobs, - "response_format": response_format, - "sampling_params": sampling_params, - }, - inference_batch_completion_params.InferenceBatchCompletionParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=BatchCompletion, - ) - - @overload - async def chat_completion( - self, - *, - messages: Iterable[Message], - model_id: str, - logprobs: inference_chat_completion_params.Logprobs | NotGiven = NOT_GIVEN, - response_format: ResponseFormat | NotGiven = NOT_GIVEN, - sampling_params: SamplingParams | NotGiven = NOT_GIVEN, - stream: Literal[False] | NotGiven = NOT_GIVEN, - tool_choice: Literal["auto", "required", "none"] | NotGiven = NOT_GIVEN, - tool_config: inference_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN, - tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN, - tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletionResponse: - """ - Generate a chat completion for the given messages using the specified model. - - Args: - messages: List of messages in the conversation. - - model_id: The identifier of the model to use. The model must be registered with Llama - Stack and available via the /models endpoint. - - logprobs: (Optional) If specified, log probabilities for each token position will be - returned. - - response_format: (Optional) Grammar specification for guided (structured) decoding. There are two - options: - `ResponseFormat.json_schema`: The grammar is a JSON schema. Most - providers support this format. - `ResponseFormat.grammar`: The grammar is a BNF - grammar. This format is more flexible, but not all providers support it. - - sampling_params: Parameters to control the sampling strategy. - - stream: (Optional) If True, generate an SSE event stream of the response. Defaults to - False. - - tool_choice: (Optional) Whether tool use is required or automatic. Defaults to - ToolChoice.auto. .. deprecated:: Use tool_config instead. - - tool_config: (Optional) Configuration for tool use. - - tool_prompt_format: (Optional) Instructs the model how to format tool calls. By default, Llama Stack - will attempt to use a format that is best adapted to the model. - - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a - tag. - `ToolPromptFormat.python_list`: The tool calls - are output as Python syntax -- a list of function calls. .. deprecated:: Use - tool_config instead. - - tools: (Optional) List of tool definitions available to the model. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - async def chat_completion( - self, - *, - messages: Iterable[Message], - model_id: str, - stream: Literal[True], - logprobs: inference_chat_completion_params.Logprobs | NotGiven = NOT_GIVEN, - response_format: ResponseFormat | NotGiven = NOT_GIVEN, - sampling_params: SamplingParams | NotGiven = NOT_GIVEN, - tool_choice: Literal["auto", "required", "none"] | NotGiven = NOT_GIVEN, - tool_config: inference_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN, - tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN, - tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncStream[ChatCompletionResponseStreamChunk]: - """ - Generate a chat completion for the given messages using the specified model. - - Args: - messages: List of messages in the conversation. - - model_id: The identifier of the model to use. The model must be registered with Llama - Stack and available via the /models endpoint. - - stream: (Optional) If True, generate an SSE event stream of the response. Defaults to - False. - - logprobs: (Optional) If specified, log probabilities for each token position will be - returned. - - response_format: (Optional) Grammar specification for guided (structured) decoding. There are two - options: - `ResponseFormat.json_schema`: The grammar is a JSON schema. Most - providers support this format. - `ResponseFormat.grammar`: The grammar is a BNF - grammar. This format is more flexible, but not all providers support it. - - sampling_params: Parameters to control the sampling strategy. - - tool_choice: (Optional) Whether tool use is required or automatic. Defaults to - ToolChoice.auto. .. deprecated:: Use tool_config instead. - - tool_config: (Optional) Configuration for tool use. - - tool_prompt_format: (Optional) Instructs the model how to format tool calls. By default, Llama Stack - will attempt to use a format that is best adapted to the model. - - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a - tag. - `ToolPromptFormat.python_list`: The tool calls - are output as Python syntax -- a list of function calls. .. deprecated:: Use - tool_config instead. - - tools: (Optional) List of tool definitions available to the model. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... 
- - @overload - async def chat_completion( - self, - *, - messages: Iterable[Message], - model_id: str, - stream: bool, - logprobs: inference_chat_completion_params.Logprobs | NotGiven = NOT_GIVEN, - response_format: ResponseFormat | NotGiven = NOT_GIVEN, - sampling_params: SamplingParams | NotGiven = NOT_GIVEN, - tool_choice: Literal["auto", "required", "none"] | NotGiven = NOT_GIVEN, - tool_config: inference_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN, - tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN, - tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletionResponse | AsyncStream[ChatCompletionResponseStreamChunk]: - """ - Generate a chat completion for the given messages using the specified model. - - Args: - messages: List of messages in the conversation. - - model_id: The identifier of the model to use. The model must be registered with Llama - Stack and available via the /models endpoint. - - stream: (Optional) If True, generate an SSE event stream of the response. Defaults to - False. - - logprobs: (Optional) If specified, log probabilities for each token position will be - returned. - - response_format: (Optional) Grammar specification for guided (structured) decoding. There are two - options: - `ResponseFormat.json_schema`: The grammar is a JSON schema. Most - providers support this format. - `ResponseFormat.grammar`: The grammar is a BNF - grammar. This format is more flexible, but not all providers support it. - - sampling_params: Parameters to control the sampling strategy. - - tool_choice: (Optional) Whether tool use is required or automatic. Defaults to - ToolChoice.auto. .. deprecated:: Use tool_config instead. - - tool_config: (Optional) Configuration for tool use. - - tool_prompt_format: (Optional) Instructs the model how to format tool calls. By default, Llama Stack - will attempt to use a format that is best adapted to the model. - - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a - tag. - `ToolPromptFormat.python_list`: The tool calls - are output as Python syntax -- a list of function calls. .. deprecated:: Use - tool_config instead. - - tools: (Optional) List of tool definitions available to the model. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... 
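Reviewer note: the async overloads above mirror the sync ones; a short sketch of the streaming path, assuming the pre-sync package's AsyncLlamaStackClient. The base_url and model id are placeholders.

import asyncio

from llama_stack_client import AsyncLlamaStackClient


async def main() -> None:
    client = AsyncLlamaStackClient(base_url="http://localhost:8321")  # placeholder
    stream = await client.inference.chat_completion(
        model_id="meta-llama/Llama-3.1-8B-Instruct",  # placeholder model id
        messages=[{"role": "user", "content": "List three prime numbers."}],
        stream=True,  # selects the AsyncStream[ChatCompletionResponseStreamChunk] overload
    )
    async for chunk in stream:
        # Each chunk is one SSE event; the delta payload varies by event type.
        print(chunk.event.delta)


asyncio.run(main())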
- - @required_args(["messages", "model_id"], ["messages", "model_id", "stream"]) - async def chat_completion( - self, - *, - messages: Iterable[Message], - model_id: str, - logprobs: inference_chat_completion_params.Logprobs | NotGiven = NOT_GIVEN, - response_format: ResponseFormat | NotGiven = NOT_GIVEN, - sampling_params: SamplingParams | NotGiven = NOT_GIVEN, - stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, - tool_choice: Literal["auto", "required", "none"] | NotGiven = NOT_GIVEN, - tool_config: inference_chat_completion_params.ToolConfig | NotGiven = NOT_GIVEN, - tool_prompt_format: Literal["json", "function_tag", "python_list"] | NotGiven = NOT_GIVEN, - tools: Iterable[inference_chat_completion_params.Tool] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletionResponse | AsyncStream[ChatCompletionResponseStreamChunk]: - if stream: - extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})} - return await self._post( - "/v1/inference/chat-completion", - body=await async_maybe_transform( - { - "messages": messages, - "model_id": model_id, - "logprobs": logprobs, - "response_format": response_format, - "sampling_params": sampling_params, - "stream": stream, - "tool_choice": tool_choice, - "tool_config": tool_config, - "tool_prompt_format": tool_prompt_format, - "tools": tools, - }, - inference_chat_completion_params.InferenceChatCompletionParamsStreaming - if stream - else inference_chat_completion_params.InferenceChatCompletionParamsNonStreaming, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=ChatCompletionResponse, - stream=stream or False, - stream_cls=AsyncStream[ChatCompletionResponseStreamChunk], - ) - - @overload - async def completion( - self, - *, - content: InterleavedContent, - model_id: str, - logprobs: inference_completion_params.Logprobs | NotGiven = NOT_GIVEN, - response_format: ResponseFormat | NotGiven = NOT_GIVEN, - sampling_params: SamplingParams | NotGiven = NOT_GIVEN, - stream: Literal[False] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> CompletionResponse: - """ - Generate a completion for the given content using the specified model. - - Args: - content: The content to generate a completion for. - - model_id: The identifier of the model to use. The model must be registered with Llama - Stack and available via the /models endpoint. - - logprobs: (Optional) If specified, log probabilities for each token position will be - returned. - - response_format: (Optional) Grammar specification for guided (structured) decoding. - - sampling_params: (Optional) Parameters to control the sampling strategy. - - stream: (Optional) If True, generate an SSE event stream of the response. 
Defaults to - False. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - async def completion( - self, - *, - content: InterleavedContent, - model_id: str, - stream: Literal[True], - logprobs: inference_completion_params.Logprobs | NotGiven = NOT_GIVEN, - response_format: ResponseFormat | NotGiven = NOT_GIVEN, - sampling_params: SamplingParams | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncStream[CompletionResponse]: - """ - Generate a completion for the given content using the specified model. - - Args: - content: The content to generate a completion for. - - model_id: The identifier of the model to use. The model must be registered with Llama - Stack and available via the /models endpoint. - - stream: (Optional) If True, generate an SSE event stream of the response. Defaults to - False. - - logprobs: (Optional) If specified, log probabilities for each token position will be - returned. - - response_format: (Optional) Grammar specification for guided (structured) decoding. - - sampling_params: (Optional) Parameters to control the sampling strategy. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - async def completion( - self, - *, - content: InterleavedContent, - model_id: str, - stream: bool, - logprobs: inference_completion_params.Logprobs | NotGiven = NOT_GIVEN, - response_format: ResponseFormat | NotGiven = NOT_GIVEN, - sampling_params: SamplingParams | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> CompletionResponse | AsyncStream[CompletionResponse]: - """ - Generate a completion for the given content using the specified model. - - Args: - content: The content to generate a completion for. - - model_id: The identifier of the model to use. The model must be registered with Llama - Stack and available via the /models endpoint. - - stream: (Optional) If True, generate an SSE event stream of the response. Defaults to - False. - - logprobs: (Optional) If specified, log probabilities for each token position will be - returned. - - response_format: (Optional) Grammar specification for guided (structured) decoding. - - sampling_params: (Optional) Parameters to control the sampling strategy. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @required_args(["content", "model_id"], ["content", "model_id", "stream"]) - async def completion( - self, - *, - content: InterleavedContent, - model_id: str, - logprobs: inference_completion_params.Logprobs | NotGiven = NOT_GIVEN, - response_format: ResponseFormat | NotGiven = NOT_GIVEN, - sampling_params: SamplingParams | NotGiven = NOT_GIVEN, - stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> CompletionResponse | AsyncStream[CompletionResponse]: - if stream: - extra_headers = {"Accept": "text/event-stream", **(extra_headers or {})} - return await self._post( - "/v1/inference/completion", - body=await async_maybe_transform( - { - "content": content, - "model_id": model_id, - "logprobs": logprobs, - "response_format": response_format, - "sampling_params": sampling_params, - "stream": stream, - }, - inference_completion_params.InferenceCompletionParamsStreaming - if stream - else inference_completion_params.InferenceCompletionParamsNonStreaming, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=CompletionResponse, - stream=stream or False, - stream_cls=AsyncStream[CompletionResponse], - ) - - async def embeddings( - self, - *, - contents: Union[List[str], Iterable[InterleavedContentItem]], - model_id: str, - output_dimension: int | NotGiven = NOT_GIVEN, - task_type: Literal["query", "document"] | NotGiven = NOT_GIVEN, - text_truncation: Literal["none", "start", "end"] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> EmbeddingsResponse: - """ - Generate embeddings for content pieces using the specified model. - - Args: - contents: List of contents to generate embeddings for. Each content can be a string or an - InterleavedContentItem (and hence can be multimodal). The behavior depends on - the model and provider. Some models may only support text. - - model_id: The identifier of the model to use. The model must be an embedding model - registered with Llama Stack and available via the /models endpoint. - - output_dimension: (Optional) Output dimensionality for the embeddings. Only supported by - Matryoshka models. - - task_type: (Optional) How is the embedding being used? This is only supported by asymmetric - embedding models. - - text_truncation: (Optional) Config for how to truncate text for embedding when text is longer - than the model's max sequence length. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return await self._post( - "/v1/inference/embeddings", - body=await async_maybe_transform( - { - "contents": contents, - "model_id": model_id, - "output_dimension": output_dimension, - "task_type": task_type, - "text_truncation": text_truncation, - }, - inference_embeddings_params.InferenceEmbeddingsParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=EmbeddingsResponse, - ) - - -class InferenceResourceWithRawResponse: - def __init__(self, inference: InferenceResource) -> None: - self._inference = inference - - self.batch_chat_completion = to_raw_response_wrapper( - inference.batch_chat_completion, - ) - self.batch_completion = to_raw_response_wrapper( - inference.batch_completion, - ) - self.chat_completion = to_raw_response_wrapper( - inference.chat_completion, - ) - self.completion = to_raw_response_wrapper( - inference.completion, - ) - self.embeddings = to_raw_response_wrapper( - inference.embeddings, - ) - - -class AsyncInferenceResourceWithRawResponse: - def __init__(self, inference: AsyncInferenceResource) -> None: - self._inference = inference - - self.batch_chat_completion = async_to_raw_response_wrapper( - inference.batch_chat_completion, - ) - self.batch_completion = async_to_raw_response_wrapper( - inference.batch_completion, - ) - self.chat_completion = async_to_raw_response_wrapper( - inference.chat_completion, - ) - self.completion = async_to_raw_response_wrapper( - inference.completion, - ) - self.embeddings = async_to_raw_response_wrapper( - inference.embeddings, - ) - - -class InferenceResourceWithStreamingResponse: - def __init__(self, inference: InferenceResource) -> None: - self._inference = inference - - self.batch_chat_completion = to_streamed_response_wrapper( - inference.batch_chat_completion, - ) - self.batch_completion = to_streamed_response_wrapper( - inference.batch_completion, - ) - self.chat_completion = to_streamed_response_wrapper( - inference.chat_completion, - ) - self.completion = to_streamed_response_wrapper( - inference.completion, - ) - self.embeddings = to_streamed_response_wrapper( - inference.embeddings, - ) - - -class AsyncInferenceResourceWithStreamingResponse: - def __init__(self, inference: AsyncInferenceResource) -> None: - self._inference = inference - - self.batch_chat_completion = async_to_streamed_response_wrapper( - inference.batch_chat_completion, - ) - self.batch_completion = async_to_streamed_response_wrapper( - inference.batch_completion, - ) - self.chat_completion = async_to_streamed_response_wrapper( - inference.chat_completion, - ) - self.completion = async_to_streamed_response_wrapper( - inference.completion, - ) - self.embeddings = async_to_streamed_response_wrapper( - inference.embeddings, - ) diff --git a/src/llama_stack_client/resources/inspect.py b/src/llama_stack_client/resources/inspect.py deleted file mode 100644 index eb028c16..00000000 --- a/src/llama_stack_client/resources/inspect.py +++ /dev/null @@ -1,186 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
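Reviewer note: the embeddings method and the raw/streaming wrapper classes removed above can be exercised as in the sketch below. It assumes an embedding model registered with the stack; the model id and base_url are placeholders.

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder

embeddings = client.inference.embeddings(
    model_id="all-MiniLM-L6-v2",  # placeholder embedding model id
    contents=["A short passage to embed.", "Another passage."],
)
# EmbeddingsResponse.embeddings is a list of float vectors, one per input.
print(len(embeddings.embeddings), len(embeddings.embeddings[0]))

# The *WithRawResponse wrappers defined at the end of the deleted file expose
# the same methods but return the raw HTTP response object instead.
raw = client.inference.with_raw_response.embeddings(
    model_id="all-MiniLM-L6-v2",
    contents=["A short passage to embed."],
)
print(raw.headers.get("content-type"))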
- -from __future__ import annotations - -import httpx - -from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._compat import cached_property -from .._resource import SyncAPIResource, AsyncAPIResource -from .._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from .._base_client import make_request_options -from ..types.health_info import HealthInfo -from ..types.version_info import VersionInfo - -__all__ = ["InspectResource", "AsyncInspectResource"] - - -class InspectResource(SyncAPIResource): - @cached_property - def with_raw_response(self) -> InspectResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return InspectResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> InspectResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return InspectResourceWithStreamingResponse(self) - - def health( - self, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> HealthInfo: - """Get the health of the service.""" - return self._get( - "/v1/health", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=HealthInfo, - ) - - def version( - self, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> VersionInfo: - """Get the version of the service.""" - return self._get( - "/v1/version", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=VersionInfo, - ) - - -class AsyncInspectResource(AsyncAPIResource): - @cached_property - def with_raw_response(self) -> AsyncInspectResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncInspectResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncInspectResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
- - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncInspectResourceWithStreamingResponse(self) - - async def health( - self, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> HealthInfo: - """Get the health of the service.""" - return await self._get( - "/v1/health", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=HealthInfo, - ) - - async def version( - self, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> VersionInfo: - """Get the version of the service.""" - return await self._get( - "/v1/version", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=VersionInfo, - ) - - -class InspectResourceWithRawResponse: - def __init__(self, inspect: InspectResource) -> None: - self._inspect = inspect - - self.health = to_raw_response_wrapper( - inspect.health, - ) - self.version = to_raw_response_wrapper( - inspect.version, - ) - - -class AsyncInspectResourceWithRawResponse: - def __init__(self, inspect: AsyncInspectResource) -> None: - self._inspect = inspect - - self.health = async_to_raw_response_wrapper( - inspect.health, - ) - self.version = async_to_raw_response_wrapper( - inspect.version, - ) - - -class InspectResourceWithStreamingResponse: - def __init__(self, inspect: InspectResource) -> None: - self._inspect = inspect - - self.health = to_streamed_response_wrapper( - inspect.health, - ) - self.version = to_streamed_response_wrapper( - inspect.version, - ) - - -class AsyncInspectResourceWithStreamingResponse: - def __init__(self, inspect: AsyncInspectResource) -> None: - self._inspect = inspect - - self.health = async_to_streamed_response_wrapper( - inspect.health, - ) - self.version = async_to_streamed_response_wrapper( - inspect.version, - ) diff --git a/src/llama_stack_client/resources/models.py b/src/llama_stack_client/resources/models.py deleted file mode 100644 index 4efb632a..00000000 --- a/src/llama_stack_client/resources/models.py +++ /dev/null @@ -1,430 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
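Reviewer note: the InspectResource being deleted here wrapped two parameterless endpoints; a minimal sketch of how it was called, with a placeholder base_url.

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder

health = client.inspect.health()    # GET /v1/health  -> HealthInfo
version = client.inspect.version()  # GET /v1/version -> VersionInfo
print(health.status, version.version)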
- -from __future__ import annotations - -from typing import Dict, Type, Union, Iterable, cast -from typing_extensions import Literal - -import httpx - -from ..types import model_register_params -from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven -from .._utils import maybe_transform, async_maybe_transform -from .._compat import cached_property -from .._resource import SyncAPIResource, AsyncAPIResource -from .._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from .._wrappers import DataWrapper -from ..types.model import Model -from .._base_client import make_request_options -from ..types.model_list_response import ModelListResponse - -__all__ = ["ModelsResource", "AsyncModelsResource"] - - -class ModelsResource(SyncAPIResource): - @cached_property - def with_raw_response(self) -> ModelsResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return ModelsResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> ModelsResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return ModelsResourceWithStreamingResponse(self) - - def retrieve( - self, - model_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Model: - """ - Get a model by its identifier. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not model_id: - raise ValueError(f"Expected a non-empty value for `model_id` but received {model_id!r}") - return self._get( - f"/v1/models/{model_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Model, - ) - - def list( - self, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ModelListResponse: - """List all models.""" - return self._get( - "/v1/models", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - post_parser=DataWrapper[ModelListResponse]._unwrapper, - ), - cast_to=cast(Type[ModelListResponse], DataWrapper[ModelListResponse]), - ) - - def register( - self, - *, - model_id: str, - metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - model_type: Literal["llm", "embedding"] | NotGiven = NOT_GIVEN, - provider_id: str | NotGiven = NOT_GIVEN, - provider_model_id: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Model: - """ - Register a model. - - Args: - model_id: The identifier of the model to register. - - metadata: Any additional metadata for this model. - - model_type: The type of model to register. - - provider_id: The identifier of the provider. - - provider_model_id: The identifier of the model in the provider. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return self._post( - "/v1/models", - body=maybe_transform( - { - "model_id": model_id, - "metadata": metadata, - "model_type": model_type, - "provider_id": provider_id, - "provider_model_id": provider_model_id, - }, - model_register_params.ModelRegisterParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Model, - ) - - def unregister( - self, - model_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> None: - """ - Unregister a model. 
- - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not model_id: - raise ValueError(f"Expected a non-empty value for `model_id` but received {model_id!r}") - extra_headers = {"Accept": "*/*", **(extra_headers or {})} - return self._delete( - f"/v1/models/{model_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=NoneType, - ) - - -class AsyncModelsResource(AsyncAPIResource): - @cached_property - def with_raw_response(self) -> AsyncModelsResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncModelsResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncModelsResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncModelsResourceWithStreamingResponse(self) - - async def retrieve( - self, - model_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Model: - """ - Get a model by its identifier. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not model_id: - raise ValueError(f"Expected a non-empty value for `model_id` but received {model_id!r}") - return await self._get( - f"/v1/models/{model_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Model, - ) - - async def list( - self, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ModelListResponse: - """List all models.""" - return await self._get( - "/v1/models", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - post_parser=DataWrapper[ModelListResponse]._unwrapper, - ), - cast_to=cast(Type[ModelListResponse], DataWrapper[ModelListResponse]), - ) - - async def register( - self, - *, - model_id: str, - metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - model_type: Literal["llm", "embedding"] | NotGiven = NOT_GIVEN, - provider_id: str | NotGiven = NOT_GIVEN, - provider_model_id: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Model: - """ - Register a model. - - Args: - model_id: The identifier of the model to register. - - metadata: Any additional metadata for this model. - - model_type: The type of model to register. - - provider_id: The identifier of the provider. - - provider_model_id: The identifier of the model in the provider. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return await self._post( - "/v1/models", - body=await async_maybe_transform( - { - "model_id": model_id, - "metadata": metadata, - "model_type": model_type, - "provider_id": provider_id, - "provider_model_id": provider_model_id, - }, - model_register_params.ModelRegisterParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Model, - ) - - async def unregister( - self, - model_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> None: - """ - Unregister a model. 
- - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not model_id: - raise ValueError(f"Expected a non-empty value for `model_id` but received {model_id!r}") - extra_headers = {"Accept": "*/*", **(extra_headers or {})} - return await self._delete( - f"/v1/models/{model_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=NoneType, - ) - - -class ModelsResourceWithRawResponse: - def __init__(self, models: ModelsResource) -> None: - self._models = models - - self.retrieve = to_raw_response_wrapper( - models.retrieve, - ) - self.list = to_raw_response_wrapper( - models.list, - ) - self.register = to_raw_response_wrapper( - models.register, - ) - self.unregister = to_raw_response_wrapper( - models.unregister, - ) - - -class AsyncModelsResourceWithRawResponse: - def __init__(self, models: AsyncModelsResource) -> None: - self._models = models - - self.retrieve = async_to_raw_response_wrapper( - models.retrieve, - ) - self.list = async_to_raw_response_wrapper( - models.list, - ) - self.register = async_to_raw_response_wrapper( - models.register, - ) - self.unregister = async_to_raw_response_wrapper( - models.unregister, - ) - - -class ModelsResourceWithStreamingResponse: - def __init__(self, models: ModelsResource) -> None: - self._models = models - - self.retrieve = to_streamed_response_wrapper( - models.retrieve, - ) - self.list = to_streamed_response_wrapper( - models.list, - ) - self.register = to_streamed_response_wrapper( - models.register, - ) - self.unregister = to_streamed_response_wrapper( - models.unregister, - ) - - -class AsyncModelsResourceWithStreamingResponse: - def __init__(self, models: AsyncModelsResource) -> None: - self._models = models - - self.retrieve = async_to_streamed_response_wrapper( - models.retrieve, - ) - self.list = async_to_streamed_response_wrapper( - models.list, - ) - self.register = async_to_streamed_response_wrapper( - models.register, - ) - self.unregister = async_to_streamed_response_wrapper( - models.unregister, - ) diff --git a/src/llama_stack_client/resources/post_training/__init__.py b/src/llama_stack_client/resources/post_training/__init__.py deleted file mode 100644 index e1fa2361..00000000 --- a/src/llama_stack_client/resources/post_training/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
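Reviewer note: the ModelsResource removed above covered the model registry CRUD surface. The sketch below shows the typical round trip; the provider and model identifiers are placeholders, not values from this patch.

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder

# Register a model against a provider already configured on the server.
model = client.models.register(
    model_id="my-llm",              # placeholder identifier
    provider_id="ollama",           # placeholder provider
    provider_model_id="llama3.1:8b",  # placeholder provider-side id
    model_type="llm",
)

# list() unwraps the DataWrapper, so it yields Model objects directly.
print([m.identifier for m in client.models.list()])
print(client.models.retrieve("my-llm").provider_id)  # GET /v1/models/{model_id}
client.models.unregister("my-llm")                   # DELETE, returns None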
- -from .job import ( - JobResource, - AsyncJobResource, - JobResourceWithRawResponse, - AsyncJobResourceWithRawResponse, - JobResourceWithStreamingResponse, - AsyncJobResourceWithStreamingResponse, -) -from .post_training import ( - PostTrainingResource, - AsyncPostTrainingResource, - PostTrainingResourceWithRawResponse, - AsyncPostTrainingResourceWithRawResponse, - PostTrainingResourceWithStreamingResponse, - AsyncPostTrainingResourceWithStreamingResponse, -) - -__all__ = [ - "JobResource", - "AsyncJobResource", - "JobResourceWithRawResponse", - "AsyncJobResourceWithRawResponse", - "JobResourceWithStreamingResponse", - "AsyncJobResourceWithStreamingResponse", - "PostTrainingResource", - "AsyncPostTrainingResource", - "PostTrainingResourceWithRawResponse", - "AsyncPostTrainingResourceWithRawResponse", - "PostTrainingResourceWithStreamingResponse", - "AsyncPostTrainingResourceWithStreamingResponse", -] diff --git a/src/llama_stack_client/resources/post_training/job.py b/src/llama_stack_client/resources/post_training/job.py deleted file mode 100644 index 2252b19e..00000000 --- a/src/llama_stack_client/resources/post_training/job.py +++ /dev/null @@ -1,404 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import List, Type, cast - -import httpx - -from ..._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven -from ..._utils import maybe_transform, async_maybe_transform -from ..._compat import cached_property -from ..._resource import SyncAPIResource, AsyncAPIResource -from ..._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from ..._wrappers import DataWrapper -from ..._base_client import make_request_options -from ...types.post_training import job_cancel_params, job_status_params, job_artifacts_params -from ...types.list_post_training_jobs_response import Data -from ...types.post_training.job_status_response import JobStatusResponse -from ...types.post_training.job_artifacts_response import JobArtifactsResponse - -__all__ = ["JobResource", "AsyncJobResource"] - - -class JobResource(SyncAPIResource): - @cached_property - def with_raw_response(self) -> JobResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return JobResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> JobResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return JobResourceWithStreamingResponse(self) - - def list( - self, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> List[Data]: - """Get all training jobs.""" - return self._get( - "/v1/post-training/jobs", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - post_parser=DataWrapper[List[Data]]._unwrapper, - ), - cast_to=cast(Type[List[Data]], DataWrapper[Data]), - ) - - def artifacts( - self, - *, - job_uuid: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> JobArtifactsResponse: - """ - Get the artifacts of a training job. - - Args: - job_uuid: The UUID of the job to get the artifacts of. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return self._get( - "/v1/post-training/job/artifacts", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=maybe_transform({"job_uuid": job_uuid}, job_artifacts_params.JobArtifactsParams), - ), - cast_to=JobArtifactsResponse, - ) - - def cancel( - self, - *, - job_uuid: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> None: - """ - Cancel a training job. - - Args: - job_uuid: The UUID of the job to cancel. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"Accept": "*/*", **(extra_headers or {})} - return self._post( - "/v1/post-training/job/cancel", - body=maybe_transform({"job_uuid": job_uuid}, job_cancel_params.JobCancelParams), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=NoneType, - ) - - def status( - self, - *, - job_uuid: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> JobStatusResponse: - """ - Get the status of a training job. - - Args: - job_uuid: The UUID of the job to get the status of. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return self._get( - "/v1/post-training/job/status", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=maybe_transform({"job_uuid": job_uuid}, job_status_params.JobStatusParams), - ), - cast_to=JobStatusResponse, - ) - - -class AsyncJobResource(AsyncAPIResource): - @cached_property - def with_raw_response(self) -> AsyncJobResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncJobResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncJobResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncJobResourceWithStreamingResponse(self) - - async def list( - self, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> List[Data]: - """Get all training jobs.""" - return await self._get( - "/v1/post-training/jobs", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - post_parser=DataWrapper[List[Data]]._unwrapper, - ), - cast_to=cast(Type[List[Data]], DataWrapper[Data]), - ) - - async def artifacts( - self, - *, - job_uuid: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> JobArtifactsResponse: - """ - Get the artifacts of a training job. - - Args: - job_uuid: The UUID of the job to get the artifacts of. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return await self._get( - "/v1/post-training/job/artifacts", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=await async_maybe_transform({"job_uuid": job_uuid}, job_artifacts_params.JobArtifactsParams), - ), - cast_to=JobArtifactsResponse, - ) - - async def cancel( - self, - *, - job_uuid: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
- # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> None: - """ - Cancel a training job. - - Args: - job_uuid: The UUID of the job to cancel. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"Accept": "*/*", **(extra_headers or {})} - return await self._post( - "/v1/post-training/job/cancel", - body=await async_maybe_transform({"job_uuid": job_uuid}, job_cancel_params.JobCancelParams), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=NoneType, - ) - - async def status( - self, - *, - job_uuid: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> JobStatusResponse: - """ - Get the status of a training job. - - Args: - job_uuid: The UUID of the job to get the status of. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return await self._get( - "/v1/post-training/job/status", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=await async_maybe_transform({"job_uuid": job_uuid}, job_status_params.JobStatusParams), - ), - cast_to=JobStatusResponse, - ) - - -class JobResourceWithRawResponse: - def __init__(self, job: JobResource) -> None: - self._job = job - - self.list = to_raw_response_wrapper( - job.list, - ) - self.artifacts = to_raw_response_wrapper( - job.artifacts, - ) - self.cancel = to_raw_response_wrapper( - job.cancel, - ) - self.status = to_raw_response_wrapper( - job.status, - ) - - -class AsyncJobResourceWithRawResponse: - def __init__(self, job: AsyncJobResource) -> None: - self._job = job - - self.list = async_to_raw_response_wrapper( - job.list, - ) - self.artifacts = async_to_raw_response_wrapper( - job.artifacts, - ) - self.cancel = async_to_raw_response_wrapper( - job.cancel, - ) - self.status = async_to_raw_response_wrapper( - job.status, - ) - - -class JobResourceWithStreamingResponse: - def __init__(self, job: JobResource) -> None: - self._job = job - - self.list = to_streamed_response_wrapper( - job.list, - ) - self.artifacts = to_streamed_response_wrapper( - job.artifacts, - ) - self.cancel = to_streamed_response_wrapper( - job.cancel, - ) - self.status = to_streamed_response_wrapper( - job.status, - ) - - -class AsyncJobResourceWithStreamingResponse: - def __init__(self, job: AsyncJobResource) -> None: - self._job = job - - self.list = async_to_streamed_response_wrapper( - job.list, - ) - self.artifacts = async_to_streamed_response_wrapper( - job.artifacts, - ) - self.cancel = 
async_to_streamed_response_wrapper( - job.cancel, - ) - self.status = async_to_streamed_response_wrapper( - job.status, - ) diff --git a/src/llama_stack_client/resources/post_training/post_training.py b/src/llama_stack_client/resources/post_training/post_training.py deleted file mode 100644 index ff1fab45..00000000 --- a/src/llama_stack_client/resources/post_training/post_training.py +++ /dev/null @@ -1,393 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Dict, Union, Iterable - -import httpx - -from .job import ( - JobResource, - AsyncJobResource, - JobResourceWithRawResponse, - AsyncJobResourceWithRawResponse, - JobResourceWithStreamingResponse, - AsyncJobResourceWithStreamingResponse, -) -from ...types import ( - post_training_preference_optimize_params, - post_training_supervised_fine_tune_params, -) -from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ..._utils import maybe_transform, async_maybe_transform -from ..._compat import cached_property -from ..._resource import SyncAPIResource, AsyncAPIResource -from ..._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from ..._base_client import make_request_options -from ...types.post_training_job import PostTrainingJob -from ...types.algorithm_config_param import AlgorithmConfigParam - -__all__ = ["PostTrainingResource", "AsyncPostTrainingResource"] - - -class PostTrainingResource(SyncAPIResource): - @cached_property - def job(self) -> JobResource: - return JobResource(self._client) - - @cached_property - def with_raw_response(self) -> PostTrainingResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return PostTrainingResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> PostTrainingResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return PostTrainingResourceWithStreamingResponse(self) - - def preference_optimize( - self, - *, - algorithm_config: post_training_preference_optimize_params.AlgorithmConfig, - finetuned_model: str, - hyperparam_search_config: Dict[str, Union[bool, float, str, Iterable[object], object, None]], - job_uuid: str, - logger_config: Dict[str, Union[bool, float, str, Iterable[object], object, None]], - training_config: post_training_preference_optimize_params.TrainingConfig, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> PostTrainingJob: - """ - Run preference optimization of a model. - - Args: - algorithm_config: The algorithm configuration. - - finetuned_model: The model to fine-tune. - - hyperparam_search_config: The hyperparam search configuration. 
- - job_uuid: The UUID of the job to create. - - logger_config: The logger configuration. - - training_config: The training configuration. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return self._post( - "/v1/post-training/preference-optimize", - body=maybe_transform( - { - "algorithm_config": algorithm_config, - "finetuned_model": finetuned_model, - "hyperparam_search_config": hyperparam_search_config, - "job_uuid": job_uuid, - "logger_config": logger_config, - "training_config": training_config, - }, - post_training_preference_optimize_params.PostTrainingPreferenceOptimizeParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=PostTrainingJob, - ) - - def supervised_fine_tune( - self, - *, - hyperparam_search_config: Dict[str, Union[bool, float, str, Iterable[object], object, None]], - job_uuid: str, - logger_config: Dict[str, Union[bool, float, str, Iterable[object], object, None]], - training_config: post_training_supervised_fine_tune_params.TrainingConfig, - algorithm_config: AlgorithmConfigParam | NotGiven = NOT_GIVEN, - checkpoint_dir: str | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> PostTrainingJob: - """ - Run supervised fine-tuning of a model. - - Args: - hyperparam_search_config: The hyperparam search configuration. - - job_uuid: The UUID of the job to create. - - logger_config: The logger configuration. - - training_config: The training configuration. - - algorithm_config: The algorithm configuration. - - checkpoint_dir: The directory to save checkpoint(s) to. - - model: The model to fine-tune. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return self._post( - "/v1/post-training/supervised-fine-tune", - body=maybe_transform( - { - "hyperparam_search_config": hyperparam_search_config, - "job_uuid": job_uuid, - "logger_config": logger_config, - "training_config": training_config, - "algorithm_config": algorithm_config, - "checkpoint_dir": checkpoint_dir, - "model": model, - }, - post_training_supervised_fine_tune_params.PostTrainingSupervisedFineTuneParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=PostTrainingJob, - ) - - -class AsyncPostTrainingResource(AsyncAPIResource): - @cached_property - def job(self) -> AsyncJobResource: - return AsyncJobResource(self._client) - - @cached_property - def with_raw_response(self) -> AsyncPostTrainingResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. 
- - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncPostTrainingResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncPostTrainingResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncPostTrainingResourceWithStreamingResponse(self) - - async def preference_optimize( - self, - *, - algorithm_config: post_training_preference_optimize_params.AlgorithmConfig, - finetuned_model: str, - hyperparam_search_config: Dict[str, Union[bool, float, str, Iterable[object], object, None]], - job_uuid: str, - logger_config: Dict[str, Union[bool, float, str, Iterable[object], object, None]], - training_config: post_training_preference_optimize_params.TrainingConfig, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> PostTrainingJob: - """ - Run preference optimization of a model. - - Args: - algorithm_config: The algorithm configuration. - - finetuned_model: The model to fine-tune. - - hyperparam_search_config: The hyperparam search configuration. - - job_uuid: The UUID of the job to create. - - logger_config: The logger configuration. - - training_config: The training configuration. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return await self._post( - "/v1/post-training/preference-optimize", - body=await async_maybe_transform( - { - "algorithm_config": algorithm_config, - "finetuned_model": finetuned_model, - "hyperparam_search_config": hyperparam_search_config, - "job_uuid": job_uuid, - "logger_config": logger_config, - "training_config": training_config, - }, - post_training_preference_optimize_params.PostTrainingPreferenceOptimizeParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=PostTrainingJob, - ) - - async def supervised_fine_tune( - self, - *, - hyperparam_search_config: Dict[str, Union[bool, float, str, Iterable[object], object, None]], - job_uuid: str, - logger_config: Dict[str, Union[bool, float, str, Iterable[object], object, None]], - training_config: post_training_supervised_fine_tune_params.TrainingConfig, - algorithm_config: AlgorithmConfigParam | NotGiven = NOT_GIVEN, - checkpoint_dir: str | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> PostTrainingJob: - """ - Run supervised fine-tuning of a model. - - Args: - hyperparam_search_config: The hyperparam search configuration. - - job_uuid: The UUID of the job to create. - - logger_config: The logger configuration. - - training_config: The training configuration. - - algorithm_config: The algorithm configuration. - - checkpoint_dir: The directory to save checkpoint(s) to. - - model: The model to fine-tune. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return await self._post( - "/v1/post-training/supervised-fine-tune", - body=await async_maybe_transform( - { - "hyperparam_search_config": hyperparam_search_config, - "job_uuid": job_uuid, - "logger_config": logger_config, - "training_config": training_config, - "algorithm_config": algorithm_config, - "checkpoint_dir": checkpoint_dir, - "model": model, - }, - post_training_supervised_fine_tune_params.PostTrainingSupervisedFineTuneParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=PostTrainingJob, - ) - - -class PostTrainingResourceWithRawResponse: - def __init__(self, post_training: PostTrainingResource) -> None: - self._post_training = post_training - - self.preference_optimize = to_raw_response_wrapper( - post_training.preference_optimize, - ) - self.supervised_fine_tune = to_raw_response_wrapper( - post_training.supervised_fine_tune, - ) - - @cached_property - def job(self) -> JobResourceWithRawResponse: - return JobResourceWithRawResponse(self._post_training.job) - - -class AsyncPostTrainingResourceWithRawResponse: - def __init__(self, post_training: AsyncPostTrainingResource) -> None: - self._post_training = post_training - - self.preference_optimize = async_to_raw_response_wrapper( - post_training.preference_optimize, - ) - self.supervised_fine_tune = async_to_raw_response_wrapper( - post_training.supervised_fine_tune, - ) - - @cached_property - def job(self) -> AsyncJobResourceWithRawResponse: - return AsyncJobResourceWithRawResponse(self._post_training.job) - - -class PostTrainingResourceWithStreamingResponse: - def __init__(self, post_training: PostTrainingResource) -> None: - self._post_training = post_training - - self.preference_optimize = to_streamed_response_wrapper( - post_training.preference_optimize, - ) - self.supervised_fine_tune = to_streamed_response_wrapper( - post_training.supervised_fine_tune, - ) - - @cached_property - def job(self) -> JobResourceWithStreamingResponse: - return JobResourceWithStreamingResponse(self._post_training.job) - - -class AsyncPostTrainingResourceWithStreamingResponse: - def __init__(self, post_training: AsyncPostTrainingResource) -> None: - self._post_training = post_training - - self.preference_optimize = async_to_streamed_response_wrapper( - post_training.preference_optimize, - ) - self.supervised_fine_tune = async_to_streamed_response_wrapper( - post_training.supervised_fine_tune, - ) - - @cached_property - def job(self) -> AsyncJobResourceWithStreamingResponse: - return AsyncJobResourceWithStreamingResponse(self._post_training.job) diff --git a/src/llama_stack_client/resources/providers.py 
b/src/llama_stack_client/resources/providers.py deleted file mode 100644 index a50f7d67..00000000 --- a/src/llama_stack_client/resources/providers.py +++ /dev/null @@ -1,225 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Type, cast - -import httpx - -from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._compat import cached_property -from .._resource import SyncAPIResource, AsyncAPIResource -from .._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from .._wrappers import DataWrapper -from .._base_client import make_request_options -from ..types.provider_info import ProviderInfo -from ..types.provider_list_response import ProviderListResponse - -__all__ = ["ProvidersResource", "AsyncProvidersResource"] - - -class ProvidersResource(SyncAPIResource): - @cached_property - def with_raw_response(self) -> ProvidersResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return ProvidersResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> ProvidersResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return ProvidersResourceWithStreamingResponse(self) - - def retrieve( - self, - provider_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ProviderInfo: - """ - Get detailed information about a specific provider. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not provider_id: - raise ValueError(f"Expected a non-empty value for `provider_id` but received {provider_id!r}") - return self._get( - f"/v1/providers/{provider_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=ProviderInfo, - ) - - def list( - self, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ProviderListResponse: - """List all available providers.""" - return self._get( - "/v1/providers", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - post_parser=DataWrapper[ProviderListResponse]._unwrapper, - ), - cast_to=cast(Type[ProviderListResponse], DataWrapper[ProviderListResponse]), - ) - - -class AsyncProvidersResource(AsyncAPIResource): - @cached_property - def with_raw_response(self) -> AsyncProvidersResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncProvidersResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncProvidersResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncProvidersResourceWithStreamingResponse(self) - - async def retrieve( - self, - provider_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ProviderInfo: - """ - Get detailed information about a specific provider. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not provider_id: - raise ValueError(f"Expected a non-empty value for `provider_id` but received {provider_id!r}") - return await self._get( - f"/v1/providers/{provider_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=ProviderInfo, - ) - - async def list( - self, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ProviderListResponse: - """List all available providers.""" - return await self._get( - "/v1/providers", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - post_parser=DataWrapper[ProviderListResponse]._unwrapper, - ), - cast_to=cast(Type[ProviderListResponse], DataWrapper[ProviderListResponse]), - ) - - -class ProvidersResourceWithRawResponse: - def __init__(self, providers: ProvidersResource) -> None: - self._providers = providers - - self.retrieve = to_raw_response_wrapper( - providers.retrieve, - ) - self.list = to_raw_response_wrapper( - providers.list, - ) - - -class AsyncProvidersResourceWithRawResponse: - def __init__(self, providers: AsyncProvidersResource) -> None: - self._providers = providers - - self.retrieve = async_to_raw_response_wrapper( - providers.retrieve, - ) - self.list = async_to_raw_response_wrapper( - providers.list, - ) - - -class ProvidersResourceWithStreamingResponse: - def __init__(self, providers: ProvidersResource) -> None: - self._providers = providers - - self.retrieve = to_streamed_response_wrapper( - providers.retrieve, - ) - self.list = to_streamed_response_wrapper( - providers.list, - ) - - -class AsyncProvidersResourceWithStreamingResponse: - def __init__(self, providers: AsyncProvidersResource) -> None: - self._providers = providers - - self.retrieve = async_to_streamed_response_wrapper( - providers.retrieve, - ) - self.list = async_to_streamed_response_wrapper( - providers.list, - ) diff --git a/src/llama_stack_client/resources/responses/__init__.py b/src/llama_stack_client/resources/responses/__init__.py deleted file mode 100644 index 230ef765..00000000 --- a/src/llama_stack_client/resources/responses/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from .responses import ( - ResponsesResource, - AsyncResponsesResource, - ResponsesResourceWithRawResponse, - AsyncResponsesResourceWithRawResponse, - ResponsesResourceWithStreamingResponse, - AsyncResponsesResourceWithStreamingResponse, -) -from .input_items import ( - InputItemsResource, - AsyncInputItemsResource, - InputItemsResourceWithRawResponse, - AsyncInputItemsResourceWithRawResponse, - InputItemsResourceWithStreamingResponse, - AsyncInputItemsResourceWithStreamingResponse, -) - -__all__ = [ - "InputItemsResource", - "AsyncInputItemsResource", - "InputItemsResourceWithRawResponse", - "AsyncInputItemsResourceWithRawResponse", - "InputItemsResourceWithStreamingResponse", - "AsyncInputItemsResourceWithStreamingResponse", - "ResponsesResource", - "AsyncResponsesResource", - "ResponsesResourceWithRawResponse", - "AsyncResponsesResourceWithRawResponse", - "ResponsesResourceWithStreamingResponse", - "AsyncResponsesResourceWithStreamingResponse", -] diff --git a/src/llama_stack_client/resources/responses/input_items.py b/src/llama_stack_client/resources/responses/input_items.py deleted file mode 100644 index 08139af7..00000000 --- a/src/llama_stack_client/resources/responses/input_items.py +++ /dev/null @@ -1,226 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
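A similar sketch for the providers resource deleted above, under the same assumed client entry point. The GET /v1/providers and GET /v1/providers/{provider_id} routes, the empty-ID check, and the `with_raw_response` variant mirror the removed implementation; the provider ID shown is hypothetical.

    from llama_stack_client import LlamaStackClient  # assumed entry point of the removed package

    client = LlamaStackClient(base_url="http://localhost:8321")

    providers = client.providers.list()         # GET /v1/providers, unwrapped from the `data` envelope
    info = client.providers.retrieve("ollama")  # GET /v1/providers/{provider_id}; "ollama" is a hypothetical ID

    # Raw-response variant from the removed wrapper classes: returns the HTTP response
    # object (headers, status) instead of the parsed ProviderInfo.
    raw = client.providers.with_raw_response.retrieve("ollama")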
- -from __future__ import annotations - -from typing import List -from typing_extensions import Literal - -import httpx - -from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ..._utils import maybe_transform, async_maybe_transform -from ..._compat import cached_property -from ..._resource import SyncAPIResource, AsyncAPIResource -from ..._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from ..._base_client import make_request_options -from ...types.responses import input_item_list_params -from ...types.responses.input_item_list_response import InputItemListResponse - -__all__ = ["InputItemsResource", "AsyncInputItemsResource"] - - -class InputItemsResource(SyncAPIResource): - @cached_property - def with_raw_response(self) -> InputItemsResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return InputItemsResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> InputItemsResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return InputItemsResourceWithStreamingResponse(self) - - def list( - self, - response_id: str, - *, - after: str | NotGiven = NOT_GIVEN, - before: str | NotGiven = NOT_GIVEN, - include: List[str] | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> InputItemListResponse: - """ - List input items for a given OpenAI response. - - Args: - after: An item ID to list items after, used for pagination. - - before: An item ID to list items before, used for pagination. - - include: Additional fields to include in the response. - - limit: A limit on the number of objects to be returned. Limit can range between 1 and - 100, and the default is 20. - - order: The order to return the input items in. Default is desc. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not response_id: - raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") - return self._get( - f"/v1/openai/v1/responses/{response_id}/input_items", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=maybe_transform( - { - "after": after, - "before": before, - "include": include, - "limit": limit, - "order": order, - }, - input_item_list_params.InputItemListParams, - ), - ), - cast_to=InputItemListResponse, - ) - - -class AsyncInputItemsResource(AsyncAPIResource): - @cached_property - def with_raw_response(self) -> AsyncInputItemsResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncInputItemsResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncInputItemsResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncInputItemsResourceWithStreamingResponse(self) - - async def list( - self, - response_id: str, - *, - after: str | NotGiven = NOT_GIVEN, - before: str | NotGiven = NOT_GIVEN, - include: List[str] | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> InputItemListResponse: - """ - List input items for a given OpenAI response. - - Args: - after: An item ID to list items after, used for pagination. - - before: An item ID to list items before, used for pagination. - - include: Additional fields to include in the response. - - limit: A limit on the number of objects to be returned. Limit can range between 1 and - 100, and the default is 20. - - order: The order to return the input items in. Default is desc. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not response_id: - raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") - return await self._get( - f"/v1/openai/v1/responses/{response_id}/input_items", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=await async_maybe_transform( - { - "after": after, - "before": before, - "include": include, - "limit": limit, - "order": order, - }, - input_item_list_params.InputItemListParams, - ), - ), - cast_to=InputItemListResponse, - ) - - -class InputItemsResourceWithRawResponse: - def __init__(self, input_items: InputItemsResource) -> None: - self._input_items = input_items - - self.list = to_raw_response_wrapper( - input_items.list, - ) - - -class AsyncInputItemsResourceWithRawResponse: - def __init__(self, input_items: AsyncInputItemsResource) -> None: - self._input_items = input_items - - self.list = async_to_raw_response_wrapper( - input_items.list, - ) - - -class InputItemsResourceWithStreamingResponse: - def __init__(self, input_items: InputItemsResource) -> None: - self._input_items = input_items - - self.list = to_streamed_response_wrapper( - input_items.list, - ) - - -class AsyncInputItemsResourceWithStreamingResponse: - def __init__(self, input_items: AsyncInputItemsResource) -> None: - self._input_items = input_items - - self.list = async_to_streamed_response_wrapper( - input_items.list, - ) diff --git a/src/llama_stack_client/resources/responses/responses.py b/src/llama_stack_client/resources/responses/responses.py deleted file mode 100644 index fa05f7ed..00000000 --- a/src/llama_stack_client/resources/responses/responses.py +++ /dev/null @@ -1,689 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
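The input-items resource deleted above exposed cursor-style pagination over a response's inputs. A hedged usage sketch, assuming it was reachable at `client.responses.input_items` as wired in the responses resource below; the response ID is hypothetical, and the parameter names and limits come from the removed docstrings.

    from llama_stack_client import LlamaStackClient  # assumed entry point of the removed package

    client = LlamaStackClient(base_url="http://localhost:8321")

    # GET /v1/openai/v1/responses/{response_id}/input_items; limit may range from 1 to 100
    # (default 20) and order defaults to "desc".
    items = client.responses.input_items.list(
        "resp_123",  # hypothetical response ID
        limit=50,
        order="asc",
    )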
- -from __future__ import annotations - -from typing import Union, Iterable -from typing_extensions import Literal, overload - -import httpx - -from ...types import response_list_params, response_create_params -from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ..._utils import required_args, maybe_transform, async_maybe_transform -from ..._compat import cached_property -from ..._resource import SyncAPIResource, AsyncAPIResource -from ..._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from .input_items import ( - InputItemsResource, - AsyncInputItemsResource, - InputItemsResourceWithRawResponse, - AsyncInputItemsResourceWithRawResponse, - InputItemsResourceWithStreamingResponse, - AsyncInputItemsResourceWithStreamingResponse, -) -from ..._streaming import Stream, AsyncStream -from ..._base_client import make_request_options -from ...types.response_object import ResponseObject -from ...types.response_list_response import ResponseListResponse -from ...types.response_object_stream import ResponseObjectStream - -__all__ = ["ResponsesResource", "AsyncResponsesResource"] - - -class ResponsesResource(SyncAPIResource): - @cached_property - def input_items(self) -> InputItemsResource: - return InputItemsResource(self._client) - - @cached_property - def with_raw_response(self) -> ResponsesResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return ResponsesResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> ResponsesResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return ResponsesResourceWithStreamingResponse(self) - - @overload - def create( - self, - *, - input: Union[str, Iterable[response_create_params.InputUnionMember1]], - model: str, - instructions: str | NotGiven = NOT_GIVEN, - max_infer_iters: int | NotGiven = NOT_GIVEN, - previous_response_id: str | NotGiven = NOT_GIVEN, - store: bool | NotGiven = NOT_GIVEN, - stream: Literal[False] | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - text: response_create_params.Text | NotGiven = NOT_GIVEN, - tools: Iterable[response_create_params.Tool] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ResponseObject: - """ - Create a new OpenAI response. - - Args: - input: Input message(s) to create the response. - - model: The underlying LLM used for completions. - - previous_response_id: (Optional) if specified, the new response will be a continuation of the previous - response. This can be used to easily fork-off new responses from existing - responses. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - def create( - self, - *, - input: Union[str, Iterable[response_create_params.InputUnionMember1]], - model: str, - stream: Literal[True], - instructions: str | NotGiven = NOT_GIVEN, - max_infer_iters: int | NotGiven = NOT_GIVEN, - previous_response_id: str | NotGiven = NOT_GIVEN, - store: bool | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - text: response_create_params.Text | NotGiven = NOT_GIVEN, - tools: Iterable[response_create_params.Tool] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Stream[ResponseObjectStream]: - """ - Create a new OpenAI response. - - Args: - input: Input message(s) to create the response. - - model: The underlying LLM used for completions. - - previous_response_id: (Optional) if specified, the new response will be a continuation of the previous - response. This can be used to easily fork-off new responses from existing - responses. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - def create( - self, - *, - input: Union[str, Iterable[response_create_params.InputUnionMember1]], - model: str, - stream: bool, - instructions: str | NotGiven = NOT_GIVEN, - max_infer_iters: int | NotGiven = NOT_GIVEN, - previous_response_id: str | NotGiven = NOT_GIVEN, - store: bool | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - text: response_create_params.Text | NotGiven = NOT_GIVEN, - tools: Iterable[response_create_params.Tool] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ResponseObject | Stream[ResponseObjectStream]: - """ - Create a new OpenAI response. - - Args: - input: Input message(s) to create the response. - - model: The underlying LLM used for completions. - - previous_response_id: (Optional) if specified, the new response will be a continuation of the previous - response. This can be used to easily fork-off new responses from existing - responses. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... 
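The overloads above select between a parsed `ResponseObject` and a `Stream[ResponseObjectStream]` based on the `stream` flag. A sketch of both call shapes against the removed surface, with the client entry point assumed as before and a hypothetical model ID:

    from llama_stack_client import LlamaStackClient  # assumed entry point of the removed package

    client = LlamaStackClient(base_url="http://localhost:8321")

    # Non-streaming: POST /v1/openai/v1/responses returns a single ResponseObject.
    response = client.responses.create(
        input="Summarize the latest training run.",
        model="llama-3-8b",  # hypothetical model ID
    )

    # Streaming: stream=True switches the return type to Stream[ResponseObjectStream],
    # which is consumed by iterating over the emitted chunks.
    for chunk in client.responses.create(
        input="Summarize the latest training run.",
        model="llama-3-8b",
        stream=True,
    ):
        print(chunk)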
- - @required_args(["input", "model"], ["input", "model", "stream"]) - def create( - self, - *, - input: Union[str, Iterable[response_create_params.InputUnionMember1]], - model: str, - instructions: str | NotGiven = NOT_GIVEN, - max_infer_iters: int | NotGiven = NOT_GIVEN, - previous_response_id: str | NotGiven = NOT_GIVEN, - store: bool | NotGiven = NOT_GIVEN, - stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - text: response_create_params.Text | NotGiven = NOT_GIVEN, - tools: Iterable[response_create_params.Tool] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ResponseObject | Stream[ResponseObjectStream]: - return self._post( - "/v1/openai/v1/responses", - body=maybe_transform( - { - "input": input, - "model": model, - "instructions": instructions, - "max_infer_iters": max_infer_iters, - "previous_response_id": previous_response_id, - "store": store, - "stream": stream, - "temperature": temperature, - "text": text, - "tools": tools, - }, - response_create_params.ResponseCreateParamsStreaming - if stream - else response_create_params.ResponseCreateParamsNonStreaming, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=ResponseObject, - stream=stream or False, - stream_cls=Stream[ResponseObjectStream], - ) - - def retrieve( - self, - response_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ResponseObject: - """ - Retrieve an OpenAI response by its ID. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not response_id: - raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") - return self._get( - f"/v1/openai/v1/responses/{response_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=ResponseObject, - ) - - def list( - self, - *, - after: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ResponseListResponse: - """ - List all OpenAI responses. 
- - Args: - after: The ID of the last response to return. - - limit: The number of responses to return. - - model: The model to filter responses by. - - order: The order to sort responses by when sorted by created_at ('asc' or 'desc'). - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return self._get( - "/v1/openai/v1/responses", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=maybe_transform( - { - "after": after, - "limit": limit, - "model": model, - "order": order, - }, - response_list_params.ResponseListParams, - ), - ), - cast_to=ResponseListResponse, - ) - - -class AsyncResponsesResource(AsyncAPIResource): - @cached_property - def input_items(self) -> AsyncInputItemsResource: - return AsyncInputItemsResource(self._client) - - @cached_property - def with_raw_response(self) -> AsyncResponsesResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncResponsesResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncResponsesResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncResponsesResourceWithStreamingResponse(self) - - @overload - async def create( - self, - *, - input: Union[str, Iterable[response_create_params.InputUnionMember1]], - model: str, - instructions: str | NotGiven = NOT_GIVEN, - max_infer_iters: int | NotGiven = NOT_GIVEN, - previous_response_id: str | NotGiven = NOT_GIVEN, - store: bool | NotGiven = NOT_GIVEN, - stream: Literal[False] | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - text: response_create_params.Text | NotGiven = NOT_GIVEN, - tools: Iterable[response_create_params.Tool] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ResponseObject: - """ - Create a new OpenAI response. - - Args: - input: Input message(s) to create the response. - - model: The underlying LLM used for completions. - - previous_response_id: (Optional) if specified, the new response will be a continuation of the previous - response. This can be used to easily fork-off new responses from existing - responses. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... 
- - @overload - async def create( - self, - *, - input: Union[str, Iterable[response_create_params.InputUnionMember1]], - model: str, - stream: Literal[True], - instructions: str | NotGiven = NOT_GIVEN, - max_infer_iters: int | NotGiven = NOT_GIVEN, - previous_response_id: str | NotGiven = NOT_GIVEN, - store: bool | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - text: response_create_params.Text | NotGiven = NOT_GIVEN, - tools: Iterable[response_create_params.Tool] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncStream[ResponseObjectStream]: - """ - Create a new OpenAI response. - - Args: - input: Input message(s) to create the response. - - model: The underlying LLM used for completions. - - previous_response_id: (Optional) if specified, the new response will be a continuation of the previous - response. This can be used to easily fork-off new responses from existing - responses. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - async def create( - self, - *, - input: Union[str, Iterable[response_create_params.InputUnionMember1]], - model: str, - stream: bool, - instructions: str | NotGiven = NOT_GIVEN, - max_infer_iters: int | NotGiven = NOT_GIVEN, - previous_response_id: str | NotGiven = NOT_GIVEN, - store: bool | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - text: response_create_params.Text | NotGiven = NOT_GIVEN, - tools: Iterable[response_create_params.Tool] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ResponseObject | AsyncStream[ResponseObjectStream]: - """ - Create a new OpenAI response. - - Args: - input: Input message(s) to create the response. - - model: The underlying LLM used for completions. - - previous_response_id: (Optional) if specified, the new response will be a continuation of the previous - response. This can be used to easily fork-off new responses from existing - responses. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... 
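The async overloads mirror the sync ones, with `AsyncStream[ResponseObjectStream]` as the streaming return type. A sketch of the async streaming path; `AsyncLlamaStackClient` is assumed to be the async entry point of the removed package.

    import asyncio

    from llama_stack_client import AsyncLlamaStackClient  # assumed async entry point of the removed package


    async def main() -> None:
        client = AsyncLlamaStackClient(base_url="http://localhost:8321")

        # Awaiting create(..., stream=True) yields an AsyncStream[ResponseObjectStream];
        # consume it with `async for`.
        stream = await client.responses.create(
            input="Summarize the latest training run.",
            model="llama-3-8b",  # hypothetical model ID
            stream=True,
        )
        async for chunk in stream:
            print(chunk)


    asyncio.run(main())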
- - @required_args(["input", "model"], ["input", "model", "stream"]) - async def create( - self, - *, - input: Union[str, Iterable[response_create_params.InputUnionMember1]], - model: str, - instructions: str | NotGiven = NOT_GIVEN, - max_infer_iters: int | NotGiven = NOT_GIVEN, - previous_response_id: str | NotGiven = NOT_GIVEN, - store: bool | NotGiven = NOT_GIVEN, - stream: Literal[False] | Literal[True] | NotGiven = NOT_GIVEN, - temperature: float | NotGiven = NOT_GIVEN, - text: response_create_params.Text | NotGiven = NOT_GIVEN, - tools: Iterable[response_create_params.Tool] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ResponseObject | AsyncStream[ResponseObjectStream]: - return await self._post( - "/v1/openai/v1/responses", - body=await async_maybe_transform( - { - "input": input, - "model": model, - "instructions": instructions, - "max_infer_iters": max_infer_iters, - "previous_response_id": previous_response_id, - "store": store, - "stream": stream, - "temperature": temperature, - "text": text, - "tools": tools, - }, - response_create_params.ResponseCreateParamsStreaming - if stream - else response_create_params.ResponseCreateParamsNonStreaming, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=ResponseObject, - stream=stream or False, - stream_cls=AsyncStream[ResponseObjectStream], - ) - - async def retrieve( - self, - response_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ResponseObject: - """ - Retrieve an OpenAI response by its ID. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not response_id: - raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") - return await self._get( - f"/v1/openai/v1/responses/{response_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=ResponseObject, - ) - - async def list( - self, - *, - after: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ResponseListResponse: - """ - List all OpenAI responses. - - Args: - after: The ID of the last response to return. - - limit: The number of responses to return. - - model: The model to filter responses by. - - order: The order to sort responses by when sorted by created_at ('asc' or 'desc'). - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return await self._get( - "/v1/openai/v1/responses", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=await async_maybe_transform( - { - "after": after, - "limit": limit, - "model": model, - "order": order, - }, - response_list_params.ResponseListParams, - ), - ), - cast_to=ResponseListResponse, - ) - - -class ResponsesResourceWithRawResponse: - def __init__(self, responses: ResponsesResource) -> None: - self._responses = responses - - self.create = to_raw_response_wrapper( - responses.create, - ) - self.retrieve = to_raw_response_wrapper( - responses.retrieve, - ) - self.list = to_raw_response_wrapper( - responses.list, - ) - - @cached_property - def input_items(self) -> InputItemsResourceWithRawResponse: - return InputItemsResourceWithRawResponse(self._responses.input_items) - - -class AsyncResponsesResourceWithRawResponse: - def __init__(self, responses: AsyncResponsesResource) -> None: - self._responses = responses - - self.create = async_to_raw_response_wrapper( - responses.create, - ) - self.retrieve = async_to_raw_response_wrapper( - responses.retrieve, - ) - self.list = async_to_raw_response_wrapper( - responses.list, - ) - - @cached_property - def input_items(self) -> AsyncInputItemsResourceWithRawResponse: - return AsyncInputItemsResourceWithRawResponse(self._responses.input_items) - - -class ResponsesResourceWithStreamingResponse: - def __init__(self, responses: ResponsesResource) -> None: - self._responses = responses - - self.create = to_streamed_response_wrapper( - responses.create, - ) - self.retrieve = to_streamed_response_wrapper( - responses.retrieve, - ) - self.list = to_streamed_response_wrapper( - responses.list, - ) - - @cached_property - def input_items(self) -> InputItemsResourceWithStreamingResponse: - return InputItemsResourceWithStreamingResponse(self._responses.input_items) - - -class AsyncResponsesResourceWithStreamingResponse: - def __init__(self, responses: AsyncResponsesResource) -> None: - self._responses = responses - - self.create = async_to_streamed_response_wrapper( - responses.create, - ) - self.retrieve = async_to_streamed_response_wrapper( - responses.retrieve, - ) - self.list = async_to_streamed_response_wrapper( - responses.list, - ) - - @cached_property - def input_items(self) -> AsyncInputItemsResourceWithStreamingResponse: - return AsyncInputItemsResourceWithStreamingResponse(self._responses.input_items) diff --git a/src/llama_stack_client/resources/routes.py b/src/llama_stack_client/resources/routes.py deleted file mode 100644 index a95b5e06..00000000 --- a/src/llama_stack_client/resources/routes.py +++ /dev/null @@ -1,146 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
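# --- Editor's note (not part of the patch): a minimal usage sketch of the Responses
# resource being deleted above, based on the create/retrieve/list signatures shown.
# The client class name, `base_url` value, `.responses` attribute and model ID are
# assumptions for illustration, not confirmed by this diff.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server

# Non-streaming create: returns a ResponseObject.
response = client.responses.create(
    input="What is the capital of France?",
    model="meta-llama/Llama-3.3-70B-Instruct",  # placeholder model ID
)
print(response)

# Streaming create: returns a Stream[ResponseObjectStream] that can be iterated.
for chunk in client.responses.create(
    input="Tell me a short story.",
    model="meta-llama/Llama-3.3-70B-Instruct",  # placeholder model ID
    stream=True,
):
    print(chunk)

# Retrieve a response by ID and list responses with the filters shown above.
fetched = client.responses.retrieve(response.id)  # assumes ResponseObject exposes `id`
listing = client.responses.list(limit=10, order="desc")
print(listing)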
- -from __future__ import annotations - -from typing import Type, cast - -import httpx - -from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._compat import cached_property -from .._resource import SyncAPIResource, AsyncAPIResource -from .._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from .._wrappers import DataWrapper -from .._base_client import make_request_options -from ..types.route_list_response import RouteListResponse - -__all__ = ["RoutesResource", "AsyncRoutesResource"] - - -class RoutesResource(SyncAPIResource): - @cached_property - def with_raw_response(self) -> RoutesResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return RoutesResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> RoutesResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return RoutesResourceWithStreamingResponse(self) - - def list( - self, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> RouteListResponse: - """List all routes.""" - return self._get( - "/v1/inspect/routes", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - post_parser=DataWrapper[RouteListResponse]._unwrapper, - ), - cast_to=cast(Type[RouteListResponse], DataWrapper[RouteListResponse]), - ) - - -class AsyncRoutesResource(AsyncAPIResource): - @cached_property - def with_raw_response(self) -> AsyncRoutesResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncRoutesResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncRoutesResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncRoutesResourceWithStreamingResponse(self) - - async def list( - self, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> RouteListResponse: - """List all routes.""" - return await self._get( - "/v1/inspect/routes", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - post_parser=DataWrapper[RouteListResponse]._unwrapper, - ), - cast_to=cast(Type[RouteListResponse], DataWrapper[RouteListResponse]), - ) - - -class RoutesResourceWithRawResponse: - def __init__(self, routes: RoutesResource) -> None: - self._routes = routes - - self.list = to_raw_response_wrapper( - routes.list, - ) - - -class AsyncRoutesResourceWithRawResponse: - def __init__(self, routes: AsyncRoutesResource) -> None: - self._routes = routes - - self.list = async_to_raw_response_wrapper( - routes.list, - ) - - -class RoutesResourceWithStreamingResponse: - def __init__(self, routes: RoutesResource) -> None: - self._routes = routes - - self.list = to_streamed_response_wrapper( - routes.list, - ) - - -class AsyncRoutesResourceWithStreamingResponse: - def __init__(self, routes: AsyncRoutesResource) -> None: - self._routes = routes - - self.list = async_to_streamed_response_wrapper( - routes.list, - ) diff --git a/src/llama_stack_client/resources/safety.py b/src/llama_stack_client/resources/safety.py deleted file mode 100644 index 813a1f67..00000000 --- a/src/llama_stack_client/resources/safety.py +++ /dev/null @@ -1,196 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Dict, Union, Iterable - -import httpx - -from ..types import safety_run_shield_params -from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._utils import maybe_transform, async_maybe_transform -from .._compat import cached_property -from .._resource import SyncAPIResource, AsyncAPIResource -from .._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from .._base_client import make_request_options -from ..types.run_shield_response import RunShieldResponse -from ..types.shared_params.message import Message - -__all__ = ["SafetyResource", "AsyncSafetyResource"] - - -class SafetyResource(SyncAPIResource): - @cached_property - def with_raw_response(self) -> SafetyResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return SafetyResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> SafetyResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return SafetyResourceWithStreamingResponse(self) - - def run_shield( - self, - *, - messages: Iterable[Message], - params: Dict[str, Union[bool, float, str, Iterable[object], object, None]], - shield_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
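# --- Editor's note (not part of the patch): a hedged sketch of the deleted Routes
# resource, a single list() call against GET /v1/inspect/routes. The `.routes`
# attribute name and client setup are assumptions for illustration.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server

# Returns a RouteListResponse; the server's "data" envelope is unwrapped by the SDK.
routes = client.routes.list()
for route in routes:  # assumes RouteListResponse behaves like a list of route entries
    print(route)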
- # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> RunShieldResponse: - """ - Run a shield. - - Args: - messages: The messages to run the shield on. - - params: The parameters of the shield. - - shield_id: The identifier of the shield to run. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return self._post( - "/v1/safety/run-shield", - body=maybe_transform( - { - "messages": messages, - "params": params, - "shield_id": shield_id, - }, - safety_run_shield_params.SafetyRunShieldParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=RunShieldResponse, - ) - - -class AsyncSafetyResource(AsyncAPIResource): - @cached_property - def with_raw_response(self) -> AsyncSafetyResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncSafetyResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncSafetyResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncSafetyResourceWithStreamingResponse(self) - - async def run_shield( - self, - *, - messages: Iterable[Message], - params: Dict[str, Union[bool, float, str, Iterable[object], object, None]], - shield_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> RunShieldResponse: - """ - Run a shield. - - Args: - messages: The messages to run the shield on. - - params: The parameters of the shield. - - shield_id: The identifier of the shield to run. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return await self._post( - "/v1/safety/run-shield", - body=await async_maybe_transform( - { - "messages": messages, - "params": params, - "shield_id": shield_id, - }, - safety_run_shield_params.SafetyRunShieldParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=RunShieldResponse, - ) - - -class SafetyResourceWithRawResponse: - def __init__(self, safety: SafetyResource) -> None: - self._safety = safety - - self.run_shield = to_raw_response_wrapper( - safety.run_shield, - ) - - -class AsyncSafetyResourceWithRawResponse: - def __init__(self, safety: AsyncSafetyResource) -> None: - self._safety = safety - - self.run_shield = async_to_raw_response_wrapper( - safety.run_shield, - ) - - -class SafetyResourceWithStreamingResponse: - def __init__(self, safety: SafetyResource) -> None: - self._safety = safety - - self.run_shield = to_streamed_response_wrapper( - safety.run_shield, - ) - - -class AsyncSafetyResourceWithStreamingResponse: - def __init__(self, safety: AsyncSafetyResource) -> None: - self._safety = safety - - self.run_shield = async_to_streamed_response_wrapper( - safety.run_shield, - ) diff --git a/src/llama_stack_client/resources/scoring.py b/src/llama_stack_client/resources/scoring.py deleted file mode 100644 index 3e64f8eb..00000000 --- a/src/llama_stack_client/resources/scoring.py +++ /dev/null @@ -1,295 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Dict, Union, Iterable, Optional - -import httpx - -from ..types import scoring_score_params, scoring_score_batch_params -from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._utils import maybe_transform, async_maybe_transform -from .._compat import cached_property -from .._resource import SyncAPIResource, AsyncAPIResource -from .._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from .._base_client import make_request_options -from ..types.scoring_score_response import ScoringScoreResponse -from ..types.scoring_fn_params_param import ScoringFnParamsParam -from ..types.scoring_score_batch_response import ScoringScoreBatchResponse - -__all__ = ["ScoringResource", "AsyncScoringResource"] - - -class ScoringResource(SyncAPIResource): - @cached_property - def with_raw_response(self) -> ScoringResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return ScoringResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> ScoringResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
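# --- Editor's note (not part of the patch): a hedged sketch of the deleted Safety
# resource, running a shield over a list of messages via POST /v1/safety/run-shield.
# The shield_id value, message content and `.safety` attribute are illustrative
# assumptions.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server

result = client.safety.run_shield(
    shield_id="llama-guard",  # placeholder shield identifier
    messages=[{"role": "user", "content": "How do I bake a cake?"}],
    params={},  # provider-specific shield parameters; empty here for illustration
)
print(result)  # RunShieldResponse; typically reports any safety violation found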
- - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return ScoringResourceWithStreamingResponse(self) - - def score( - self, - *, - input_rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]], - scoring_functions: Dict[str, Optional[ScoringFnParamsParam]], - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ScoringScoreResponse: - """ - Score a list of rows. - - Args: - input_rows: The rows to score. - - scoring_functions: The scoring functions to use for the scoring. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return self._post( - "/v1/scoring/score", - body=maybe_transform( - { - "input_rows": input_rows, - "scoring_functions": scoring_functions, - }, - scoring_score_params.ScoringScoreParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=ScoringScoreResponse, - ) - - def score_batch( - self, - *, - dataset_id: str, - save_results_dataset: bool, - scoring_functions: Dict[str, Optional[ScoringFnParamsParam]], - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ScoringScoreBatchResponse: - """ - Score a batch of rows. - - Args: - dataset_id: The ID of the dataset to score. - - save_results_dataset: Whether to save the results to a dataset. - - scoring_functions: The scoring functions to use for the scoring. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return self._post( - "/v1/scoring/score-batch", - body=maybe_transform( - { - "dataset_id": dataset_id, - "save_results_dataset": save_results_dataset, - "scoring_functions": scoring_functions, - }, - scoring_score_batch_params.ScoringScoreBatchParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=ScoringScoreBatchResponse, - ) - - -class AsyncScoringResource(AsyncAPIResource): - @cached_property - def with_raw_response(self) -> AsyncScoringResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. 
- - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncScoringResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncScoringResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncScoringResourceWithStreamingResponse(self) - - async def score( - self, - *, - input_rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]], - scoring_functions: Dict[str, Optional[ScoringFnParamsParam]], - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ScoringScoreResponse: - """ - Score a list of rows. - - Args: - input_rows: The rows to score. - - scoring_functions: The scoring functions to use for the scoring. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return await self._post( - "/v1/scoring/score", - body=await async_maybe_transform( - { - "input_rows": input_rows, - "scoring_functions": scoring_functions, - }, - scoring_score_params.ScoringScoreParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=ScoringScoreResponse, - ) - - async def score_batch( - self, - *, - dataset_id: str, - save_results_dataset: bool, - scoring_functions: Dict[str, Optional[ScoringFnParamsParam]], - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ScoringScoreBatchResponse: - """ - Score a batch of rows. - - Args: - dataset_id: The ID of the dataset to score. - - save_results_dataset: Whether to save the results to a dataset. - - scoring_functions: The scoring functions to use for the scoring. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return await self._post( - "/v1/scoring/score-batch", - body=await async_maybe_transform( - { - "dataset_id": dataset_id, - "save_results_dataset": save_results_dataset, - "scoring_functions": scoring_functions, - }, - scoring_score_batch_params.ScoringScoreBatchParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=ScoringScoreBatchResponse, - ) - - -class ScoringResourceWithRawResponse: - def __init__(self, scoring: ScoringResource) -> None: - self._scoring = scoring - - self.score = to_raw_response_wrapper( - scoring.score, - ) - self.score_batch = to_raw_response_wrapper( - scoring.score_batch, - ) - - -class AsyncScoringResourceWithRawResponse: - def __init__(self, scoring: AsyncScoringResource) -> None: - self._scoring = scoring - - self.score = async_to_raw_response_wrapper( - scoring.score, - ) - self.score_batch = async_to_raw_response_wrapper( - scoring.score_batch, - ) - - -class ScoringResourceWithStreamingResponse: - def __init__(self, scoring: ScoringResource) -> None: - self._scoring = scoring - - self.score = to_streamed_response_wrapper( - scoring.score, - ) - self.score_batch = to_streamed_response_wrapper( - scoring.score_batch, - ) - - -class AsyncScoringResourceWithStreamingResponse: - def __init__(self, scoring: AsyncScoringResource) -> None: - self._scoring = scoring - - self.score = async_to_streamed_response_wrapper( - scoring.score, - ) - self.score_batch = async_to_streamed_response_wrapper( - scoring.score_batch, - ) diff --git a/src/llama_stack_client/resources/scoring_functions.py b/src/llama_stack_client/resources/scoring_functions.py deleted file mode 100644 index e1a77477..00000000 --- a/src/llama_stack_client/resources/scoring_functions.py +++ /dev/null @@ -1,359 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Type, cast - -import httpx - -from ..types import scoring_function_register_params -from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven -from .._utils import maybe_transform, async_maybe_transform -from .._compat import cached_property -from .._resource import SyncAPIResource, AsyncAPIResource -from .._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from .._wrappers import DataWrapper -from .._base_client import make_request_options -from ..types.scoring_fn import ScoringFn -from ..types.scoring_fn_params_param import ScoringFnParamsParam -from ..types.shared_params.return_type import ReturnType -from ..types.scoring_function_list_response import ScoringFunctionListResponse - -__all__ = ["ScoringFunctionsResource", "AsyncScoringFunctionsResource"] - - -class ScoringFunctionsResource(SyncAPIResource): - @cached_property - def with_raw_response(self) -> ScoringFunctionsResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. 
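# --- Editor's note (not part of the patch): a hedged sketch of the deleted Scoring
# resource. Row contents, dataset ID and the scoring-function key are placeholders;
# `.scoring` is an assumed client attribute.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server

# Score rows inline; None means no per-call parameter override, per the
# Dict[str, Optional[ScoringFnParamsParam]] typing shown above.
scored = client.scoring.score(
    input_rows=[{"input_query": "2 + 2", "generated_answer": "4", "expected_answer": "4"}],
    scoring_functions={"basic::equality": None},  # placeholder function ID
)
print(scored)

# Score an already-registered dataset in one call, optionally saving the results.
batch = client.scoring.score_batch(
    dataset_id="my-eval-dataset",  # placeholder dataset ID
    save_results_dataset=False,
    scoring_functions={"basic::equality": None},
)
print(batch)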
- - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return ScoringFunctionsResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> ScoringFunctionsResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return ScoringFunctionsResourceWithStreamingResponse(self) - - def retrieve( - self, - scoring_fn_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ScoringFn: - """ - Get a scoring function by its ID. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not scoring_fn_id: - raise ValueError(f"Expected a non-empty value for `scoring_fn_id` but received {scoring_fn_id!r}") - return self._get( - f"/v1/scoring-functions/{scoring_fn_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=ScoringFn, - ) - - def list( - self, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ScoringFunctionListResponse: - """List all scoring functions.""" - return self._get( - "/v1/scoring-functions", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - post_parser=DataWrapper[ScoringFunctionListResponse]._unwrapper, - ), - cast_to=cast(Type[ScoringFunctionListResponse], DataWrapper[ScoringFunctionListResponse]), - ) - - def register( - self, - *, - description: str, - return_type: ReturnType, - scoring_fn_id: str, - params: ScoringFnParamsParam | NotGiven = NOT_GIVEN, - provider_id: str | NotGiven = NOT_GIVEN, - provider_scoring_fn_id: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> None: - """ - Register a scoring function. - - Args: - description: The description of the scoring function. - - scoring_fn_id: The ID of the scoring function to register. - - params: The parameters for the scoring function for benchmark eval, these can be - overridden for app eval. 
- - provider_id: The ID of the provider to use for the scoring function. - - provider_scoring_fn_id: The ID of the provider scoring function to use for the scoring function. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"Accept": "*/*", **(extra_headers or {})} - return self._post( - "/v1/scoring-functions", - body=maybe_transform( - { - "description": description, - "return_type": return_type, - "scoring_fn_id": scoring_fn_id, - "params": params, - "provider_id": provider_id, - "provider_scoring_fn_id": provider_scoring_fn_id, - }, - scoring_function_register_params.ScoringFunctionRegisterParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=NoneType, - ) - - -class AsyncScoringFunctionsResource(AsyncAPIResource): - @cached_property - def with_raw_response(self) -> AsyncScoringFunctionsResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncScoringFunctionsResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncScoringFunctionsResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncScoringFunctionsResourceWithStreamingResponse(self) - - async def retrieve( - self, - scoring_fn_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ScoringFn: - """ - Get a scoring function by its ID. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not scoring_fn_id: - raise ValueError(f"Expected a non-empty value for `scoring_fn_id` but received {scoring_fn_id!r}") - return await self._get( - f"/v1/scoring-functions/{scoring_fn_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=ScoringFn, - ) - - async def list( - self, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ScoringFunctionListResponse: - """List all scoring functions.""" - return await self._get( - "/v1/scoring-functions", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - post_parser=DataWrapper[ScoringFunctionListResponse]._unwrapper, - ), - cast_to=cast(Type[ScoringFunctionListResponse], DataWrapper[ScoringFunctionListResponse]), - ) - - async def register( - self, - *, - description: str, - return_type: ReturnType, - scoring_fn_id: str, - params: ScoringFnParamsParam | NotGiven = NOT_GIVEN, - provider_id: str | NotGiven = NOT_GIVEN, - provider_scoring_fn_id: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> None: - """ - Register a scoring function. - - Args: - description: The description of the scoring function. - - scoring_fn_id: The ID of the scoring function to register. - - params: The parameters for the scoring function for benchmark eval, these can be - overridden for app eval. - - provider_id: The ID of the provider to use for the scoring function. - - provider_scoring_fn_id: The ID of the provider scoring function to use for the scoring function. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"Accept": "*/*", **(extra_headers or {})} - return await self._post( - "/v1/scoring-functions", - body=await async_maybe_transform( - { - "description": description, - "return_type": return_type, - "scoring_fn_id": scoring_fn_id, - "params": params, - "provider_id": provider_id, - "provider_scoring_fn_id": provider_scoring_fn_id, - }, - scoring_function_register_params.ScoringFunctionRegisterParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=NoneType, - ) - - -class ScoringFunctionsResourceWithRawResponse: - def __init__(self, scoring_functions: ScoringFunctionsResource) -> None: - self._scoring_functions = scoring_functions - - self.retrieve = to_raw_response_wrapper( - scoring_functions.retrieve, - ) - self.list = to_raw_response_wrapper( - scoring_functions.list, - ) - self.register = to_raw_response_wrapper( - scoring_functions.register, - ) - - -class AsyncScoringFunctionsResourceWithRawResponse: - def __init__(self, scoring_functions: AsyncScoringFunctionsResource) -> None: - self._scoring_functions = scoring_functions - - self.retrieve = async_to_raw_response_wrapper( - scoring_functions.retrieve, - ) - self.list = async_to_raw_response_wrapper( - scoring_functions.list, - ) - self.register = async_to_raw_response_wrapper( - scoring_functions.register, - ) - - -class ScoringFunctionsResourceWithStreamingResponse: - def __init__(self, scoring_functions: ScoringFunctionsResource) -> None: - self._scoring_functions = 
scoring_functions - - self.retrieve = to_streamed_response_wrapper( - scoring_functions.retrieve, - ) - self.list = to_streamed_response_wrapper( - scoring_functions.list, - ) - self.register = to_streamed_response_wrapper( - scoring_functions.register, - ) - - -class AsyncScoringFunctionsResourceWithStreamingResponse: - def __init__(self, scoring_functions: AsyncScoringFunctionsResource) -> None: - self._scoring_functions = scoring_functions - - self.retrieve = async_to_streamed_response_wrapper( - scoring_functions.retrieve, - ) - self.list = async_to_streamed_response_wrapper( - scoring_functions.list, - ) - self.register = async_to_streamed_response_wrapper( - scoring_functions.register, - ) diff --git a/src/llama_stack_client/resources/shields.py b/src/llama_stack_client/resources/shields.py deleted file mode 100644 index cf0c7678..00000000 --- a/src/llama_stack_client/resources/shields.py +++ /dev/null @@ -1,341 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Dict, Type, Union, Iterable, cast - -import httpx - -from ..types import shield_register_params -from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._utils import maybe_transform, async_maybe_transform -from .._compat import cached_property -from .._resource import SyncAPIResource, AsyncAPIResource -from .._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from .._wrappers import DataWrapper -from .._base_client import make_request_options -from ..types.shield import Shield -from ..types.shield_list_response import ShieldListResponse - -__all__ = ["ShieldsResource", "AsyncShieldsResource"] - - -class ShieldsResource(SyncAPIResource): - @cached_property - def with_raw_response(self) -> ShieldsResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return ShieldsResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> ShieldsResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return ShieldsResourceWithStreamingResponse(self) - - def retrieve( - self, - identifier: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Shield: - """ - Get a shield by its identifier. 
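# --- Editor's note (not part of the patch): a hedged sketch of the deleted
# ScoringFunctions resource (register/list/retrieve). The return_type shape, the IDs
# and the `.scoring_functions` attribute are assumptions for illustration.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server

# register() returns None; the endpoint replies with an empty body (Accept: */*).
client.scoring_functions.register(
    scoring_fn_id="my-org::exact-match",  # placeholder ID
    description="Marks an answer correct when it equals the expected answer.",
    return_type={"type": "string"},  # assumed ReturnType shape
)

print(client.scoring_functions.list())
print(client.scoring_functions.retrieve("my-org::exact-match"))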
- - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not identifier: - raise ValueError(f"Expected a non-empty value for `identifier` but received {identifier!r}") - return self._get( - f"/v1/shields/{identifier}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Shield, - ) - - def list( - self, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ShieldListResponse: - """List all shields.""" - return self._get( - "/v1/shields", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - post_parser=DataWrapper[ShieldListResponse]._unwrapper, - ), - cast_to=cast(Type[ShieldListResponse], DataWrapper[ShieldListResponse]), - ) - - def register( - self, - *, - shield_id: str, - params: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - provider_id: str | NotGiven = NOT_GIVEN, - provider_shield_id: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Shield: - """ - Register a shield. - - Args: - shield_id: The identifier of the shield to register. - - params: The parameters of the shield. - - provider_id: The identifier of the provider. - - provider_shield_id: The identifier of the shield in the provider. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return self._post( - "/v1/shields", - body=maybe_transform( - { - "shield_id": shield_id, - "params": params, - "provider_id": provider_id, - "provider_shield_id": provider_shield_id, - }, - shield_register_params.ShieldRegisterParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Shield, - ) - - -class AsyncShieldsResource(AsyncAPIResource): - @cached_property - def with_raw_response(self) -> AsyncShieldsResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. 
- - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncShieldsResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncShieldsResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncShieldsResourceWithStreamingResponse(self) - - async def retrieve( - self, - identifier: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Shield: - """ - Get a shield by its identifier. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not identifier: - raise ValueError(f"Expected a non-empty value for `identifier` but received {identifier!r}") - return await self._get( - f"/v1/shields/{identifier}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Shield, - ) - - async def list( - self, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ShieldListResponse: - """List all shields.""" - return await self._get( - "/v1/shields", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - post_parser=DataWrapper[ShieldListResponse]._unwrapper, - ), - cast_to=cast(Type[ShieldListResponse], DataWrapper[ShieldListResponse]), - ) - - async def register( - self, - *, - shield_id: str, - params: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - provider_id: str | NotGiven = NOT_GIVEN, - provider_shield_id: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Shield: - """ - Register a shield. - - Args: - shield_id: The identifier of the shield to register. - - params: The parameters of the shield. - - provider_id: The identifier of the provider. - - provider_shield_id: The identifier of the shield in the provider. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return await self._post( - "/v1/shields", - body=await async_maybe_transform( - { - "shield_id": shield_id, - "params": params, - "provider_id": provider_id, - "provider_shield_id": provider_shield_id, - }, - shield_register_params.ShieldRegisterParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Shield, - ) - - -class ShieldsResourceWithRawResponse: - def __init__(self, shields: ShieldsResource) -> None: - self._shields = shields - - self.retrieve = to_raw_response_wrapper( - shields.retrieve, - ) - self.list = to_raw_response_wrapper( - shields.list, - ) - self.register = to_raw_response_wrapper( - shields.register, - ) - - -class AsyncShieldsResourceWithRawResponse: - def __init__(self, shields: AsyncShieldsResource) -> None: - self._shields = shields - - self.retrieve = async_to_raw_response_wrapper( - shields.retrieve, - ) - self.list = async_to_raw_response_wrapper( - shields.list, - ) - self.register = async_to_raw_response_wrapper( - shields.register, - ) - - -class ShieldsResourceWithStreamingResponse: - def __init__(self, shields: ShieldsResource) -> None: - self._shields = shields - - self.retrieve = to_streamed_response_wrapper( - shields.retrieve, - ) - self.list = to_streamed_response_wrapper( - shields.list, - ) - self.register = to_streamed_response_wrapper( - shields.register, - ) - - -class AsyncShieldsResourceWithStreamingResponse: - def __init__(self, shields: AsyncShieldsResource) -> None: - self._shields = shields - - self.retrieve = async_to_streamed_response_wrapper( - shields.retrieve, - ) - self.list = async_to_streamed_response_wrapper( - shields.list, - ) - self.register = async_to_streamed_response_wrapper( - shields.register, - ) diff --git a/src/llama_stack_client/resources/synthetic_data_generation.py b/src/llama_stack_client/resources/synthetic_data_generation.py deleted file mode 100644 index 6e4e5a08..00000000 --- a/src/llama_stack_client/resources/synthetic_data_generation.py +++ /dev/null @@ -1,185 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
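# --- Editor's note (not part of the patch): a hedged sketch of the deleted Shields
# resource (register/retrieve/list). Identifiers are placeholders and `.shields` is
# an assumed client attribute.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server

# register() returns the created Shield.
shield = client.shields.register(
    shield_id="content-safety",  # placeholder identifier
    provider_id="llama-guard",   # placeholder provider
)
print(shield)

print(client.shields.list())
print(client.shields.retrieve("content-safety"))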
- -from __future__ import annotations - -from typing import Iterable -from typing_extensions import Literal - -import httpx - -from ..types import synthetic_data_generation_generate_params -from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._utils import maybe_transform, async_maybe_transform -from .._compat import cached_property -from .._resource import SyncAPIResource, AsyncAPIResource -from .._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from .._base_client import make_request_options -from ..types.shared_params.message import Message -from ..types.synthetic_data_generation_response import SyntheticDataGenerationResponse - -__all__ = ["SyntheticDataGenerationResource", "AsyncSyntheticDataGenerationResource"] - - -class SyntheticDataGenerationResource(SyncAPIResource): - @cached_property - def with_raw_response(self) -> SyntheticDataGenerationResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return SyntheticDataGenerationResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> SyntheticDataGenerationResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return SyntheticDataGenerationResourceWithStreamingResponse(self) - - def generate( - self, - *, - dialogs: Iterable[Message], - filtering_function: Literal["none", "random", "top_k", "top_p", "top_k_top_p", "sigmoid"], - model: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> SyntheticDataGenerationResponse: - """ - Args: - filtering_function: The type of filtering function. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return self._post( - "/v1/synthetic-data-generation/generate", - body=maybe_transform( - { - "dialogs": dialogs, - "filtering_function": filtering_function, - "model": model, - }, - synthetic_data_generation_generate_params.SyntheticDataGenerationGenerateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=SyntheticDataGenerationResponse, - ) - - -class AsyncSyntheticDataGenerationResource(AsyncAPIResource): - @cached_property - def with_raw_response(self) -> AsyncSyntheticDataGenerationResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. 
- - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncSyntheticDataGenerationResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncSyntheticDataGenerationResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncSyntheticDataGenerationResourceWithStreamingResponse(self) - - async def generate( - self, - *, - dialogs: Iterable[Message], - filtering_function: Literal["none", "random", "top_k", "top_p", "top_k_top_p", "sigmoid"], - model: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> SyntheticDataGenerationResponse: - """ - Args: - filtering_function: The type of filtering function. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return await self._post( - "/v1/synthetic-data-generation/generate", - body=await async_maybe_transform( - { - "dialogs": dialogs, - "filtering_function": filtering_function, - "model": model, - }, - synthetic_data_generation_generate_params.SyntheticDataGenerationGenerateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=SyntheticDataGenerationResponse, - ) - - -class SyntheticDataGenerationResourceWithRawResponse: - def __init__(self, synthetic_data_generation: SyntheticDataGenerationResource) -> None: - self._synthetic_data_generation = synthetic_data_generation - - self.generate = to_raw_response_wrapper( - synthetic_data_generation.generate, - ) - - -class AsyncSyntheticDataGenerationResourceWithRawResponse: - def __init__(self, synthetic_data_generation: AsyncSyntheticDataGenerationResource) -> None: - self._synthetic_data_generation = synthetic_data_generation - - self.generate = async_to_raw_response_wrapper( - synthetic_data_generation.generate, - ) - - -class SyntheticDataGenerationResourceWithStreamingResponse: - def __init__(self, synthetic_data_generation: SyntheticDataGenerationResource) -> None: - self._synthetic_data_generation = synthetic_data_generation - - self.generate = to_streamed_response_wrapper( - synthetic_data_generation.generate, - ) - - -class AsyncSyntheticDataGenerationResourceWithStreamingResponse: - def __init__(self, synthetic_data_generation: AsyncSyntheticDataGenerationResource) -> None: - self._synthetic_data_generation = synthetic_data_generation - - self.generate = async_to_streamed_response_wrapper( - synthetic_data_generation.generate, - ) diff --git a/src/llama_stack_client/resources/tool_runtime/__init__.py b/src/llama_stack_client/resources/tool_runtime/__init__.py deleted file mode 100644 index 2ed86a39..00000000 --- a/src/llama_stack_client/resources/tool_runtime/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ 
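# --- Editor's note (not part of the patch): a hedged sketch of the deleted
# SyntheticDataGeneration resource. The dialog content and model ID are placeholders;
# `.synthetic_data_generation` is an assumed client attribute.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server

result = client.synthetic_data_generation.generate(
    dialogs=[{"role": "user", "content": "Write three trivia questions about rivers."}],
    filtering_function="none",  # one of the Literal values listed above
    model="meta-llama/Llama-3.1-8B-Instruct",  # placeholder model ID
)
print(result)  # SyntheticDataGenerationResponse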
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from .rag_tool import ( - RagToolResource, - AsyncRagToolResource, - RagToolResourceWithRawResponse, - AsyncRagToolResourceWithRawResponse, - RagToolResourceWithStreamingResponse, - AsyncRagToolResourceWithStreamingResponse, -) -from .tool_runtime import ( - ToolRuntimeResource, - AsyncToolRuntimeResource, - ToolRuntimeResourceWithRawResponse, - AsyncToolRuntimeResourceWithRawResponse, - ToolRuntimeResourceWithStreamingResponse, - AsyncToolRuntimeResourceWithStreamingResponse, -) - -__all__ = [ - "RagToolResource", - "AsyncRagToolResource", - "RagToolResourceWithRawResponse", - "AsyncRagToolResourceWithRawResponse", - "RagToolResourceWithStreamingResponse", - "AsyncRagToolResourceWithStreamingResponse", - "ToolRuntimeResource", - "AsyncToolRuntimeResource", - "ToolRuntimeResourceWithRawResponse", - "AsyncToolRuntimeResourceWithRawResponse", - "ToolRuntimeResourceWithStreamingResponse", - "AsyncToolRuntimeResourceWithStreamingResponse", -] diff --git a/src/llama_stack_client/resources/tool_runtime/rag_tool.py b/src/llama_stack_client/resources/tool_runtime/rag_tool.py deleted file mode 100644 index 65ef0463..00000000 --- a/src/llama_stack_client/resources/tool_runtime/rag_tool.py +++ /dev/null @@ -1,290 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import List, Iterable - -import httpx - -from ..._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven -from ..._utils import maybe_transform, async_maybe_transform -from ..._compat import cached_property -from ..._resource import SyncAPIResource, AsyncAPIResource -from ..._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from ..._base_client import make_request_options -from ...types.tool_runtime import rag_tool_query_params, rag_tool_insert_params -from ...types.shared.query_result import QueryResult -from ...types.shared_params.document import Document -from ...types.shared_params.query_config import QueryConfig -from ...types.shared_params.interleaved_content import InterleavedContent - -__all__ = ["RagToolResource", "AsyncRagToolResource"] - - -class RagToolResource(SyncAPIResource): - @cached_property - def with_raw_response(self) -> RagToolResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return RagToolResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> RagToolResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return RagToolResourceWithStreamingResponse(self) - - def insert( - self, - *, - chunk_size_in_tokens: int, - documents: Iterable[Document], - vector_db_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> None: - """ - Index documents so they can be used by the RAG system - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"Accept": "*/*", **(extra_headers or {})} - return self._post( - "/v1/tool-runtime/rag-tool/insert", - body=maybe_transform( - { - "chunk_size_in_tokens": chunk_size_in_tokens, - "documents": documents, - "vector_db_id": vector_db_id, - }, - rag_tool_insert_params.RagToolInsertParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=NoneType, - ) - - def query( - self, - *, - content: InterleavedContent, - vector_db_ids: List[str], - query_config: QueryConfig | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> QueryResult: - """ - Query the RAG system for context; typically invoked by the agent - - Args: - content: A image content item - - query_config: Configuration for the RAG query generation. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return self._post( - "/v1/tool-runtime/rag-tool/query", - body=maybe_transform( - { - "content": content, - "vector_db_ids": vector_db_ids, - "query_config": query_config, - }, - rag_tool_query_params.RagToolQueryParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=QueryResult, - ) - - -class AsyncRagToolResource(AsyncAPIResource): - @cached_property - def with_raw_response(self) -> AsyncRagToolResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncRagToolResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncRagToolResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncRagToolResourceWithStreamingResponse(self) - - async def insert( - self, - *, - chunk_size_in_tokens: int, - documents: Iterable[Document], - vector_db_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> None: - """ - Index documents so they can be used by the RAG system - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"Accept": "*/*", **(extra_headers or {})} - return await self._post( - "/v1/tool-runtime/rag-tool/insert", - body=await async_maybe_transform( - { - "chunk_size_in_tokens": chunk_size_in_tokens, - "documents": documents, - "vector_db_id": vector_db_id, - }, - rag_tool_insert_params.RagToolInsertParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=NoneType, - ) - - async def query( - self, - *, - content: InterleavedContent, - vector_db_ids: List[str], - query_config: QueryConfig | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> QueryResult: - """ - Query the RAG system for context; typically invoked by the agent - - Args: - content: A image content item - - query_config: Configuration for the RAG query generation. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return await self._post( - "/v1/tool-runtime/rag-tool/query", - body=await async_maybe_transform( - { - "content": content, - "vector_db_ids": vector_db_ids, - "query_config": query_config, - }, - rag_tool_query_params.RagToolQueryParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=QueryResult, - ) - - -class RagToolResourceWithRawResponse: - def __init__(self, rag_tool: RagToolResource) -> None: - self._rag_tool = rag_tool - - self.insert = to_raw_response_wrapper( - rag_tool.insert, - ) - self.query = to_raw_response_wrapper( - rag_tool.query, - ) - - -class AsyncRagToolResourceWithRawResponse: - def __init__(self, rag_tool: AsyncRagToolResource) -> None: - self._rag_tool = rag_tool - - self.insert = async_to_raw_response_wrapper( - rag_tool.insert, - ) - self.query = async_to_raw_response_wrapper( - rag_tool.query, - ) - - -class RagToolResourceWithStreamingResponse: - def __init__(self, rag_tool: RagToolResource) -> None: - self._rag_tool = rag_tool - - self.insert = to_streamed_response_wrapper( - rag_tool.insert, - ) - self.query = to_streamed_response_wrapper( - rag_tool.query, - ) - - -class AsyncRagToolResourceWithStreamingResponse: - def __init__(self, rag_tool: AsyncRagToolResource) -> None: - self._rag_tool = rag_tool - - self.insert = async_to_streamed_response_wrapper( - rag_tool.insert, - ) - self.query = async_to_streamed_response_wrapper( - rag_tool.query, - ) diff --git 
a/src/llama_stack_client/resources/tool_runtime/tool_runtime.py b/src/llama_stack_client/resources/tool_runtime/tool_runtime.py deleted file mode 100644 index ecb17c38..00000000 --- a/src/llama_stack_client/resources/tool_runtime/tool_runtime.py +++ /dev/null @@ -1,327 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Dict, Type, Union, Iterable, cast - -import httpx - -from ...types import tool_runtime_list_tools_params, tool_runtime_invoke_tool_params -from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ..._utils import maybe_transform, async_maybe_transform -from .rag_tool import ( - RagToolResource, - AsyncRagToolResource, - RagToolResourceWithRawResponse, - AsyncRagToolResourceWithRawResponse, - RagToolResourceWithStreamingResponse, - AsyncRagToolResourceWithStreamingResponse, -) -from ..._compat import cached_property -from ..._resource import SyncAPIResource, AsyncAPIResource -from ..._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from ..._wrappers import DataWrapper -from ..._base_client import make_request_options -from ...types.tool_invocation_result import ToolInvocationResult -from ...types.tool_runtime_list_tools_response import ToolRuntimeListToolsResponse - -__all__ = ["ToolRuntimeResource", "AsyncToolRuntimeResource"] - - -class ToolRuntimeResource(SyncAPIResource): - @cached_property - def rag_tool(self) -> RagToolResource: - return RagToolResource(self._client) - - @cached_property - def with_raw_response(self) -> ToolRuntimeResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return ToolRuntimeResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> ToolRuntimeResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return ToolRuntimeResourceWithStreamingResponse(self) - - def invoke_tool( - self, - *, - kwargs: Dict[str, Union[bool, float, str, Iterable[object], object, None]], - tool_name: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ToolInvocationResult: - """ - Run a tool with the given arguments. - - Args: - kwargs: A dictionary of arguments to pass to the tool. - - tool_name: The name of the tool to invoke. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return self._post( - "/v1/tool-runtime/invoke", - body=maybe_transform( - { - "kwargs": kwargs, - "tool_name": tool_name, - }, - tool_runtime_invoke_tool_params.ToolRuntimeInvokeToolParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=ToolInvocationResult, - ) - - def list_tools( - self, - *, - mcp_endpoint: tool_runtime_list_tools_params.McpEndpoint | NotGiven = NOT_GIVEN, - tool_group_id: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ToolRuntimeListToolsResponse: - """ - List all tools in the runtime. - - Args: - mcp_endpoint: The MCP endpoint to use for the tool group. - - tool_group_id: The ID of the tool group to list tools for. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return self._get( - "/v1/tool-runtime/list-tools", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=maybe_transform( - { - "mcp_endpoint": mcp_endpoint, - "tool_group_id": tool_group_id, - }, - tool_runtime_list_tools_params.ToolRuntimeListToolsParams, - ), - post_parser=DataWrapper[ToolRuntimeListToolsResponse]._unwrapper, - ), - cast_to=cast(Type[ToolRuntimeListToolsResponse], DataWrapper[ToolRuntimeListToolsResponse]), - ) - - -class AsyncToolRuntimeResource(AsyncAPIResource): - @cached_property - def rag_tool(self) -> AsyncRagToolResource: - return AsyncRagToolResource(self._client) - - @cached_property - def with_raw_response(self) -> AsyncToolRuntimeResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncToolRuntimeResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncToolRuntimeResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncToolRuntimeResourceWithStreamingResponse(self) - - async def invoke_tool( - self, - *, - kwargs: Dict[str, Union[bool, float, str, Iterable[object], object, None]], - tool_name: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ToolInvocationResult: - """ - Run a tool with the given arguments. - - Args: - kwargs: A dictionary of arguments to pass to the tool. - - tool_name: The name of the tool to invoke. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return await self._post( - "/v1/tool-runtime/invoke", - body=await async_maybe_transform( - { - "kwargs": kwargs, - "tool_name": tool_name, - }, - tool_runtime_invoke_tool_params.ToolRuntimeInvokeToolParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=ToolInvocationResult, - ) - - async def list_tools( - self, - *, - mcp_endpoint: tool_runtime_list_tools_params.McpEndpoint | NotGiven = NOT_GIVEN, - tool_group_id: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ToolRuntimeListToolsResponse: - """ - List all tools in the runtime. - - Args: - mcp_endpoint: The MCP endpoint to use for the tool group. - - tool_group_id: The ID of the tool group to list tools for. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return await self._get( - "/v1/tool-runtime/list-tools", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=await async_maybe_transform( - { - "mcp_endpoint": mcp_endpoint, - "tool_group_id": tool_group_id, - }, - tool_runtime_list_tools_params.ToolRuntimeListToolsParams, - ), - post_parser=DataWrapper[ToolRuntimeListToolsResponse]._unwrapper, - ), - cast_to=cast(Type[ToolRuntimeListToolsResponse], DataWrapper[ToolRuntimeListToolsResponse]), - ) - - -class ToolRuntimeResourceWithRawResponse: - def __init__(self, tool_runtime: ToolRuntimeResource) -> None: - self._tool_runtime = tool_runtime - - self.invoke_tool = to_raw_response_wrapper( - tool_runtime.invoke_tool, - ) - self.list_tools = to_raw_response_wrapper( - tool_runtime.list_tools, - ) - - @cached_property - def rag_tool(self) -> RagToolResourceWithRawResponse: - return RagToolResourceWithRawResponse(self._tool_runtime.rag_tool) - - -class AsyncToolRuntimeResourceWithRawResponse: - def __init__(self, tool_runtime: AsyncToolRuntimeResource) -> None: - self._tool_runtime = tool_runtime - - self.invoke_tool = async_to_raw_response_wrapper( - tool_runtime.invoke_tool, - ) - self.list_tools = async_to_raw_response_wrapper( - tool_runtime.list_tools, - ) - - @cached_property - def rag_tool(self) -> AsyncRagToolResourceWithRawResponse: - return AsyncRagToolResourceWithRawResponse(self._tool_runtime.rag_tool) - - -class 
ToolRuntimeResourceWithStreamingResponse: - def __init__(self, tool_runtime: ToolRuntimeResource) -> None: - self._tool_runtime = tool_runtime - - self.invoke_tool = to_streamed_response_wrapper( - tool_runtime.invoke_tool, - ) - self.list_tools = to_streamed_response_wrapper( - tool_runtime.list_tools, - ) - - @cached_property - def rag_tool(self) -> RagToolResourceWithStreamingResponse: - return RagToolResourceWithStreamingResponse(self._tool_runtime.rag_tool) - - -class AsyncToolRuntimeResourceWithStreamingResponse: - def __init__(self, tool_runtime: AsyncToolRuntimeResource) -> None: - self._tool_runtime = tool_runtime - - self.invoke_tool = async_to_streamed_response_wrapper( - tool_runtime.invoke_tool, - ) - self.list_tools = async_to_streamed_response_wrapper( - tool_runtime.list_tools, - ) - - @cached_property - def rag_tool(self) -> AsyncRagToolResourceWithStreamingResponse: - return AsyncRagToolResourceWithStreamingResponse(self._tool_runtime.rag_tool) diff --git a/src/llama_stack_client/resources/toolgroups.py b/src/llama_stack_client/resources/toolgroups.py deleted file mode 100644 index 3f0ba200..00000000 --- a/src/llama_stack_client/resources/toolgroups.py +++ /dev/null @@ -1,423 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Dict, Type, Union, Iterable, cast - -import httpx - -from ..types import toolgroup_register_params -from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven -from .._utils import maybe_transform, async_maybe_transform -from .._compat import cached_property -from .._resource import SyncAPIResource, AsyncAPIResource -from .._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from .._wrappers import DataWrapper -from .._base_client import make_request_options -from ..types.tool_group import ToolGroup -from ..types.toolgroup_list_response import ToolgroupListResponse - -__all__ = ["ToolgroupsResource", "AsyncToolgroupsResource"] - - -class ToolgroupsResource(SyncAPIResource): - @cached_property - def with_raw_response(self) -> ToolgroupsResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return ToolgroupsResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> ToolgroupsResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return ToolgroupsResourceWithStreamingResponse(self) - - def list( - self, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ToolgroupListResponse: - """List tool groups with optional provider.""" - return self._get( - "/v1/toolgroups", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - post_parser=DataWrapper[ToolgroupListResponse]._unwrapper, - ), - cast_to=cast(Type[ToolgroupListResponse], DataWrapper[ToolgroupListResponse]), - ) - - def get( - self, - toolgroup_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ToolGroup: - """ - Get a tool group by its ID. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not toolgroup_id: - raise ValueError(f"Expected a non-empty value for `toolgroup_id` but received {toolgroup_id!r}") - return self._get( - f"/v1/toolgroups/{toolgroup_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=ToolGroup, - ) - - def register( - self, - *, - provider_id: str, - toolgroup_id: str, - args: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - mcp_endpoint: toolgroup_register_params.McpEndpoint | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> None: - """ - Register a tool group. - - Args: - provider_id: The ID of the provider to use for the tool group. - - toolgroup_id: The ID of the tool group to register. - - args: A dictionary of arguments to pass to the tool group. - - mcp_endpoint: The MCP endpoint to use for the tool group. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"Accept": "*/*", **(extra_headers or {})} - return self._post( - "/v1/toolgroups", - body=maybe_transform( - { - "provider_id": provider_id, - "toolgroup_id": toolgroup_id, - "args": args, - "mcp_endpoint": mcp_endpoint, - }, - toolgroup_register_params.ToolgroupRegisterParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=NoneType, - ) - - def unregister( - self, - toolgroup_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
- # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> None: - """ - Unregister a tool group. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not toolgroup_id: - raise ValueError(f"Expected a non-empty value for `toolgroup_id` but received {toolgroup_id!r}") - extra_headers = {"Accept": "*/*", **(extra_headers or {})} - return self._delete( - f"/v1/toolgroups/{toolgroup_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=NoneType, - ) - - -class AsyncToolgroupsResource(AsyncAPIResource): - @cached_property - def with_raw_response(self) -> AsyncToolgroupsResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncToolgroupsResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncToolgroupsResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncToolgroupsResourceWithStreamingResponse(self) - - async def list( - self, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ToolgroupListResponse: - """List tool groups with optional provider.""" - return await self._get( - "/v1/toolgroups", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - post_parser=DataWrapper[ToolgroupListResponse]._unwrapper, - ), - cast_to=cast(Type[ToolgroupListResponse], DataWrapper[ToolgroupListResponse]), - ) - - async def get( - self, - toolgroup_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ToolGroup: - """ - Get a tool group by its ID. 
- - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not toolgroup_id: - raise ValueError(f"Expected a non-empty value for `toolgroup_id` but received {toolgroup_id!r}") - return await self._get( - f"/v1/toolgroups/{toolgroup_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=ToolGroup, - ) - - async def register( - self, - *, - provider_id: str, - toolgroup_id: str, - args: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - mcp_endpoint: toolgroup_register_params.McpEndpoint | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> None: - """ - Register a tool group. - - Args: - provider_id: The ID of the provider to use for the tool group. - - toolgroup_id: The ID of the tool group to register. - - args: A dictionary of arguments to pass to the tool group. - - mcp_endpoint: The MCP endpoint to use for the tool group. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"Accept": "*/*", **(extra_headers or {})} - return await self._post( - "/v1/toolgroups", - body=await async_maybe_transform( - { - "provider_id": provider_id, - "toolgroup_id": toolgroup_id, - "args": args, - "mcp_endpoint": mcp_endpoint, - }, - toolgroup_register_params.ToolgroupRegisterParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=NoneType, - ) - - async def unregister( - self, - toolgroup_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> None: - """ - Unregister a tool group. 
- - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not toolgroup_id: - raise ValueError(f"Expected a non-empty value for `toolgroup_id` but received {toolgroup_id!r}") - extra_headers = {"Accept": "*/*", **(extra_headers or {})} - return await self._delete( - f"/v1/toolgroups/{toolgroup_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=NoneType, - ) - - -class ToolgroupsResourceWithRawResponse: - def __init__(self, toolgroups: ToolgroupsResource) -> None: - self._toolgroups = toolgroups - - self.list = to_raw_response_wrapper( - toolgroups.list, - ) - self.get = to_raw_response_wrapper( - toolgroups.get, - ) - self.register = to_raw_response_wrapper( - toolgroups.register, - ) - self.unregister = to_raw_response_wrapper( - toolgroups.unregister, - ) - - -class AsyncToolgroupsResourceWithRawResponse: - def __init__(self, toolgroups: AsyncToolgroupsResource) -> None: - self._toolgroups = toolgroups - - self.list = async_to_raw_response_wrapper( - toolgroups.list, - ) - self.get = async_to_raw_response_wrapper( - toolgroups.get, - ) - self.register = async_to_raw_response_wrapper( - toolgroups.register, - ) - self.unregister = async_to_raw_response_wrapper( - toolgroups.unregister, - ) - - -class ToolgroupsResourceWithStreamingResponse: - def __init__(self, toolgroups: ToolgroupsResource) -> None: - self._toolgroups = toolgroups - - self.list = to_streamed_response_wrapper( - toolgroups.list, - ) - self.get = to_streamed_response_wrapper( - toolgroups.get, - ) - self.register = to_streamed_response_wrapper( - toolgroups.register, - ) - self.unregister = to_streamed_response_wrapper( - toolgroups.unregister, - ) - - -class AsyncToolgroupsResourceWithStreamingResponse: - def __init__(self, toolgroups: AsyncToolgroupsResource) -> None: - self._toolgroups = toolgroups - - self.list = async_to_streamed_response_wrapper( - toolgroups.list, - ) - self.get = async_to_streamed_response_wrapper( - toolgroups.get, - ) - self.register = async_to_streamed_response_wrapper( - toolgroups.register, - ) - self.unregister = async_to_streamed_response_wrapper( - toolgroups.unregister, - ) diff --git a/src/llama_stack_client/resources/tools.py b/src/llama_stack_client/resources/tools.py deleted file mode 100644 index 7954f776..00000000 --- a/src/llama_stack_client/resources/tools.py +++ /dev/null @@ -1,257 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
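Editor's note, for illustration only (not part of the patch): the hunks above also remove the RAG tool, tool-runtime, and toolgroups resources. A hedged usage sketch follows. The accessor names (`client.tool_runtime`, `client.toolgroups`), the `{"uri": ...}` MCP endpoint shape, the document field names, and every identifier below are assumptions; only the method names and parameters come from the removed code.

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder server URL

# Register a tool group backed by an MCP server (POST /v1/toolgroups),
# then enumerate the tools it exposes (GET /v1/tool-runtime/list-tools).
client.toolgroups.register(
    provider_id="model-context-protocol",               # placeholder provider id
    toolgroup_id="mcp::example",                         # placeholder tool group id
    mcp_endpoint={"uri": "http://localhost:8000/sse"},   # assumed {"uri": ...} shape
)
print(client.tool_runtime.list_tools(tool_group_id="mcp::example"))

# Invoke one runtime tool with keyword arguments (POST /v1/tool-runtime/invoke).
result = client.tool_runtime.invoke_tool(
    tool_name="web_search",                              # placeholder tool name
    kwargs={"query": "llama stack"},
)
print(result)

# Index a document for RAG and query it back
# (POST /v1/tool-runtime/rag-tool/insert and .../query).
client.tool_runtime.rag_tool.insert(
    vector_db_id="my-documents",                         # placeholder vector DB id
    chunk_size_in_tokens=512,
    documents=[
        {
            "document_id": "doc-1",                      # assumed document field names
            "content": "Llama Stack standardizes the core building blocks of AI apps.",
            "metadata": {},
        }
    ],
)
answer = client.tool_runtime.rag_tool.query(
    content="What does Llama Stack standardize?",
    vector_db_ids=["my-documents"],
)
print(answer)

# Remove the tool group when finished (DELETE /v1/toolgroups/{toolgroup_id}).
client.toolgroups.unregister("mcp::example")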
- -from __future__ import annotations - -from typing import Type, cast - -import httpx - -from ..types import tool_list_params -from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._utils import maybe_transform, async_maybe_transform -from .._compat import cached_property -from .._resource import SyncAPIResource, AsyncAPIResource -from .._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from .._wrappers import DataWrapper -from ..types.tool import Tool -from .._base_client import make_request_options -from ..types.tool_list_response import ToolListResponse - -__all__ = ["ToolsResource", "AsyncToolsResource"] - - -class ToolsResource(SyncAPIResource): - @cached_property - def with_raw_response(self) -> ToolsResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return ToolsResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> ToolsResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return ToolsResourceWithStreamingResponse(self) - - def list( - self, - *, - toolgroup_id: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ToolListResponse: - """ - List tools with optional tool group. - - Args: - toolgroup_id: The ID of the tool group to list tools for. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return self._get( - "/v1/tools", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=maybe_transform({"toolgroup_id": toolgroup_id}, tool_list_params.ToolListParams), - post_parser=DataWrapper[ToolListResponse]._unwrapper, - ), - cast_to=cast(Type[ToolListResponse], DataWrapper[ToolListResponse]), - ) - - def get( - self, - tool_name: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Tool: - """ - Get a tool by its name. 
- - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not tool_name: - raise ValueError(f"Expected a non-empty value for `tool_name` but received {tool_name!r}") - return self._get( - f"/v1/tools/{tool_name}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Tool, - ) - - -class AsyncToolsResource(AsyncAPIResource): - @cached_property - def with_raw_response(self) -> AsyncToolsResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncToolsResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncToolsResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncToolsResourceWithStreamingResponse(self) - - async def list( - self, - *, - toolgroup_id: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ToolListResponse: - """ - List tools with optional tool group. - - Args: - toolgroup_id: The ID of the tool group to list tools for. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return await self._get( - "/v1/tools", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=await async_maybe_transform({"toolgroup_id": toolgroup_id}, tool_list_params.ToolListParams), - post_parser=DataWrapper[ToolListResponse]._unwrapper, - ), - cast_to=cast(Type[ToolListResponse], DataWrapper[ToolListResponse]), - ) - - async def get( - self, - tool_name: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Tool: - """ - Get a tool by its name. 
- - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not tool_name: - raise ValueError(f"Expected a non-empty value for `tool_name` but received {tool_name!r}") - return await self._get( - f"/v1/tools/{tool_name}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Tool, - ) - - -class ToolsResourceWithRawResponse: - def __init__(self, tools: ToolsResource) -> None: - self._tools = tools - - self.list = to_raw_response_wrapper( - tools.list, - ) - self.get = to_raw_response_wrapper( - tools.get, - ) - - -class AsyncToolsResourceWithRawResponse: - def __init__(self, tools: AsyncToolsResource) -> None: - self._tools = tools - - self.list = async_to_raw_response_wrapper( - tools.list, - ) - self.get = async_to_raw_response_wrapper( - tools.get, - ) - - -class ToolsResourceWithStreamingResponse: - def __init__(self, tools: ToolsResource) -> None: - self._tools = tools - - self.list = to_streamed_response_wrapper( - tools.list, - ) - self.get = to_streamed_response_wrapper( - tools.get, - ) - - -class AsyncToolsResourceWithStreamingResponse: - def __init__(self, tools: AsyncToolsResource) -> None: - self._tools = tools - - self.list = async_to_streamed_response_wrapper( - tools.list, - ) - self.get = async_to_streamed_response_wrapper( - tools.get, - ) diff --git a/src/llama_stack_client/resources/vector_dbs.py b/src/llama_stack_client/resources/vector_dbs.py deleted file mode 100644 index 3838c38e..00000000 --- a/src/llama_stack_client/resources/vector_dbs.py +++ /dev/null @@ -1,430 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Type, cast - -import httpx - -from ..types import vector_db_register_params -from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven -from .._utils import maybe_transform, async_maybe_transform -from .._compat import cached_property -from .._resource import SyncAPIResource, AsyncAPIResource -from .._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from .._wrappers import DataWrapper -from .._base_client import make_request_options -from ..types.vector_db_list_response import VectorDBListResponse -from ..types.vector_db_register_response import VectorDBRegisterResponse -from ..types.vector_db_retrieve_response import VectorDBRetrieveResponse - -__all__ = ["VectorDBsResource", "AsyncVectorDBsResource"] - - -class VectorDBsResource(SyncAPIResource): - @cached_property - def with_raw_response(self) -> VectorDBsResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return VectorDBsResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> VectorDBsResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
- - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return VectorDBsResourceWithStreamingResponse(self) - - def retrieve( - self, - vector_db_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> VectorDBRetrieveResponse: - """ - Get a vector database by its identifier. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not vector_db_id: - raise ValueError(f"Expected a non-empty value for `vector_db_id` but received {vector_db_id!r}") - return self._get( - f"/v1/vector-dbs/{vector_db_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=VectorDBRetrieveResponse, - ) - - def list( - self, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> VectorDBListResponse: - """List all vector databases.""" - return self._get( - "/v1/vector-dbs", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - post_parser=DataWrapper[VectorDBListResponse]._unwrapper, - ), - cast_to=cast(Type[VectorDBListResponse], DataWrapper[VectorDBListResponse]), - ) - - def register( - self, - *, - embedding_model: str, - vector_db_id: str, - embedding_dimension: int | NotGiven = NOT_GIVEN, - provider_id: str | NotGiven = NOT_GIVEN, - provider_vector_db_id: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> VectorDBRegisterResponse: - """ - Register a vector database. - - Args: - embedding_model: The embedding model to use. - - vector_db_id: The identifier of the vector database to register. - - embedding_dimension: The dimension of the embedding model. - - provider_id: The identifier of the provider. - - provider_vector_db_id: The identifier of the vector database in the provider. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return self._post( - "/v1/vector-dbs", - body=maybe_transform( - { - "embedding_model": embedding_model, - "vector_db_id": vector_db_id, - "embedding_dimension": embedding_dimension, - "provider_id": provider_id, - "provider_vector_db_id": provider_vector_db_id, - }, - vector_db_register_params.VectorDBRegisterParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=VectorDBRegisterResponse, - ) - - def unregister( - self, - vector_db_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> None: - """ - Unregister a vector database. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not vector_db_id: - raise ValueError(f"Expected a non-empty value for `vector_db_id` but received {vector_db_id!r}") - extra_headers = {"Accept": "*/*", **(extra_headers or {})} - return self._delete( - f"/v1/vector-dbs/{vector_db_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=NoneType, - ) - - -class AsyncVectorDBsResource(AsyncAPIResource): - @cached_property - def with_raw_response(self) -> AsyncVectorDBsResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncVectorDBsResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncVectorDBsResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncVectorDBsResourceWithStreamingResponse(self) - - async def retrieve( - self, - vector_db_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> VectorDBRetrieveResponse: - """ - Get a vector database by its identifier. 
- - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not vector_db_id: - raise ValueError(f"Expected a non-empty value for `vector_db_id` but received {vector_db_id!r}") - return await self._get( - f"/v1/vector-dbs/{vector_db_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=VectorDBRetrieveResponse, - ) - - async def list( - self, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> VectorDBListResponse: - """List all vector databases.""" - return await self._get( - "/v1/vector-dbs", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - post_parser=DataWrapper[VectorDBListResponse]._unwrapper, - ), - cast_to=cast(Type[VectorDBListResponse], DataWrapper[VectorDBListResponse]), - ) - - async def register( - self, - *, - embedding_model: str, - vector_db_id: str, - embedding_dimension: int | NotGiven = NOT_GIVEN, - provider_id: str | NotGiven = NOT_GIVEN, - provider_vector_db_id: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> VectorDBRegisterResponse: - """ - Register a vector database. - - Args: - embedding_model: The embedding model to use. - - vector_db_id: The identifier of the vector database to register. - - embedding_dimension: The dimension of the embedding model. - - provider_id: The identifier of the provider. - - provider_vector_db_id: The identifier of the vector database in the provider. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return await self._post( - "/v1/vector-dbs", - body=await async_maybe_transform( - { - "embedding_model": embedding_model, - "vector_db_id": vector_db_id, - "embedding_dimension": embedding_dimension, - "provider_id": provider_id, - "provider_vector_db_id": provider_vector_db_id, - }, - vector_db_register_params.VectorDBRegisterParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=VectorDBRegisterResponse, - ) - - async def unregister( - self, - vector_db_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> None: - """ - Unregister a vector database. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not vector_db_id: - raise ValueError(f"Expected a non-empty value for `vector_db_id` but received {vector_db_id!r}") - extra_headers = {"Accept": "*/*", **(extra_headers or {})} - return await self._delete( - f"/v1/vector-dbs/{vector_db_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=NoneType, - ) - - -class VectorDBsResourceWithRawResponse: - def __init__(self, vector_dbs: VectorDBsResource) -> None: - self._vector_dbs = vector_dbs - - self.retrieve = to_raw_response_wrapper( - vector_dbs.retrieve, - ) - self.list = to_raw_response_wrapper( - vector_dbs.list, - ) - self.register = to_raw_response_wrapper( - vector_dbs.register, - ) - self.unregister = to_raw_response_wrapper( - vector_dbs.unregister, - ) - - -class AsyncVectorDBsResourceWithRawResponse: - def __init__(self, vector_dbs: AsyncVectorDBsResource) -> None: - self._vector_dbs = vector_dbs - - self.retrieve = async_to_raw_response_wrapper( - vector_dbs.retrieve, - ) - self.list = async_to_raw_response_wrapper( - vector_dbs.list, - ) - self.register = async_to_raw_response_wrapper( - vector_dbs.register, - ) - self.unregister = async_to_raw_response_wrapper( - vector_dbs.unregister, - ) - - -class VectorDBsResourceWithStreamingResponse: - def __init__(self, vector_dbs: VectorDBsResource) -> None: - self._vector_dbs = vector_dbs - - self.retrieve = to_streamed_response_wrapper( - vector_dbs.retrieve, - ) - self.list = to_streamed_response_wrapper( - vector_dbs.list, - ) - self.register = to_streamed_response_wrapper( - vector_dbs.register, - ) - self.unregister = to_streamed_response_wrapper( - vector_dbs.unregister, - ) - - -class AsyncVectorDBsResourceWithStreamingResponse: - def __init__(self, vector_dbs: AsyncVectorDBsResource) -> None: - self._vector_dbs = vector_dbs - - self.retrieve = async_to_streamed_response_wrapper( - vector_dbs.retrieve, - ) - self.list = async_to_streamed_response_wrapper( - vector_dbs.list, - ) - self.register = async_to_streamed_response_wrapper( - vector_dbs.register, - ) - self.unregister = async_to_streamed_response_wrapper( - vector_dbs.unregister, - ) diff --git a/src/llama_stack_client/resources/vector_io.py b/src/llama_stack_client/resources/vector_io.py deleted file mode 100644 index 3e361435..00000000 --- a/src/llama_stack_client/resources/vector_io.py +++ /dev/null @@ -1,314 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
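Editor's note, for illustration only (not part of the patch): the preceding hunk removes the vector_dbs resource. A hedged sketch of its lifecycle calls follows. The `client.vector_dbs` accessor, embedding model name, dimension, and identifiers are assumptions; only the method names and parameters come from the removed code.

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder server URL

# Register a vector database (POST /v1/vector-dbs), list and fetch it back,
# then unregister it (DELETE /v1/vector-dbs/{vector_db_id}).
client.vector_dbs.register(
    vector_db_id="my-documents",            # placeholder identifier
    embedding_model="all-MiniLM-L6-v2",     # placeholder embedding model
    embedding_dimension=384,                # optional; assumed to match the model above
)
print(client.vector_dbs.list())
print(client.vector_dbs.retrieve("my-documents"))
client.vector_dbs.unregister("my-documents")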
- -from __future__ import annotations - -from typing import Dict, Union, Iterable - -import httpx - -from ..types import vector_io_query_params, vector_io_insert_params -from .._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven -from .._utils import maybe_transform, async_maybe_transform -from .._compat import cached_property -from .._resource import SyncAPIResource, AsyncAPIResource -from .._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from .._base_client import make_request_options -from ..types.query_chunks_response import QueryChunksResponse -from ..types.shared_params.interleaved_content import InterleavedContent - -__all__ = ["VectorIoResource", "AsyncVectorIoResource"] - - -class VectorIoResource(SyncAPIResource): - @cached_property - def with_raw_response(self) -> VectorIoResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return VectorIoResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> VectorIoResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return VectorIoResourceWithStreamingResponse(self) - - def insert( - self, - *, - chunks: Iterable[vector_io_insert_params.Chunk], - vector_db_id: str, - ttl_seconds: int | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> None: - """Insert chunks into a vector database. - - Args: - chunks: The chunks to insert. - - Each `Chunk` should contain content which can be - interleaved text, images, or other types. `metadata`: `dict[str, Any]` and - `embedding`: `List[float]` are optional. If `metadata` is provided, you - configure how Llama Stack formats the chunk during generation. If `embedding` is - not provided, it will be computed later. - - vector_db_id: The identifier of the vector database to insert the chunks into. - - ttl_seconds: The time to live of the chunks. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"Accept": "*/*", **(extra_headers or {})} - return self._post( - "/v1/vector-io/insert", - body=maybe_transform( - { - "chunks": chunks, - "vector_db_id": vector_db_id, - "ttl_seconds": ttl_seconds, - }, - vector_io_insert_params.VectorIoInsertParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=NoneType, - ) - - def query( - self, - *, - query: InterleavedContent, - vector_db_id: str, - params: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> QueryChunksResponse: - """ - Query chunks from a vector database. - - Args: - query: The query to search for. - - vector_db_id: The identifier of the vector database to query. - - params: The parameters of the query. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return self._post( - "/v1/vector-io/query", - body=maybe_transform( - { - "query": query, - "vector_db_id": vector_db_id, - "params": params, - }, - vector_io_query_params.VectorIoQueryParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=QueryChunksResponse, - ) - - -class AsyncVectorIoResource(AsyncAPIResource): - @cached_property - def with_raw_response(self) -> AsyncVectorIoResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncVectorIoResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncVectorIoResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncVectorIoResourceWithStreamingResponse(self) - - async def insert( - self, - *, - chunks: Iterable[vector_io_insert_params.Chunk], - vector_db_id: str, - ttl_seconds: int | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> None: - """Insert chunks into a vector database. - - Args: - chunks: The chunks to insert. - - Each `Chunk` should contain content which can be - interleaved text, images, or other types. `metadata`: `dict[str, Any]` and - `embedding`: `List[float]` are optional. If `metadata` is provided, you - configure how Llama Stack formats the chunk during generation. If `embedding` is - not provided, it will be computed later. - - vector_db_id: The identifier of the vector database to insert the chunks into. - - ttl_seconds: The time to live of the chunks. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - extra_headers = {"Accept": "*/*", **(extra_headers or {})} - return await self._post( - "/v1/vector-io/insert", - body=await async_maybe_transform( - { - "chunks": chunks, - "vector_db_id": vector_db_id, - "ttl_seconds": ttl_seconds, - }, - vector_io_insert_params.VectorIoInsertParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=NoneType, - ) - - async def query( - self, - *, - query: InterleavedContent, - vector_db_id: str, - params: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> QueryChunksResponse: - """ - Query chunks from a vector database. - - Args: - query: The query to search for. - - vector_db_id: The identifier of the vector database to query. - - params: The parameters of the query. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return await self._post( - "/v1/vector-io/query", - body=await async_maybe_transform( - { - "query": query, - "vector_db_id": vector_db_id, - "params": params, - }, - vector_io_query_params.VectorIoQueryParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=QueryChunksResponse, - ) - - -class VectorIoResourceWithRawResponse: - def __init__(self, vector_io: VectorIoResource) -> None: - self._vector_io = vector_io - - self.insert = to_raw_response_wrapper( - vector_io.insert, - ) - self.query = to_raw_response_wrapper( - vector_io.query, - ) - - -class AsyncVectorIoResourceWithRawResponse: - def __init__(self, vector_io: AsyncVectorIoResource) -> None: - self._vector_io = vector_io - - self.insert = async_to_raw_response_wrapper( - vector_io.insert, - ) - self.query = async_to_raw_response_wrapper( - vector_io.query, - ) - - -class VectorIoResourceWithStreamingResponse: - def __init__(self, vector_io: VectorIoResource) -> None: - self._vector_io = vector_io - - self.insert = to_streamed_response_wrapper( - vector_io.insert, - ) - self.query = to_streamed_response_wrapper( - vector_io.query, - ) - - -class AsyncVectorIoResourceWithStreamingResponse: - def __init__(self, vector_io: AsyncVectorIoResource) -> None: - self._vector_io = vector_io - - self.insert = async_to_streamed_response_wrapper( - vector_io.insert, - ) - self.query = async_to_streamed_response_wrapper( - vector_io.query, - ) diff --git a/src/llama_stack_client/resources/vector_stores/__init__.py b/src/llama_stack_client/resources/vector_stores/__init__.py deleted file mode 100644 index 85d202da..00000000 --- a/src/llama_stack_client/resources/vector_stores/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from .files import ( - FilesResource, - AsyncFilesResource, - FilesResourceWithRawResponse, - AsyncFilesResourceWithRawResponse, - FilesResourceWithStreamingResponse, - AsyncFilesResourceWithStreamingResponse, -) -from .vector_stores import ( - VectorStoresResource, - AsyncVectorStoresResource, - VectorStoresResourceWithRawResponse, - AsyncVectorStoresResourceWithRawResponse, - VectorStoresResourceWithStreamingResponse, - AsyncVectorStoresResourceWithStreamingResponse, -) - -__all__ = [ - "FilesResource", - "AsyncFilesResource", - "FilesResourceWithRawResponse", - "AsyncFilesResourceWithRawResponse", - "FilesResourceWithStreamingResponse", - "AsyncFilesResourceWithStreamingResponse", - "VectorStoresResource", - "AsyncVectorStoresResource", - "VectorStoresResourceWithRawResponse", - "AsyncVectorStoresResourceWithRawResponse", - "VectorStoresResourceWithStreamingResponse", - "AsyncVectorStoresResourceWithStreamingResponse", -] diff --git a/src/llama_stack_client/resources/vector_stores/files.py b/src/llama_stack_client/resources/vector_stores/files.py deleted file mode 100644 index 1ef48084..00000000 --- a/src/llama_stack_client/resources/vector_stores/files.py +++ /dev/null @@ -1,201 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
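For reference, a minimal sketch of the vector_io insert/query surface deleted above; the chunk contents, database ID, and server URL are placeholders:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server address

# Insert chunks; `metadata` and `embedding` are optional, and a missing embedding is computed server-side.
client.vector_io.insert(
    vector_db_id="my-docs",
    chunks=[
        {"content": "Llama Stack ships a vector-io API.", "metadata": {"source": "notes.txt"}},
    ],
)

# Query the database; the QueryChunksResponse carries the matched chunks.
res = client.vector_io.query(vector_db_id="my-docs", query="What does vector-io do?")
print(res)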
- -from __future__ import annotations - -from typing import Dict, Union, Iterable - -import httpx - -from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ..._utils import maybe_transform, async_maybe_transform -from ..._compat import cached_property -from ..._resource import SyncAPIResource, AsyncAPIResource -from ..._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from ..._base_client import make_request_options -from ...types.vector_stores import file_create_params -from ...types.vector_stores.vector_store_file import VectorStoreFile - -__all__ = ["FilesResource", "AsyncFilesResource"] - - -class FilesResource(SyncAPIResource): - @cached_property - def with_raw_response(self) -> FilesResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return FilesResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> FilesResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return FilesResourceWithStreamingResponse(self) - - def create( - self, - vector_store_id: str, - *, - file_id: str, - attributes: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - chunking_strategy: file_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> VectorStoreFile: - """ - Attach a file to a vector store. - - Args: - file_id: The ID of the file to attach to the vector store. - - attributes: The key-value attributes stored with the file, which can be used for filtering. - - chunking_strategy: The chunking strategy to use for the file. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not vector_store_id: - raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") - return self._post( - f"/v1/openai/v1/vector_stores/{vector_store_id}/files", - body=maybe_transform( - { - "file_id": file_id, - "attributes": attributes, - "chunking_strategy": chunking_strategy, - }, - file_create_params.FileCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=VectorStoreFile, - ) - - -class AsyncFilesResource(AsyncAPIResource): - @cached_property - def with_raw_response(self) -> AsyncFilesResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. 
- - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncFilesResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncFilesResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncFilesResourceWithStreamingResponse(self) - - async def create( - self, - vector_store_id: str, - *, - file_id: str, - attributes: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - chunking_strategy: file_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> VectorStoreFile: - """ - Attach a file to a vector store. - - Args: - file_id: The ID of the file to attach to the vector store. - - attributes: The key-value attributes stored with the file, which can be used for filtering. - - chunking_strategy: The chunking strategy to use for the file. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not vector_store_id: - raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") - return await self._post( - f"/v1/openai/v1/vector_stores/{vector_store_id}/files", - body=await async_maybe_transform( - { - "file_id": file_id, - "attributes": attributes, - "chunking_strategy": chunking_strategy, - }, - file_create_params.FileCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=VectorStoreFile, - ) - - -class FilesResourceWithRawResponse: - def __init__(self, files: FilesResource) -> None: - self._files = files - - self.create = to_raw_response_wrapper( - files.create, - ) - - -class AsyncFilesResourceWithRawResponse: - def __init__(self, files: AsyncFilesResource) -> None: - self._files = files - - self.create = async_to_raw_response_wrapper( - files.create, - ) - - -class FilesResourceWithStreamingResponse: - def __init__(self, files: FilesResource) -> None: - self._files = files - - self.create = to_streamed_response_wrapper( - files.create, - ) - - -class AsyncFilesResourceWithStreamingResponse: - def __init__(self, files: AsyncFilesResource) -> None: - self._files = files - - self.create = async_to_streamed_response_wrapper( - files.create, - ) diff --git a/src/llama_stack_client/resources/vector_stores/vector_stores.py b/src/llama_stack_client/resources/vector_stores/vector_stores.py deleted file mode 100644 index 7985cee9..00000000 --- a/src/llama_stack_client/resources/vector_stores/vector_stores.py +++ /dev/null @@ -1,825 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
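For reference, a minimal sketch of the vector-store file attachment call deleted above; the store and file IDs are placeholders, and the file is assumed to have been uploaded through the files API beforehand:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server address

# Attach an already-uploaded file to a vector store (POST /v1/openai/v1/vector_stores/{id}/files).
vs_file = client.vector_stores.files.create(
    "vs_123",                     # vector_store_id, placeholder
    file_id="file_abc",           # placeholder file ID
    attributes={"team": "docs"},  # optional key-value attributes used for filtering
)
print(vs_file)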
- -from __future__ import annotations - -from typing import Dict, List, Union, Iterable - -import httpx - -from .files import ( - FilesResource, - AsyncFilesResource, - FilesResourceWithRawResponse, - AsyncFilesResourceWithRawResponse, - FilesResourceWithStreamingResponse, - AsyncFilesResourceWithStreamingResponse, -) -from ...types import ( - vector_store_list_params, - vector_store_create_params, - vector_store_search_params, - vector_store_update_params, -) -from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ..._utils import maybe_transform, async_maybe_transform -from ..._compat import cached_property -from ..._resource import SyncAPIResource, AsyncAPIResource -from ..._response import ( - to_raw_response_wrapper, - to_streamed_response_wrapper, - async_to_raw_response_wrapper, - async_to_streamed_response_wrapper, -) -from ..._base_client import make_request_options -from ...types.vector_store import VectorStore -from ...types.list_vector_stores_response import ListVectorStoresResponse -from ...types.vector_store_delete_response import VectorStoreDeleteResponse -from ...types.vector_store_search_response import VectorStoreSearchResponse - -__all__ = ["VectorStoresResource", "AsyncVectorStoresResource"] - - -class VectorStoresResource(SyncAPIResource): - @cached_property - def files(self) -> FilesResource: - return FilesResource(self._client) - - @cached_property - def with_raw_response(self) -> VectorStoresResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return VectorStoresResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> VectorStoresResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return VectorStoresResourceWithStreamingResponse(self) - - def create( - self, - *, - name: str, - chunking_strategy: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - embedding_dimension: int | NotGiven = NOT_GIVEN, - embedding_model: str | NotGiven = NOT_GIVEN, - expires_after: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - file_ids: List[str] | NotGiven = NOT_GIVEN, - metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - provider_id: str | NotGiven = NOT_GIVEN, - provider_vector_db_id: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> VectorStore: - """ - Creates a vector store. - - Args: - name: A name for the vector store. - - chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` - strategy. - - embedding_dimension: The dimension of the embedding vectors (default: 384). - - embedding_model: The embedding model to use for this vector store. 
- - expires_after: The expiration policy for a vector store. - - file_ids: A list of File IDs that the vector store should use. Useful for tools like - `file_search` that can access files. - - metadata: Set of 16 key-value pairs that can be attached to an object. - - provider_id: The ID of the provider to use for this vector store. - - provider_vector_db_id: The provider-specific vector database ID. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return self._post( - "/v1/openai/v1/vector_stores", - body=maybe_transform( - { - "name": name, - "chunking_strategy": chunking_strategy, - "embedding_dimension": embedding_dimension, - "embedding_model": embedding_model, - "expires_after": expires_after, - "file_ids": file_ids, - "metadata": metadata, - "provider_id": provider_id, - "provider_vector_db_id": provider_vector_db_id, - }, - vector_store_create_params.VectorStoreCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=VectorStore, - ) - - def retrieve( - self, - vector_store_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> VectorStore: - """ - Retrieves a vector store. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not vector_store_id: - raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") - return self._get( - f"/v1/openai/v1/vector_stores/{vector_store_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=VectorStore, - ) - - def update( - self, - vector_store_id: str, - *, - expires_after: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - name: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> VectorStore: - """ - Updates a vector store. - - Args: - expires_after: The expiration policy for a vector store. - - metadata: Set of 16 key-value pairs that can be attached to an object. - - name: The name of the vector store. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not vector_store_id: - raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") - return self._post( - f"/v1/openai/v1/vector_stores/{vector_store_id}", - body=maybe_transform( - { - "expires_after": expires_after, - "metadata": metadata, - "name": name, - }, - vector_store_update_params.VectorStoreUpdateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=VectorStore, - ) - - def list( - self, - *, - after: str | NotGiven = NOT_GIVEN, - before: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - order: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ListVectorStoresResponse: - """Returns a list of vector stores. - - Args: - after: A cursor for use in pagination. - - `after` is an object ID that defines your place - in the list. - - before: A cursor for use in pagination. `before` is an object ID that defines your place - in the list. - - limit: A limit on the number of objects to be returned. Limit can range between 1 and - 100, and the default is 20. - - order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return self._get( - "/v1/openai/v1/vector_stores", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=maybe_transform( - { - "after": after, - "before": before, - "limit": limit, - "order": order, - }, - vector_store_list_params.VectorStoreListParams, - ), - ), - cast_to=ListVectorStoresResponse, - ) - - def delete( - self, - vector_store_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> VectorStoreDeleteResponse: - """ - Delete a vector store. 
- - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not vector_store_id: - raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") - return self._delete( - f"/v1/openai/v1/vector_stores/{vector_store_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=VectorStoreDeleteResponse, - ) - - def search( - self, - vector_store_id: str, - *, - query: Union[str, List[str]], - filters: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - max_num_results: int | NotGiven = NOT_GIVEN, - ranking_options: vector_store_search_params.RankingOptions | NotGiven = NOT_GIVEN, - rewrite_query: bool | NotGiven = NOT_GIVEN, - search_mode: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> VectorStoreSearchResponse: - """Search for chunks in a vector store. - - Searches a vector store for relevant chunks - based on a query and optional file attribute filters. - - Args: - query: The query string or array for performing the search. - - filters: Filters based on file attributes to narrow the search results. - - max_num_results: Maximum number of results to return (1 to 50 inclusive, default 10). - - ranking_options: Ranking options for fine-tuning the search results. - - rewrite_query: Whether to rewrite the natural language query for vector search (default false) - - search_mode: The search mode to use - "keyword", "vector", or "hybrid" (default "vector") - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not vector_store_id: - raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") - return self._post( - f"/v1/openai/v1/vector_stores/{vector_store_id}/search", - body=maybe_transform( - { - "query": query, - "filters": filters, - "max_num_results": max_num_results, - "ranking_options": ranking_options, - "rewrite_query": rewrite_query, - "search_mode": search_mode, - }, - vector_store_search_params.VectorStoreSearchParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=VectorStoreSearchResponse, - ) - - -class AsyncVectorStoresResource(AsyncAPIResource): - @cached_property - def files(self) -> AsyncFilesResource: - return AsyncFilesResource(self._client) - - @cached_property - def with_raw_response(self) -> AsyncVectorStoresResourceWithRawResponse: - """ - This property can be used as a prefix for any HTTP method call to return - the raw response object instead of the parsed content. 
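For reference, a minimal sketch of the synchronous vector_stores surface deleted above (create, search, list, delete); the store name, query, and server URL are placeholders:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server address

# Create a store (POST /v1/openai/v1/vector_stores); unset fields fall back to server defaults.
store = client.vector_stores.create(name="support-kb")
# The returned VectorStore model is assumed here to expose its identifier as `id`.

# Search it (POST .../vector_stores/{id}/search); search_mode may be "keyword", "vector", or "hybrid".
hits = client.vector_stores.search(store.id, query="refund policy", max_num_results=5, search_mode="vector")
print(hits)

print(client.vector_stores.list(limit=20))  # cursor pagination via `after`/`before`
client.vector_stores.delete(store.id)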
- - For more information, see https://www.github.com/llamastack/llama-stack-client-python#accessing-raw-response-data-eg-headers - """ - return AsyncVectorStoresResourceWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncVectorStoresResourceWithStreamingResponse: - """ - An alternative to `.with_raw_response` that doesn't eagerly read the response body. - - For more information, see https://www.github.com/llamastack/llama-stack-client-python#with_streaming_response - """ - return AsyncVectorStoresResourceWithStreamingResponse(self) - - async def create( - self, - *, - name: str, - chunking_strategy: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - embedding_dimension: int | NotGiven = NOT_GIVEN, - embedding_model: str | NotGiven = NOT_GIVEN, - expires_after: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - file_ids: List[str] | NotGiven = NOT_GIVEN, - metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - provider_id: str | NotGiven = NOT_GIVEN, - provider_vector_db_id: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> VectorStore: - """ - Creates a vector store. - - Args: - name: A name for the vector store. - - chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` - strategy. - - embedding_dimension: The dimension of the embedding vectors (default: 384). - - embedding_model: The embedding model to use for this vector store. - - expires_after: The expiration policy for a vector store. - - file_ids: A list of File IDs that the vector store should use. Useful for tools like - `file_search` that can access files. - - metadata: Set of 16 key-value pairs that can be attached to an object. - - provider_id: The ID of the provider to use for this vector store. - - provider_vector_db_id: The provider-specific vector database ID. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return await self._post( - "/v1/openai/v1/vector_stores", - body=await async_maybe_transform( - { - "name": name, - "chunking_strategy": chunking_strategy, - "embedding_dimension": embedding_dimension, - "embedding_model": embedding_model, - "expires_after": expires_after, - "file_ids": file_ids, - "metadata": metadata, - "provider_id": provider_id, - "provider_vector_db_id": provider_vector_db_id, - }, - vector_store_create_params.VectorStoreCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=VectorStore, - ) - - async def retrieve( - self, - vector_store_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> VectorStore: - """ - Retrieves a vector store. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not vector_store_id: - raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") - return await self._get( - f"/v1/openai/v1/vector_stores/{vector_store_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=VectorStore, - ) - - async def update( - self, - vector_store_id: str, - *, - expires_after: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - name: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> VectorStore: - """ - Updates a vector store. - - Args: - expires_after: The expiration policy for a vector store. - - metadata: Set of 16 key-value pairs that can be attached to an object. - - name: The name of the vector store. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not vector_store_id: - raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") - return await self._post( - f"/v1/openai/v1/vector_stores/{vector_store_id}", - body=await async_maybe_transform( - { - "expires_after": expires_after, - "metadata": metadata, - "name": name, - }, - vector_store_update_params.VectorStoreUpdateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=VectorStore, - ) - - async def list( - self, - *, - after: str | NotGiven = NOT_GIVEN, - before: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - order: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ListVectorStoresResponse: - """Returns a list of vector stores. - - Args: - after: A cursor for use in pagination. - - `after` is an object ID that defines your place - in the list. - - before: A cursor for use in pagination. `before` is an object ID that defines your place - in the list. - - limit: A limit on the number of objects to be returned. 
Limit can range between 1 and - 100, and the default is 20. - - order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - return await self._get( - "/v1/openai/v1/vector_stores", - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=await async_maybe_transform( - { - "after": after, - "before": before, - "limit": limit, - "order": order, - }, - vector_store_list_params.VectorStoreListParams, - ), - ), - cast_to=ListVectorStoresResponse, - ) - - async def delete( - self, - vector_store_id: str, - *, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> VectorStoreDeleteResponse: - """ - Delete a vector store. - - Args: - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not vector_store_id: - raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") - return await self._delete( - f"/v1/openai/v1/vector_stores/{vector_store_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=VectorStoreDeleteResponse, - ) - - async def search( - self, - vector_store_id: str, - *, - query: Union[str, List[str]], - filters: Dict[str, Union[bool, float, str, Iterable[object], object, None]] | NotGiven = NOT_GIVEN, - max_num_results: int | NotGiven = NOT_GIVEN, - ranking_options: vector_store_search_params.RankingOptions | NotGiven = NOT_GIVEN, - rewrite_query: bool | NotGiven = NOT_GIVEN, - search_mode: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> VectorStoreSearchResponse: - """Search for chunks in a vector store. - - Searches a vector store for relevant chunks - based on a query and optional file attribute filters. - - Args: - query: The query string or array for performing the search. - - filters: Filters based on file attributes to narrow the search results. - - max_num_results: Maximum number of results to return (1 to 50 inclusive, default 10). - - ranking_options: Ranking options for fine-tuning the search results. 
- - rewrite_query: Whether to rewrite the natural language query for vector search (default false) - - search_mode: The search mode to use - "keyword", "vector", or "hybrid" (default "vector") - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - if not vector_store_id: - raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") - return await self._post( - f"/v1/openai/v1/vector_stores/{vector_store_id}/search", - body=await async_maybe_transform( - { - "query": query, - "filters": filters, - "max_num_results": max_num_results, - "ranking_options": ranking_options, - "rewrite_query": rewrite_query, - "search_mode": search_mode, - }, - vector_store_search_params.VectorStoreSearchParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=VectorStoreSearchResponse, - ) - - -class VectorStoresResourceWithRawResponse: - def __init__(self, vector_stores: VectorStoresResource) -> None: - self._vector_stores = vector_stores - - self.create = to_raw_response_wrapper( - vector_stores.create, - ) - self.retrieve = to_raw_response_wrapper( - vector_stores.retrieve, - ) - self.update = to_raw_response_wrapper( - vector_stores.update, - ) - self.list = to_raw_response_wrapper( - vector_stores.list, - ) - self.delete = to_raw_response_wrapper( - vector_stores.delete, - ) - self.search = to_raw_response_wrapper( - vector_stores.search, - ) - - @cached_property - def files(self) -> FilesResourceWithRawResponse: - return FilesResourceWithRawResponse(self._vector_stores.files) - - -class AsyncVectorStoresResourceWithRawResponse: - def __init__(self, vector_stores: AsyncVectorStoresResource) -> None: - self._vector_stores = vector_stores - - self.create = async_to_raw_response_wrapper( - vector_stores.create, - ) - self.retrieve = async_to_raw_response_wrapper( - vector_stores.retrieve, - ) - self.update = async_to_raw_response_wrapper( - vector_stores.update, - ) - self.list = async_to_raw_response_wrapper( - vector_stores.list, - ) - self.delete = async_to_raw_response_wrapper( - vector_stores.delete, - ) - self.search = async_to_raw_response_wrapper( - vector_stores.search, - ) - - @cached_property - def files(self) -> AsyncFilesResourceWithRawResponse: - return AsyncFilesResourceWithRawResponse(self._vector_stores.files) - - -class VectorStoresResourceWithStreamingResponse: - def __init__(self, vector_stores: VectorStoresResource) -> None: - self._vector_stores = vector_stores - - self.create = to_streamed_response_wrapper( - vector_stores.create, - ) - self.retrieve = to_streamed_response_wrapper( - vector_stores.retrieve, - ) - self.update = to_streamed_response_wrapper( - vector_stores.update, - ) - self.list = to_streamed_response_wrapper( - vector_stores.list, - ) - self.delete = to_streamed_response_wrapper( - vector_stores.delete, - ) - self.search = to_streamed_response_wrapper( - vector_stores.search, - ) - - @cached_property - def files(self) -> FilesResourceWithStreamingResponse: - return FilesResourceWithStreamingResponse(self._vector_stores.files) - - -class AsyncVectorStoresResourceWithStreamingResponse: - def __init__(self, vector_stores: AsyncVectorStoresResource) -> None: - self._vector_stores = vector_stores - - self.create = 
async_to_streamed_response_wrapper( - vector_stores.create, - ) - self.retrieve = async_to_streamed_response_wrapper( - vector_stores.retrieve, - ) - self.update = async_to_streamed_response_wrapper( - vector_stores.update, - ) - self.list = async_to_streamed_response_wrapper( - vector_stores.list, - ) - self.delete = async_to_streamed_response_wrapper( - vector_stores.delete, - ) - self.search = async_to_streamed_response_wrapper( - vector_stores.search, - ) - - @cached_property - def files(self) -> AsyncFilesResourceWithStreamingResponse: - return AsyncFilesResourceWithStreamingResponse(self._vector_stores.files) diff --git a/src/llama_stack_client/types/__init__.py b/src/llama_stack_client/types/__init__.py deleted file mode 100644 index 7f742ba5..00000000 --- a/src/llama_stack_client/types/__init__.py +++ /dev/null @@ -1,175 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from .job import Job as Job -from .file import File as File -from .tool import Tool as Tool -from .model import Model as Model -from .trace import Trace as Trace -from .shared import ( - Message as Message, - Document as Document, - ToolCall as ToolCall, - ParamType as ParamType, - ReturnType as ReturnType, - AgentConfig as AgentConfig, - QueryConfig as QueryConfig, - QueryResult as QueryResult, - UserMessage as UserMessage, - ContentDelta as ContentDelta, - ScoringResult as ScoringResult, - SystemMessage as SystemMessage, - ResponseFormat as ResponseFormat, - SamplingParams as SamplingParams, - BatchCompletion as BatchCompletion, - SafetyViolation as SafetyViolation, - ToolCallOrString as ToolCallOrString, - CompletionMessage as CompletionMessage, - InterleavedContent as InterleavedContent, - ToolParamDefinition as ToolParamDefinition, - ToolResponseMessage as ToolResponseMessage, - QueryGeneratorConfig as QueryGeneratorConfig, - ChatCompletionResponse as ChatCompletionResponse, - InterleavedContentItem as InterleavedContentItem, -) -from .shield import Shield as Shield -from .tool_def import ToolDef as ToolDef -from .benchmark import Benchmark as Benchmark -from .route_info import RouteInfo as RouteInfo -from .scoring_fn import ScoringFn as ScoringFn -from .tool_group import ToolGroup as ToolGroup -from .event_param import EventParam as EventParam -from .health_info import HealthInfo as HealthInfo -from .vector_store import VectorStore as VectorStore -from .version_info import VersionInfo as VersionInfo -from .provider_info import ProviderInfo as ProviderInfo -from .tool_response import ToolResponse as ToolResponse -from .inference_step import InferenceStep as InferenceStep -from .tool_def_param import ToolDefParam as ToolDefParam -from .response_object import ResponseObject as ResponseObject -from .token_log_probs import TokenLogProbs as TokenLogProbs -from .file_list_params import FileListParams as FileListParams -from .shield_call_step import ShieldCallStep as ShieldCallStep -from .span_with_status import SpanWithStatus as SpanWithStatus -from .tool_list_params import ToolListParams as ToolListParams -from .evaluate_response import EvaluateResponse as EvaluateResponse -from .post_training_job import PostTrainingJob as PostTrainingJob -from .scoring_fn_params import ScoringFnParams as ScoringFnParams -from .file_create_params import FileCreateParams as FileCreateParams -from .tool_list_response import ToolListResponse as ToolListResponse -from .agent_create_params import AgentCreateParams as AgentCreateParams -from 
.completion_response import CompletionResponse as CompletionResponse -from .embeddings_response import EmbeddingsResponse as EmbeddingsResponse -from .list_files_response import ListFilesResponse as ListFilesResponse -from .list_tools_response import ListToolsResponse as ListToolsResponse -from .model_list_response import ModelListResponse as ModelListResponse -from .route_list_response import RouteListResponse as RouteListResponse -from .run_shield_response import RunShieldResponse as RunShieldResponse -from .tool_execution_step import ToolExecutionStep as ToolExecutionStep -from .tool_response_param import ToolResponseParam as ToolResponseParam -from .delete_file_response import DeleteFileResponse as DeleteFileResponse -from .eval_candidate_param import EvalCandidateParam as EvalCandidateParam -from .eval_run_eval_params import EvalRunEvalParams as EvalRunEvalParams -from .list_models_response import ListModelsResponse as ListModelsResponse -from .list_routes_response import ListRoutesResponse as ListRoutesResponse -from .query_spans_response import QuerySpansResponse as QuerySpansResponse -from .response_list_params import ResponseListParams as ResponseListParams -from .scoring_score_params import ScoringScoreParams as ScoringScoreParams -from .shield_list_response import ShieldListResponse as ShieldListResponse -from .agent_create_response import AgentCreateResponse as AgentCreateResponse -from .chat_completion_chunk import ChatCompletionChunk as ChatCompletionChunk -from .dataset_list_response import DatasetListResponse as DatasetListResponse -from .list_shields_response import ListShieldsResponse as ListShieldsResponse -from .memory_retrieval_step import MemoryRetrievalStep as MemoryRetrievalStep -from .model_register_params import ModelRegisterParams as ModelRegisterParams -from .query_chunks_response import QueryChunksResponse as QueryChunksResponse -from .query_condition_param import QueryConditionParam as QueryConditionParam -from .algorithm_config_param import AlgorithmConfigParam as AlgorithmConfigParam -from .benchmark_config_param import BenchmarkConfigParam as BenchmarkConfigParam -from .list_datasets_response import ListDatasetsResponse as ListDatasetsResponse -from .provider_list_response import ProviderListResponse as ProviderListResponse -from .response_create_params import ResponseCreateParams as ResponseCreateParams -from .response_list_response import ResponseListResponse as ResponseListResponse -from .response_object_stream import ResponseObjectStream as ResponseObjectStream -from .scoring_score_response import ScoringScoreResponse as ScoringScoreResponse -from .shield_register_params import ShieldRegisterParams as ShieldRegisterParams -from .tool_invocation_result import ToolInvocationResult as ToolInvocationResult -from .vector_io_query_params import VectorIoQueryParams as VectorIoQueryParams -from .benchmark_list_response import BenchmarkListResponse as BenchmarkListResponse -from .dataset_iterrows_params import DatasetIterrowsParams as DatasetIterrowsParams -from .dataset_register_params import DatasetRegisterParams as DatasetRegisterParams -from .embedding_create_params import EmbeddingCreateParams as EmbeddingCreateParams -from .list_providers_response import ListProvidersResponse as ListProvidersResponse -from .scoring_fn_params_param import ScoringFnParamsParam as ScoringFnParamsParam -from .toolgroup_list_response import ToolgroupListResponse as ToolgroupListResponse -from .vector_db_list_response import VectorDBListResponse as VectorDBListResponse -from 
.vector_io_insert_params import VectorIoInsertParams as VectorIoInsertParams -from .completion_create_params import CompletionCreateParams as CompletionCreateParams -from .list_benchmarks_response import ListBenchmarksResponse as ListBenchmarksResponse -from .list_vector_dbs_response import ListVectorDBsResponse as ListVectorDBsResponse -from .safety_run_shield_params import SafetyRunShieldParams as SafetyRunShieldParams -from .vector_store_list_params import VectorStoreListParams as VectorStoreListParams -from .benchmark_register_params import BenchmarkRegisterParams as BenchmarkRegisterParams -from .dataset_iterrows_response import DatasetIterrowsResponse as DatasetIterrowsResponse -from .dataset_register_response import DatasetRegisterResponse as DatasetRegisterResponse -from .dataset_retrieve_response import DatasetRetrieveResponse as DatasetRetrieveResponse -from .eval_evaluate_rows_params import EvalEvaluateRowsParams as EvalEvaluateRowsParams -from .list_tool_groups_response import ListToolGroupsResponse as ListToolGroupsResponse -from .toolgroup_register_params import ToolgroupRegisterParams as ToolgroupRegisterParams -from .vector_db_register_params import VectorDBRegisterParams as VectorDBRegisterParams -from .completion_create_response import CompletionCreateResponse as CompletionCreateResponse -from .create_embeddings_response import CreateEmbeddingsResponse as CreateEmbeddingsResponse -from .eval_run_eval_alpha_params import EvalRunEvalAlphaParams as EvalRunEvalAlphaParams -from .scoring_score_batch_params import ScoringScoreBatchParams as ScoringScoreBatchParams -from .telemetry_log_event_params import TelemetryLogEventParams as TelemetryLogEventParams -from .vector_store_create_params import VectorStoreCreateParams as VectorStoreCreateParams -from .vector_store_search_params import VectorStoreSearchParams as VectorStoreSearchParams -from .vector_store_update_params import VectorStoreUpdateParams as VectorStoreUpdateParams -from .inference_completion_params import InferenceCompletionParams as InferenceCompletionParams -from .inference_embeddings_params import InferenceEmbeddingsParams as InferenceEmbeddingsParams -from .list_vector_stores_response import ListVectorStoresResponse as ListVectorStoresResponse -from .telemetry_get_span_response import TelemetryGetSpanResponse as TelemetryGetSpanResponse -from .vector_db_register_response import VectorDBRegisterResponse as VectorDBRegisterResponse -from .vector_db_retrieve_response import VectorDBRetrieveResponse as VectorDBRetrieveResponse -from .scoring_score_batch_response import ScoringScoreBatchResponse as ScoringScoreBatchResponse -from .telemetry_query_spans_params import TelemetryQuerySpansParams as TelemetryQuerySpansParams -from .vector_store_delete_response import VectorStoreDeleteResponse as VectorStoreDeleteResponse -from .vector_store_search_response import VectorStoreSearchResponse as VectorStoreSearchResponse -from .telemetry_query_traces_params import TelemetryQueryTracesParams as TelemetryQueryTracesParams -from .scoring_function_list_response import ScoringFunctionListResponse as ScoringFunctionListResponse -from .telemetry_get_span_tree_params import TelemetryGetSpanTreeParams as TelemetryGetSpanTreeParams -from .telemetry_query_spans_response import TelemetryQuerySpansResponse as TelemetryQuerySpansResponse -from .tool_runtime_list_tools_params import ToolRuntimeListToolsParams as ToolRuntimeListToolsParams -from .eval_evaluate_rows_alpha_params import EvalEvaluateRowsAlphaParams as EvalEvaluateRowsAlphaParams 
-from .list_scoring_functions_response import ListScoringFunctionsResponse as ListScoringFunctionsResponse -from .telemetry_query_traces_response import TelemetryQueryTracesResponse as TelemetryQueryTracesResponse -from .tool_runtime_invoke_tool_params import ToolRuntimeInvokeToolParams as ToolRuntimeInvokeToolParams -from .inference_chat_completion_params import InferenceChatCompletionParams as InferenceChatCompletionParams -from .list_post_training_jobs_response import ListPostTrainingJobsResponse as ListPostTrainingJobsResponse -from .scoring_function_register_params import ScoringFunctionRegisterParams as ScoringFunctionRegisterParams -from .telemetry_get_span_tree_response import TelemetryGetSpanTreeResponse as TelemetryGetSpanTreeResponse -from .tool_runtime_list_tools_response import ToolRuntimeListToolsResponse as ToolRuntimeListToolsResponse -from .inference_batch_completion_params import InferenceBatchCompletionParams as InferenceBatchCompletionParams -from .synthetic_data_generation_response import SyntheticDataGenerationResponse as SyntheticDataGenerationResponse -from .chat_completion_response_stream_chunk import ( - ChatCompletionResponseStreamChunk as ChatCompletionResponseStreamChunk, -) -from .inference_batch_chat_completion_params import ( - InferenceBatchChatCompletionParams as InferenceBatchChatCompletionParams, -) -from .telemetry_save_spans_to_dataset_params import ( - TelemetrySaveSpansToDatasetParams as TelemetrySaveSpansToDatasetParams, -) -from .inference_batch_chat_completion_response import ( - InferenceBatchChatCompletionResponse as InferenceBatchChatCompletionResponse, -) -from .post_training_preference_optimize_params import ( - PostTrainingPreferenceOptimizeParams as PostTrainingPreferenceOptimizeParams, -) -from .post_training_supervised_fine_tune_params import ( - PostTrainingSupervisedFineTuneParams as PostTrainingSupervisedFineTuneParams, -) -from .synthetic_data_generation_generate_params import ( - SyntheticDataGenerationGenerateParams as SyntheticDataGenerationGenerateParams, -) diff --git a/src/llama_stack_client/types/agent_create_params.py b/src/llama_stack_client/types/agent_create_params.py deleted file mode 100644 index 525cf1e2..00000000 --- a/src/llama_stack_client/types/agent_create_params.py +++ /dev/null @@ -1,14 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing_extensions import Required, TypedDict - -from .shared_params.agent_config import AgentConfig - -__all__ = ["AgentCreateParams"] - - -class AgentCreateParams(TypedDict, total=False): - agent_config: Required[AgentConfig] - """The configuration for the agent.""" diff --git a/src/llama_stack_client/types/agent_create_response.py b/src/llama_stack_client/types/agent_create_response.py deleted file mode 100644 index 93651cb6..00000000 --- a/src/llama_stack_client/types/agent_create_response.py +++ /dev/null @@ -1,9 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from .._models import BaseModel - -__all__ = ["AgentCreateResponse"] - - -class AgentCreateResponse(BaseModel): - agent_id: str diff --git a/src/llama_stack_client/types/agents/__init__.py b/src/llama_stack_client/types/agents/__init__.py deleted file mode 100644 index 30355cbf..00000000 --- a/src/llama_stack_client/types/agents/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
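For reference, a small sketch of how the AgentCreateParams/AgentCreateResponse pair deleted above shaped the agent-creation request; the inner AgentConfig fields shown here ("model", "instructions") are assumptions, since that shared type is defined elsewhere in the removed tree:

from llama_stack_client.types import AgentCreateParams

params: AgentCreateParams = {
    "agent_config": {                 # required AgentConfig; inner field names are assumed
        "model": "llama3.2:3b",       # placeholder model identifier
        "instructions": "You are a helpful assistant.",
    }
}
# The matching AgentCreateResponse carries a single `agent_id: str` field.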
- -from __future__ import annotations - -from .turn import Turn as Turn -from .session import Session as Session -from .turn_create_params import TurnCreateParams as TurnCreateParams -from .turn_resume_params import TurnResumeParams as TurnResumeParams -from .turn_response_event import TurnResponseEvent as TurnResponseEvent -from .session_create_params import SessionCreateParams as SessionCreateParams -from .step_retrieve_response import StepRetrieveResponse as StepRetrieveResponse -from .session_create_response import SessionCreateResponse as SessionCreateResponse -from .session_retrieve_params import SessionRetrieveParams as SessionRetrieveParams -from .turn_response_event_payload import TurnResponseEventPayload as TurnResponseEventPayload -from .agent_turn_response_stream_chunk import AgentTurnResponseStreamChunk as AgentTurnResponseStreamChunk diff --git a/src/llama_stack_client/types/agents/agent_turn_response_stream_chunk.py b/src/llama_stack_client/types/agents/agent_turn_response_stream_chunk.py deleted file mode 100644 index c488ba81..00000000 --- a/src/llama_stack_client/types/agents/agent_turn_response_stream_chunk.py +++ /dev/null @@ -1,10 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from ..._models import BaseModel -from .turn_response_event import TurnResponseEvent - -__all__ = ["AgentTurnResponseStreamChunk"] - - -class AgentTurnResponseStreamChunk(BaseModel): - event: TurnResponseEvent diff --git a/src/llama_stack_client/types/agents/session.py b/src/llama_stack_client/types/agents/session.py deleted file mode 100644 index 707c4cbf..00000000 --- a/src/llama_stack_client/types/agents/session.py +++ /dev/null @@ -1,19 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import List -from datetime import datetime - -from .turn import Turn -from ..._models import BaseModel - -__all__ = ["Session"] - - -class Session(BaseModel): - session_id: str - - session_name: str - - started_at: datetime - - turns: List[Turn] diff --git a/src/llama_stack_client/types/agents/session_create_params.py b/src/llama_stack_client/types/agents/session_create_params.py deleted file mode 100644 index 5f421ae9..00000000 --- a/src/llama_stack_client/types/agents/session_create_params.py +++ /dev/null @@ -1,12 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing_extensions import Required, TypedDict - -__all__ = ["SessionCreateParams"] - - -class SessionCreateParams(TypedDict, total=False): - session_name: Required[str] - """The name of the session to create.""" diff --git a/src/llama_stack_client/types/agents/session_create_response.py b/src/llama_stack_client/types/agents/session_create_response.py deleted file mode 100644 index abf18665..00000000 --- a/src/llama_stack_client/types/agents/session_create_response.py +++ /dev/null @@ -1,9 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
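Session is a plain response model (session_id, session_name, started_at, turns) and SessionCreateParams only requires a session name. A small sketch of both sides, assuming the pre-removal package is importable; the session name is a made-up value.

from llama_stack_client.types.agents import Session, SessionCreateParams

params: SessionCreateParams = {"session_name": "support-ticket-1234"}  # hypothetical name

def describe(session: Session) -> str:
    # All four fields are required on the Session model shown above.
    return f"{session.session_name} started {session.started_at:%Y-%m-%d} with {len(session.turns)} turn(s)"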
- -from ..._models import BaseModel - -__all__ = ["SessionCreateResponse"] - - -class SessionCreateResponse(BaseModel): - session_id: str diff --git a/src/llama_stack_client/types/agents/session_retrieve_params.py b/src/llama_stack_client/types/agents/session_retrieve_params.py deleted file mode 100644 index 30337586..00000000 --- a/src/llama_stack_client/types/agents/session_retrieve_params.py +++ /dev/null @@ -1,15 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import List -from typing_extensions import Required, TypedDict - -__all__ = ["SessionRetrieveParams"] - - -class SessionRetrieveParams(TypedDict, total=False): - agent_id: Required[str] - - turn_ids: List[str] - """(Optional) List of turn IDs to filter the session by.""" diff --git a/src/llama_stack_client/types/agents/step_retrieve_response.py b/src/llama_stack_client/types/agents/step_retrieve_response.py deleted file mode 100644 index fcf2044b..00000000 --- a/src/llama_stack_client/types/agents/step_retrieve_response.py +++ /dev/null @@ -1,23 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Union -from typing_extensions import Annotated, TypeAlias - -from ..._utils import PropertyInfo -from ..._models import BaseModel -from ..inference_step import InferenceStep -from ..shield_call_step import ShieldCallStep -from ..tool_execution_step import ToolExecutionStep -from ..memory_retrieval_step import MemoryRetrievalStep - -__all__ = ["StepRetrieveResponse", "Step"] - -Step: TypeAlias = Annotated[ - Union[InferenceStep, ToolExecutionStep, ShieldCallStep, MemoryRetrievalStep], - PropertyInfo(discriminator="step_type"), -] - - -class StepRetrieveResponse(BaseModel): - step: Step - """An inference step in an agent turn.""" diff --git a/src/llama_stack_client/types/agents/turn.py b/src/llama_stack_client/types/agents/turn.py deleted file mode 100644 index aa8eeefe..00000000 --- a/src/llama_stack_client/types/agents/turn.py +++ /dev/null @@ -1,107 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
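StepRetrieveResponse.step is a union discriminated on step_type, so consumers branch on that field rather than on isinstance checks. A sketch under the assumption that each step model carries the step_type literal implied by the discriminator:

from llama_stack_client.types.agents import StepRetrieveResponse

def label(resp: StepRetrieveResponse) -> str:
    step = resp.step
    # step_type is the discriminator declared via PropertyInfo above.
    if step.step_type == "tool_execution":
        return "tool call"
    if step.step_type == "shield_call":
        return "safety shield"
    return step.step_type  # "inference" or "memory_retrieval"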
- -from typing import List, Union, Optional -from datetime import datetime -from typing_extensions import Literal, Annotated, TypeAlias - -from ..._utils import PropertyInfo -from ..._models import BaseModel -from ..inference_step import InferenceStep -from ..shield_call_step import ShieldCallStep -from ..shared.user_message import UserMessage -from ..tool_execution_step import ToolExecutionStep -from ..memory_retrieval_step import MemoryRetrievalStep -from ..shared.completion_message import CompletionMessage -from ..shared.tool_response_message import ToolResponseMessage -from ..shared.interleaved_content_item import InterleavedContentItem - -__all__ = [ - "Turn", - "InputMessage", - "Step", - "OutputAttachment", - "OutputAttachmentContent", - "OutputAttachmentContentImageContentItem", - "OutputAttachmentContentImageContentItemImage", - "OutputAttachmentContentImageContentItemImageURL", - "OutputAttachmentContentTextContentItem", - "OutputAttachmentContentURL", -] - -InputMessage: TypeAlias = Union[UserMessage, ToolResponseMessage] - -Step: TypeAlias = Annotated[ - Union[InferenceStep, ToolExecutionStep, ShieldCallStep, MemoryRetrievalStep], - PropertyInfo(discriminator="step_type"), -] - - -class OutputAttachmentContentImageContentItemImageURL(BaseModel): - uri: str - - -class OutputAttachmentContentImageContentItemImage(BaseModel): - data: Optional[str] = None - """base64 encoded image data as string""" - - url: Optional[OutputAttachmentContentImageContentItemImageURL] = None - """A URL of the image or data URL in the format of data:image/{type};base64,{data}. - - Note that URL could have length limits. - """ - - -class OutputAttachmentContentImageContentItem(BaseModel): - image: OutputAttachmentContentImageContentItemImage - """Image as a base64 encoded string or an URL""" - - type: Literal["image"] - """Discriminator type of the content item. Always "image" """ - - -class OutputAttachmentContentTextContentItem(BaseModel): - text: str - """Text content""" - - type: Literal["text"] - """Discriminator type of the content item. Always "text" """ - - -class OutputAttachmentContentURL(BaseModel): - uri: str - - -OutputAttachmentContent: TypeAlias = Union[ - str, - OutputAttachmentContentImageContentItem, - OutputAttachmentContentTextContentItem, - List[InterleavedContentItem], - OutputAttachmentContentURL, -] - - -class OutputAttachment(BaseModel): - content: OutputAttachmentContent - """The content of the attachment.""" - - mime_type: str - """The MIME type of the attachment.""" - - -class Turn(BaseModel): - input_messages: List[InputMessage] - - output_message: CompletionMessage - """A message containing the model's (assistant) response in a chat conversation.""" - - session_id: str - - started_at: datetime - - steps: List[Step] - - turn_id: str - - completed_at: Optional[datetime] = None - - output_attachments: Optional[List[OutputAttachment]] = None diff --git a/src/llama_stack_client/types/agents/turn_create_params.py b/src/llama_stack_client/types/agents/turn_create_params.py deleted file mode 100644 index 01e0f64b..00000000 --- a/src/llama_stack_client/types/agents/turn_create_params.py +++ /dev/null @@ -1,161 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
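The Turn model above is purely data, so a caller can summarize one with ordinary attribute access; a brief sketch (the formatting choices are arbitrary):

from llama_stack_client.types.agents import Turn

def summarize(turn: Turn) -> str:
    line = f"turn {turn.turn_id} in session {turn.session_id}: {len(turn.steps)} step(s)"
    if turn.output_attachments:
        line += f", {len(turn.output_attachments)} attachment(s)"
    if turn.completed_at is not None:
        line += f", completed {turn.completed_at.isoformat()}"
    return line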
- -from __future__ import annotations - -from typing import Dict, List, Union, Iterable -from typing_extensions import Literal, Required, TypeAlias, TypedDict - -from ..shared_params.user_message import UserMessage -from ..shared_params.tool_response_message import ToolResponseMessage -from ..shared_params.interleaved_content_item import InterleavedContentItem - -__all__ = [ - "TurnCreateParamsBase", - "Message", - "Document", - "DocumentContent", - "DocumentContentImageContentItem", - "DocumentContentImageContentItemImage", - "DocumentContentImageContentItemImageURL", - "DocumentContentTextContentItem", - "DocumentContentURL", - "ToolConfig", - "Toolgroup", - "ToolgroupAgentToolGroupWithArgs", - "TurnCreateParamsNonStreaming", - "TurnCreateParamsStreaming", -] - - -class TurnCreateParamsBase(TypedDict, total=False): - agent_id: Required[str] - - messages: Required[Iterable[Message]] - """List of messages to start the turn with.""" - - documents: Iterable[Document] - """(Optional) List of documents to create the turn with.""" - - tool_config: ToolConfig - """ - (Optional) The tool configuration to create the turn with, will be used to - override the agent's tool_config. - """ - - toolgroups: List[Toolgroup] - """ - (Optional) List of toolgroups to create the turn with, will be used in addition - to the agent's config toolgroups for the request. - """ - - -Message: TypeAlias = Union[UserMessage, ToolResponseMessage] - - -class DocumentContentImageContentItemImageURL(TypedDict, total=False): - uri: Required[str] - - -class DocumentContentImageContentItemImage(TypedDict, total=False): - data: str - """base64 encoded image data as string""" - - url: DocumentContentImageContentItemImageURL - """A URL of the image or data URL in the format of data:image/{type};base64,{data}. - - Note that URL could have length limits. - """ - - -class DocumentContentImageContentItem(TypedDict, total=False): - image: Required[DocumentContentImageContentItemImage] - """Image as a base64 encoded string or an URL""" - - type: Required[Literal["image"]] - """Discriminator type of the content item. Always "image" """ - - -class DocumentContentTextContentItem(TypedDict, total=False): - text: Required[str] - """Text content""" - - type: Required[Literal["text"]] - """Discriminator type of the content item. Always "text" """ - - -class DocumentContentURL(TypedDict, total=False): - uri: Required[str] - - -DocumentContent: TypeAlias = Union[ - str, - DocumentContentImageContentItem, - DocumentContentTextContentItem, - Iterable[InterleavedContentItem], - DocumentContentURL, -] - - -class Document(TypedDict, total=False): - content: Required[DocumentContent] - """The content of the document.""" - - mime_type: Required[str] - """The MIME type of the document.""" - - -class ToolConfig(TypedDict, total=False): - system_message_behavior: Literal["append", "replace"] - """(Optional) Config for how to override the default system prompt. - - - `SystemMessageBehavior.append`: Appends the provided system message to the - default system prompt. - `SystemMessageBehavior.replace`: Replaces the default - system prompt with the provided system message. The system message can include - the string '{{function_definitions}}' to indicate where the function - definitions should be inserted. - """ - - tool_choice: Union[Literal["auto", "required", "none"], str] - """(Optional) Whether tool use is automatic, required, or none. - - Can also specify a tool name to use a specific tool. Defaults to - ToolChoice.auto. 
- """ - - tool_prompt_format: Literal["json", "function_tag", "python_list"] - """(Optional) Instructs the model how to format tool calls. - - By default, Llama Stack will attempt to use a format that is best adapted to the - model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON - object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a - tag. - `ToolPromptFormat.python_list`: The tool calls - are output as Python syntax -- a list of function calls. - """ - - -class ToolgroupAgentToolGroupWithArgs(TypedDict, total=False): - args: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - - name: Required[str] - - -Toolgroup: TypeAlias = Union[str, ToolgroupAgentToolGroupWithArgs] - - -class TurnCreateParamsNonStreaming(TurnCreateParamsBase, total=False): - stream: Literal[False] - """(Optional) If True, generate an SSE event stream of the response. - - Defaults to False. - """ - - -class TurnCreateParamsStreaming(TurnCreateParamsBase): - stream: Required[Literal[True]] - """(Optional) If True, generate an SSE event stream of the response. - - Defaults to False. - """ - - -TurnCreateParams = Union[TurnCreateParamsNonStreaming, TurnCreateParamsStreaming] diff --git a/src/llama_stack_client/types/agents/turn_response_event.py b/src/llama_stack_client/types/agents/turn_response_event.py deleted file mode 100644 index c6a42d75..00000000 --- a/src/llama_stack_client/types/agents/turn_response_event.py +++ /dev/null @@ -1,10 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from ..._models import BaseModel -from .turn_response_event_payload import TurnResponseEventPayload - -__all__ = ["TurnResponseEvent"] - - -class TurnResponseEvent(BaseModel): - payload: TurnResponseEventPayload diff --git a/src/llama_stack_client/types/agents/turn_response_event_payload.py b/src/llama_stack_client/types/agents/turn_response_event_payload.py deleted file mode 100644 index 345a7ec4..00000000 --- a/src/llama_stack_client/types/agents/turn_response_event_payload.py +++ /dev/null @@ -1,97 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from typing import Dict, List, Union, Optional -from typing_extensions import Literal, Annotated, TypeAlias - -from .turn import Turn -from ..._utils import PropertyInfo -from ..._models import BaseModel -from ..inference_step import InferenceStep -from ..shield_call_step import ShieldCallStep -from ..tool_execution_step import ToolExecutionStep -from ..shared.content_delta import ContentDelta -from ..memory_retrieval_step import MemoryRetrievalStep - -__all__ = [ - "TurnResponseEventPayload", - "AgentTurnResponseStepStartPayload", - "AgentTurnResponseStepProgressPayload", - "AgentTurnResponseStepCompletePayload", - "AgentTurnResponseStepCompletePayloadStepDetails", - "AgentTurnResponseTurnStartPayload", - "AgentTurnResponseTurnCompletePayload", - "AgentTurnResponseTurnAwaitingInputPayload", -] - - -class AgentTurnResponseStepStartPayload(BaseModel): - event_type: Literal["step_start"] - - step_id: str - - step_type: Literal["inference", "tool_execution", "shield_call", "memory_retrieval"] - """Type of the step in an agent turn.""" - - metadata: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None - - -class AgentTurnResponseStepProgressPayload(BaseModel): - delta: ContentDelta - - event_type: Literal["step_progress"] - - step_id: str - - step_type: Literal["inference", "tool_execution", "shield_call", "memory_retrieval"] - """Type of the step in an agent turn.""" - - -AgentTurnResponseStepCompletePayloadStepDetails: TypeAlias = Annotated[ - Union[InferenceStep, ToolExecutionStep, ShieldCallStep, MemoryRetrievalStep], - PropertyInfo(discriminator="step_type"), -] - - -class AgentTurnResponseStepCompletePayload(BaseModel): - event_type: Literal["step_complete"] - - step_details: AgentTurnResponseStepCompletePayloadStepDetails - """An inference step in an agent turn.""" - - step_id: str - - step_type: Literal["inference", "tool_execution", "shield_call", "memory_retrieval"] - """Type of the step in an agent turn.""" - - -class AgentTurnResponseTurnStartPayload(BaseModel): - event_type: Literal["turn_start"] - - turn_id: str - - -class AgentTurnResponseTurnCompletePayload(BaseModel): - event_type: Literal["turn_complete"] - - turn: Turn - """A single turn in an interaction with an Agentic System.""" - - -class AgentTurnResponseTurnAwaitingInputPayload(BaseModel): - event_type: Literal["turn_awaiting_input"] - - turn: Turn - """A single turn in an interaction with an Agentic System.""" - - -TurnResponseEventPayload: TypeAlias = Annotated[ - Union[ - AgentTurnResponseStepStartPayload, - AgentTurnResponseStepProgressPayload, - AgentTurnResponseStepCompletePayload, - AgentTurnResponseTurnStartPayload, - AgentTurnResponseTurnCompletePayload, - AgentTurnResponseTurnAwaitingInputPayload, - ], - PropertyInfo(discriminator="event_type"), -] diff --git a/src/llama_stack_client/types/agents/turn_resume_params.py b/src/llama_stack_client/types/agents/turn_resume_params.py deleted file mode 100644 index 554e3578..00000000 --- a/src/llama_stack_client/types/agents/turn_resume_params.py +++ /dev/null @@ -1,32 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
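Because TurnResponseEventPayload is discriminated on event_type, a streaming consumer can dispatch on that single field; a minimal sketch using the chunk type defined earlier in this patch:

from llama_stack_client.types.agents import AgentTurnResponseStreamChunk

def handle(chunk: AgentTurnResponseStreamChunk) -> None:
    payload = chunk.event.payload
    if payload.event_type == "step_progress":
        print("delta:", payload.delta)
    elif payload.event_type in ("turn_complete", "turn_awaiting_input"):
        # Both of these payload variants carry the full Turn.
        print("turn", payload.turn.turn_id, payload.event_type)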
- -from __future__ import annotations - -from typing import Union, Iterable -from typing_extensions import Literal, Required, TypedDict - -from ..tool_response_param import ToolResponseParam - -__all__ = ["TurnResumeParamsBase", "TurnResumeParamsNonStreaming", "TurnResumeParamsStreaming"] - - -class TurnResumeParamsBase(TypedDict, total=False): - agent_id: Required[str] - - session_id: Required[str] - - tool_responses: Required[Iterable[ToolResponseParam]] - """The tool call responses to resume the turn with.""" - - -class TurnResumeParamsNonStreaming(TurnResumeParamsBase, total=False): - stream: Literal[False] - """Whether to stream the response.""" - - -class TurnResumeParamsStreaming(TurnResumeParamsBase): - stream: Required[Literal[True]] - """Whether to stream the response.""" - - -TurnResumeParams = Union[TurnResumeParamsNonStreaming, TurnResumeParamsStreaming] diff --git a/src/llama_stack_client/types/algorithm_config_param.py b/src/llama_stack_client/types/algorithm_config_param.py deleted file mode 100644 index 3f3c0cac..00000000 --- a/src/llama_stack_client/types/algorithm_config_param.py +++ /dev/null @@ -1,37 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import List, Union -from typing_extensions import Literal, Required, TypeAlias, TypedDict - -__all__ = ["AlgorithmConfigParam", "LoraFinetuningConfig", "QatFinetuningConfig"] - - -class LoraFinetuningConfig(TypedDict, total=False): - alpha: Required[int] - - apply_lora_to_mlp: Required[bool] - - apply_lora_to_output: Required[bool] - - lora_attn_modules: Required[List[str]] - - rank: Required[int] - - type: Required[Literal["LoRA"]] - - quantize_base: bool - - use_dora: bool - - -class QatFinetuningConfig(TypedDict, total=False): - group_size: Required[int] - - quantizer_name: Required[str] - - type: Required[Literal["QAT"]] - - -AlgorithmConfigParam: TypeAlias = Union[LoraFinetuningConfig, QatFinetuningConfig] diff --git a/src/llama_stack_client/types/benchmark.py b/src/llama_stack_client/types/benchmark.py deleted file mode 100644 index e0b1ce9e..00000000 --- a/src/llama_stack_client/types/benchmark.py +++ /dev/null @@ -1,24 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Dict, List, Union, Optional -from typing_extensions import Literal - -from .._models import BaseModel - -__all__ = ["Benchmark"] - - -class Benchmark(BaseModel): - dataset_id: str - - identifier: str - - metadata: Dict[str, Union[bool, float, str, List[object], object, None]] - - provider_id: str - - scoring_functions: List[str] - - type: Literal["benchmark"] - - provider_resource_id: Optional[str] = None diff --git a/src/llama_stack_client/types/benchmark_config_param.py b/src/llama_stack_client/types/benchmark_config_param.py deleted file mode 100644 index 740bf99b..00000000 --- a/src/llama_stack_client/types/benchmark_config_param.py +++ /dev/null @@ -1,28 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
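AlgorithmConfigParam is a union of the two fine-tuning configs above, discriminated by the type literal. A sketch of a LoRA config dict; the hyperparameter values and attention-module names are illustrative only:

from llama_stack_client.types.algorithm_config_param import AlgorithmConfigParam

lora: AlgorithmConfigParam = {
    "type": "LoRA",
    "alpha": 16,
    "rank": 8,
    "apply_lora_to_mlp": True,
    "apply_lora_to_output": False,
    "lora_attn_modules": ["q_proj", "v_proj"],  # illustrative module names
    "quantize_base": False,
}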
- -from __future__ import annotations - -from typing import Dict -from typing_extensions import Required, TypedDict - -from .eval_candidate_param import EvalCandidateParam -from .scoring_fn_params_param import ScoringFnParamsParam - -__all__ = ["BenchmarkConfigParam"] - - -class BenchmarkConfigParam(TypedDict, total=False): - eval_candidate: Required[EvalCandidateParam] - """The candidate to evaluate.""" - - scoring_params: Required[Dict[str, ScoringFnParamsParam]] - """ - Map between scoring function id and parameters for each scoring function you - want to run - """ - - num_examples: int - """(Optional) The number of examples to evaluate. - - If not provided, all examples in the dataset will be evaluated - """ diff --git a/src/llama_stack_client/types/benchmark_list_response.py b/src/llama_stack_client/types/benchmark_list_response.py deleted file mode 100644 index b2e8ad2b..00000000 --- a/src/llama_stack_client/types/benchmark_list_response.py +++ /dev/null @@ -1,10 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import List -from typing_extensions import TypeAlias - -from .benchmark import Benchmark - -__all__ = ["BenchmarkListResponse"] - -BenchmarkListResponse: TypeAlias = List[Benchmark] diff --git a/src/llama_stack_client/types/benchmark_register_params.py b/src/llama_stack_client/types/benchmark_register_params.py deleted file mode 100644 index 0fa9d508..00000000 --- a/src/llama_stack_client/types/benchmark_register_params.py +++ /dev/null @@ -1,28 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Dict, List, Union, Iterable -from typing_extensions import Required, TypedDict - -__all__ = ["BenchmarkRegisterParams"] - - -class BenchmarkRegisterParams(TypedDict, total=False): - benchmark_id: Required[str] - """The ID of the benchmark to register.""" - - dataset_id: Required[str] - """The ID of the dataset to use for the benchmark.""" - - scoring_functions: Required[List[str]] - """The scoring functions to use for the benchmark.""" - - metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] - """The metadata to use for the benchmark.""" - - provider_benchmark_id: str - """The ID of the provider benchmark to use for the benchmark.""" - - provider_id: str - """The ID of the provider to use for the benchmark.""" diff --git a/src/llama_stack_client/types/chat/__init__.py b/src/llama_stack_client/types/chat/__init__.py deleted file mode 100644 index 27720e7f..00000000 --- a/src/llama_stack_client/types/chat/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
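BenchmarkRegisterParams is a flat TypedDict, so registering a benchmark boils down to a single dict; the identifiers and scoring-function name below are made up:

from llama_stack_client.types import BenchmarkRegisterParams

params: BenchmarkRegisterParams = {
    "benchmark_id": "summarization-v1",         # made-up IDs
    "dataset_id": "summarization-eval-set",
    "scoring_functions": ["basic::subset_of"],  # made-up scoring function id
    "metadata": {"owner": "eval-team"},
}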
- -from __future__ import annotations - -from .completion_list_params import CompletionListParams as CompletionListParams -from .completion_create_params import CompletionCreateParams as CompletionCreateParams -from .completion_list_response import CompletionListResponse as CompletionListResponse -from .completion_create_response import CompletionCreateResponse as CompletionCreateResponse -from .completion_retrieve_response import CompletionRetrieveResponse as CompletionRetrieveResponse diff --git a/src/llama_stack_client/types/chat/completion_create_params.py b/src/llama_stack_client/types/chat/completion_create_params.py deleted file mode 100644 index 2c9d26f7..00000000 --- a/src/llama_stack_client/types/chat/completion_create_params.py +++ /dev/null @@ -1,401 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Dict, List, Union, Iterable -from typing_extensions import Literal, Required, TypeAlias, TypedDict - -__all__ = [ - "CompletionCreateParamsBase", - "Message", - "MessageOpenAIUserMessageParam", - "MessageOpenAIUserMessageParamContentUnionMember1", - "MessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", - "MessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", - "MessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", - "MessageOpenAISystemMessageParam", - "MessageOpenAISystemMessageParamContentUnionMember1", - "MessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", - "MessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", - "MessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", - "MessageOpenAIAssistantMessageParam", - "MessageOpenAIAssistantMessageParamContentUnionMember1", - "MessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", - "MessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", - "MessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", - "MessageOpenAIAssistantMessageParamToolCall", - "MessageOpenAIAssistantMessageParamToolCallFunction", - "MessageOpenAIToolMessageParam", - "MessageOpenAIToolMessageParamContentUnionMember1", - "MessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", - "MessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", - "MessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", - "MessageOpenAIDeveloperMessageParam", - "MessageOpenAIDeveloperMessageParamContentUnionMember1", - "MessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", - "MessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", - "MessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", - "ResponseFormat", - "ResponseFormatOpenAIResponseFormatText", - "ResponseFormatOpenAIResponseFormatJsonSchema", - "ResponseFormatOpenAIResponseFormatJsonSchemaJsonSchema", - "ResponseFormatOpenAIResponseFormatJsonObject", - "CompletionCreateParamsNonStreaming", - "CompletionCreateParamsStreaming", -] - - -class CompletionCreateParamsBase(TypedDict, total=False): - messages: 
Required[Iterable[Message]] - """List of messages in the conversation.""" - - model: Required[str] - """The identifier of the model to use. - - The model must be registered with Llama Stack and available via the /models - endpoint. - """ - - frequency_penalty: float - """(Optional) The penalty for repeated tokens.""" - - function_call: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - """(Optional) The function call to use.""" - - functions: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - """(Optional) List of functions to use.""" - - logit_bias: Dict[str, float] - """(Optional) The logit bias to use.""" - - logprobs: bool - """(Optional) The log probabilities to use.""" - - max_completion_tokens: int - """(Optional) The maximum number of tokens to generate.""" - - max_tokens: int - """(Optional) The maximum number of tokens to generate.""" - - n: int - """(Optional) The number of completions to generate.""" - - parallel_tool_calls: bool - """(Optional) Whether to parallelize tool calls.""" - - presence_penalty: float - """(Optional) The penalty for repeated tokens.""" - - response_format: ResponseFormat - """(Optional) The response format to use.""" - - seed: int - """(Optional) The seed to use.""" - - stop: Union[str, List[str]] - """(Optional) The stop tokens to use.""" - - stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] - """(Optional) The stream options to use.""" - - temperature: float - """(Optional) The temperature to use.""" - - tool_choice: Union[str, Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - """(Optional) The tool choice to use.""" - - tools: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - """(Optional) The tools to use.""" - - top_logprobs: int - """(Optional) The top log probabilities to use.""" - - top_p: float - """(Optional) The top p to use.""" - - user: str - """(Optional) The user to use.""" - - -class MessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(TypedDict, total=False): - text: Required[str] - - type: Required[Literal["text"]] - - -class MessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( - TypedDict, total=False -): - url: Required[str] - - detail: str - - -class MessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(TypedDict, total=False): - image_url: Required[ - MessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL - ] - - type: Required[Literal["image_url"]] - - -MessageOpenAIUserMessageParamContentUnionMember1: TypeAlias = Union[ - MessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, - MessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, -] - - -class MessageOpenAIUserMessageParam(TypedDict, total=False): - content: Required[Union[str, Iterable[MessageOpenAIUserMessageParamContentUnionMember1]]] - """The content of the message, which can include text and other media""" - - role: Required[Literal["user"]] - """Must be "user" to identify this as a user message""" - - name: str - """(Optional) The name of the user message participant.""" - - -class MessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam( - TypedDict, total=False -): - text: Required[str] - - type: Required[Literal["text"]] - - -class 
MessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( - TypedDict, total=False -): - url: Required[str] - - detail: str - - -class MessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam( - TypedDict, total=False -): - image_url: Required[ - MessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL - ] - - type: Required[Literal["image_url"]] - - -MessageOpenAISystemMessageParamContentUnionMember1: TypeAlias = Union[ - MessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, - MessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, -] - - -class MessageOpenAISystemMessageParam(TypedDict, total=False): - content: Required[Union[str, Iterable[MessageOpenAISystemMessageParamContentUnionMember1]]] - """The content of the "system prompt". - - If multiple system messages are provided, they are concatenated. The underlying - Llama Stack code may also add other system messages (for example, for formatting - tool definitions). - """ - - role: Required[Literal["system"]] - """Must be "system" to identify this as a system message""" - - name: str - """(Optional) The name of the system message participant.""" - - -class MessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam( - TypedDict, total=False -): - text: Required[str] - - type: Required[Literal["text"]] - - -class MessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( - TypedDict, total=False -): - url: Required[str] - - detail: str - - -class MessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam( - TypedDict, total=False -): - image_url: Required[ - MessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL - ] - - type: Required[Literal["image_url"]] - - -MessageOpenAIAssistantMessageParamContentUnionMember1: TypeAlias = Union[ - MessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, - MessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, -] - - -class MessageOpenAIAssistantMessageParamToolCallFunction(TypedDict, total=False): - arguments: str - - name: str - - -class MessageOpenAIAssistantMessageParamToolCall(TypedDict, total=False): - type: Required[Literal["function"]] - - id: str - - function: MessageOpenAIAssistantMessageParamToolCallFunction - - index: int - - -class MessageOpenAIAssistantMessageParam(TypedDict, total=False): - role: Required[Literal["assistant"]] - """Must be "assistant" to identify this as the model's response""" - - content: Union[str, Iterable[MessageOpenAIAssistantMessageParamContentUnionMember1]] - """The content of the model's response""" - - name: str - """(Optional) The name of the assistant message participant.""" - - tool_calls: Iterable[MessageOpenAIAssistantMessageParamToolCall] - """List of tool calls. 
Each tool call is an OpenAIChatCompletionToolCall object.""" - - -class MessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(TypedDict, total=False): - text: Required[str] - - type: Required[Literal["text"]] - - -class MessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( - TypedDict, total=False -): - url: Required[str] - - detail: str - - -class MessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(TypedDict, total=False): - image_url: Required[ - MessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL - ] - - type: Required[Literal["image_url"]] - - -MessageOpenAIToolMessageParamContentUnionMember1: TypeAlias = Union[ - MessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, - MessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, -] - - -class MessageOpenAIToolMessageParam(TypedDict, total=False): - content: Required[Union[str, Iterable[MessageOpenAIToolMessageParamContentUnionMember1]]] - """The response content from the tool""" - - role: Required[Literal["tool"]] - """Must be "tool" to identify this as a tool response""" - - tool_call_id: Required[str] - """Unique identifier for the tool call this response is for""" - - -class MessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam( - TypedDict, total=False -): - text: Required[str] - - type: Required[Literal["text"]] - - -class MessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( - TypedDict, total=False -): - url: Required[str] - - detail: str - - -class MessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam( - TypedDict, total=False -): - image_url: Required[ - MessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL - ] - - type: Required[Literal["image_url"]] - - -MessageOpenAIDeveloperMessageParamContentUnionMember1: TypeAlias = Union[ - MessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, - MessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, -] - - -class MessageOpenAIDeveloperMessageParam(TypedDict, total=False): - content: Required[Union[str, Iterable[MessageOpenAIDeveloperMessageParamContentUnionMember1]]] - """The content of the developer message""" - - role: Required[Literal["developer"]] - """Must be "developer" to identify this as a developer message""" - - name: str - """(Optional) The name of the developer message participant.""" - - -Message: TypeAlias = Union[ - MessageOpenAIUserMessageParam, - MessageOpenAISystemMessageParam, - MessageOpenAIAssistantMessageParam, - MessageOpenAIToolMessageParam, - MessageOpenAIDeveloperMessageParam, -] - - -class ResponseFormatOpenAIResponseFormatText(TypedDict, total=False): - type: Required[Literal["text"]] - - -class ResponseFormatOpenAIResponseFormatJsonSchemaJsonSchema(TypedDict, total=False): - name: Required[str] - - description: str - - schema: Dict[str, Union[bool, float, str, Iterable[object], object, None]] - - strict: bool - - -class ResponseFormatOpenAIResponseFormatJsonSchema(TypedDict, total=False): - json_schema: Required[ResponseFormatOpenAIResponseFormatJsonSchemaJsonSchema] - - type: Required[Literal["json_schema"]] - - -class ResponseFormatOpenAIResponseFormatJsonObject(TypedDict, 
total=False): - type: Required[Literal["json_object"]] - - -ResponseFormat: TypeAlias = Union[ - ResponseFormatOpenAIResponseFormatText, - ResponseFormatOpenAIResponseFormatJsonSchema, - ResponseFormatOpenAIResponseFormatJsonObject, -] - - -class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase, total=False): - stream: Literal[False] - """(Optional) Whether to stream the response.""" - - -class CompletionCreateParamsStreaming(CompletionCreateParamsBase): - stream: Required[Literal[True]] - """(Optional) Whether to stream the response.""" - - -CompletionCreateParams = Union[CompletionCreateParamsNonStreaming, CompletionCreateParamsStreaming] diff --git a/src/llama_stack_client/types/chat/completion_create_response.py b/src/llama_stack_client/types/chat/completion_create_response.py deleted file mode 100644 index 5c8eb51c..00000000 --- a/src/llama_stack_client/types/chat/completion_create_response.py +++ /dev/null @@ -1,383 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import List, Union, Optional -from typing_extensions import Literal, Annotated, TypeAlias - -from ..._utils import PropertyInfo -from ..._models import BaseModel -from ..chat_completion_chunk import ChatCompletionChunk - -__all__ = [ - "CompletionCreateResponse", - "OpenAIChatCompletion", - "OpenAIChatCompletionChoice", - "OpenAIChatCompletionChoiceMessage", - "OpenAIChatCompletionChoiceMessageOpenAIUserMessageParam", - "OpenAIChatCompletionChoiceMessageOpenAIUserMessageParamContentUnionMember1", - "OpenAIChatCompletionChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", - "OpenAIChatCompletionChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", - "OpenAIChatCompletionChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", - "OpenAIChatCompletionChoiceMessageOpenAISystemMessageParam", - "OpenAIChatCompletionChoiceMessageOpenAISystemMessageParamContentUnionMember1", - "OpenAIChatCompletionChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", - "OpenAIChatCompletionChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", - "OpenAIChatCompletionChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", - "OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParam", - "OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamContentUnionMember1", - "OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", - "OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", - "OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", - "OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamToolCall", - "OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamToolCallFunction", - "OpenAIChatCompletionChoiceMessageOpenAIToolMessageParam", - "OpenAIChatCompletionChoiceMessageOpenAIToolMessageParamContentUnionMember1", - "OpenAIChatCompletionChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", - "OpenAIChatCompletionChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", - 
"OpenAIChatCompletionChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", - "OpenAIChatCompletionChoiceMessageOpenAIDeveloperMessageParam", - "OpenAIChatCompletionChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1", - "OpenAIChatCompletionChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", - "OpenAIChatCompletionChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", - "OpenAIChatCompletionChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", - "OpenAIChatCompletionChoiceLogprobs", - "OpenAIChatCompletionChoiceLogprobsContent", - "OpenAIChatCompletionChoiceLogprobsContentTopLogprob", - "OpenAIChatCompletionChoiceLogprobsRefusal", - "OpenAIChatCompletionChoiceLogprobsRefusalTopLogprob", -] - - -class OpenAIChatCompletionChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam( - BaseModel -): - text: str - - type: Literal["text"] - - -class OpenAIChatCompletionChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( - BaseModel -): - url: str - - detail: Optional[str] = None - - -class OpenAIChatCompletionChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam( - BaseModel -): - image_url: OpenAIChatCompletionChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL - - type: Literal["image_url"] - - -OpenAIChatCompletionChoiceMessageOpenAIUserMessageParamContentUnionMember1: TypeAlias = Annotated[ - Union[ - OpenAIChatCompletionChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, - OpenAIChatCompletionChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, - ], - PropertyInfo(discriminator="type"), -] - - -class OpenAIChatCompletionChoiceMessageOpenAIUserMessageParam(BaseModel): - content: Union[str, List[OpenAIChatCompletionChoiceMessageOpenAIUserMessageParamContentUnionMember1]] - """The content of the message, which can include text and other media""" - - role: Literal["user"] - """Must be "user" to identify this as a user message""" - - name: Optional[str] = None - """(Optional) The name of the user message participant.""" - - -class OpenAIChatCompletionChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam( - BaseModel -): - text: str - - type: Literal["text"] - - -class OpenAIChatCompletionChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( - BaseModel -): - url: str - - detail: Optional[str] = None - - -class OpenAIChatCompletionChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam( - BaseModel -): - image_url: OpenAIChatCompletionChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL - - type: Literal["image_url"] - - -OpenAIChatCompletionChoiceMessageOpenAISystemMessageParamContentUnionMember1: TypeAlias = Annotated[ - Union[ - OpenAIChatCompletionChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, - OpenAIChatCompletionChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, - ], - PropertyInfo(discriminator="type"), -] - - -class 
OpenAIChatCompletionChoiceMessageOpenAISystemMessageParam(BaseModel): - content: Union[str, List[OpenAIChatCompletionChoiceMessageOpenAISystemMessageParamContentUnionMember1]] - """The content of the "system prompt". - - If multiple system messages are provided, they are concatenated. The underlying - Llama Stack code may also add other system messages (for example, for formatting - tool definitions). - """ - - role: Literal["system"] - """Must be "system" to identify this as a system message""" - - name: Optional[str] = None - """(Optional) The name of the system message participant.""" - - -class OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam( - BaseModel -): - text: str - - type: Literal["text"] - - -class OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( - BaseModel -): - url: str - - detail: Optional[str] = None - - -class OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam( - BaseModel -): - image_url: OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL - - type: Literal["image_url"] - - -OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamContentUnionMember1: TypeAlias = Annotated[ - Union[ - OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, - OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, - ], - PropertyInfo(discriminator="type"), -] - - -class OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamToolCallFunction(BaseModel): - arguments: Optional[str] = None - - name: Optional[str] = None - - -class OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamToolCall(BaseModel): - type: Literal["function"] - - id: Optional[str] = None - - function: Optional[OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamToolCallFunction] = None - - index: Optional[int] = None - - -class OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParam(BaseModel): - role: Literal["assistant"] - """Must be "assistant" to identify this as the model's response""" - - content: Union[str, List[OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamContentUnionMember1], None] = ( - None - ) - """The content of the model's response""" - - name: Optional[str] = None - """(Optional) The name of the assistant message participant.""" - - tool_calls: Optional[List[OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParamToolCall]] = None - """List of tool calls. 
Each tool call is an OpenAIChatCompletionToolCall object.""" - - -class OpenAIChatCompletionChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam( - BaseModel -): - text: str - - type: Literal["text"] - - -class OpenAIChatCompletionChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( - BaseModel -): - url: str - - detail: Optional[str] = None - - -class OpenAIChatCompletionChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam( - BaseModel -): - image_url: OpenAIChatCompletionChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL - - type: Literal["image_url"] - - -OpenAIChatCompletionChoiceMessageOpenAIToolMessageParamContentUnionMember1: TypeAlias = Annotated[ - Union[ - OpenAIChatCompletionChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, - OpenAIChatCompletionChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, - ], - PropertyInfo(discriminator="type"), -] - - -class OpenAIChatCompletionChoiceMessageOpenAIToolMessageParam(BaseModel): - content: Union[str, List[OpenAIChatCompletionChoiceMessageOpenAIToolMessageParamContentUnionMember1]] - """The response content from the tool""" - - role: Literal["tool"] - """Must be "tool" to identify this as a tool response""" - - tool_call_id: str - """Unique identifier for the tool call this response is for""" - - -class OpenAIChatCompletionChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam( - BaseModel -): - text: str - - type: Literal["text"] - - -class OpenAIChatCompletionChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( - BaseModel -): - url: str - - detail: Optional[str] = None - - -class OpenAIChatCompletionChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam( - BaseModel -): - image_url: OpenAIChatCompletionChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL - - type: Literal["image_url"] - - -OpenAIChatCompletionChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1: TypeAlias = Annotated[ - Union[ - OpenAIChatCompletionChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, - OpenAIChatCompletionChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, - ], - PropertyInfo(discriminator="type"), -] - - -class OpenAIChatCompletionChoiceMessageOpenAIDeveloperMessageParam(BaseModel): - content: Union[str, List[OpenAIChatCompletionChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1]] - """The content of the developer message""" - - role: Literal["developer"] - """Must be "developer" to identify this as a developer message""" - - name: Optional[str] = None - """(Optional) The name of the developer message participant.""" - - -OpenAIChatCompletionChoiceMessage: TypeAlias = Annotated[ - Union[ - OpenAIChatCompletionChoiceMessageOpenAIUserMessageParam, - OpenAIChatCompletionChoiceMessageOpenAISystemMessageParam, - OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParam, - OpenAIChatCompletionChoiceMessageOpenAIToolMessageParam, - OpenAIChatCompletionChoiceMessageOpenAIDeveloperMessageParam, - ], - PropertyInfo(discriminator="role"), -] - - -class 
OpenAIChatCompletionChoiceLogprobsContentTopLogprob(BaseModel): - token: str - - logprob: float - - bytes: Optional[List[int]] = None - - -class OpenAIChatCompletionChoiceLogprobsContent(BaseModel): - token: str - - logprob: float - - top_logprobs: List[OpenAIChatCompletionChoiceLogprobsContentTopLogprob] - - bytes: Optional[List[int]] = None - - -class OpenAIChatCompletionChoiceLogprobsRefusalTopLogprob(BaseModel): - token: str - - logprob: float - - bytes: Optional[List[int]] = None - - -class OpenAIChatCompletionChoiceLogprobsRefusal(BaseModel): - token: str - - logprob: float - - top_logprobs: List[OpenAIChatCompletionChoiceLogprobsRefusalTopLogprob] - - bytes: Optional[List[int]] = None - - -class OpenAIChatCompletionChoiceLogprobs(BaseModel): - content: Optional[List[OpenAIChatCompletionChoiceLogprobsContent]] = None - """(Optional) The log probabilities for the tokens in the message""" - - refusal: Optional[List[OpenAIChatCompletionChoiceLogprobsRefusal]] = None - """(Optional) The log probabilities for the tokens in the message""" - - -class OpenAIChatCompletionChoice(BaseModel): - finish_reason: str - """The reason the model stopped generating""" - - index: int - """The index of the choice""" - - message: OpenAIChatCompletionChoiceMessage - """The message from the model""" - - logprobs: Optional[OpenAIChatCompletionChoiceLogprobs] = None - """(Optional) The log probabilities for the tokens in the message""" - - -class OpenAIChatCompletion(BaseModel): - id: str - """The ID of the chat completion""" - - choices: List[OpenAIChatCompletionChoice] - """List of choices""" - - created: int - """The Unix timestamp in seconds when the chat completion was created""" - - model: str - """The model that was used to generate the chat completion""" - - object: Literal["chat.completion"] - """The object type, which will be "chat.completion" """ - - -CompletionCreateResponse: TypeAlias = Union[OpenAIChatCompletion, ChatCompletionChunk] diff --git a/src/llama_stack_client/types/chat/completion_list_params.py b/src/llama_stack_client/types/chat/completion_list_params.py deleted file mode 100644 index 5fb77c2c..00000000 --- a/src/llama_stack_client/types/chat/completion_list_params.py +++ /dev/null @@ -1,21 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing_extensions import Literal, TypedDict - -__all__ = ["CompletionListParams"] - - -class CompletionListParams(TypedDict, total=False): - after: str - """The ID of the last chat completion to return.""" - - limit: int - """The maximum number of chat completions to return.""" - - model: str - """The model to filter by.""" - - order: Literal["asc", "desc"] - """The order to sort the chat completions by: "asc" or "desc". Defaults to "desc".""" diff --git a/src/llama_stack_client/types/chat/completion_list_response.py b/src/llama_stack_client/types/chat/completion_list_response.py deleted file mode 100644 index d3b580a1..00000000 --- a/src/llama_stack_client/types/chat/completion_list_response.py +++ /dev/null @@ -1,667 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
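CompletionCreateResponse is a union of the non-streaming OpenAIChatCompletion defined above and the streaming ChatCompletionChunk (defined elsewhere in the package). A sketch of pulling the first choice's text out of a non-streaming response, leaning only on fields shown in this hunk:

from typing import Optional

from llama_stack_client.types.chat.completion_create_response import (
    CompletionCreateResponse,
    OpenAIChatCompletion,
)

def first_text(resp: CompletionCreateResponse) -> Optional[str]:
    if not isinstance(resp, OpenAIChatCompletion):
        return None  # streaming chunks are handled elsewhere
    message = resp.choices[0].message
    content = message.content  # may be a string, a list of content parts, or None
    return content if isinstance(content, str) else None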
- -from typing import List, Union, Optional -from typing_extensions import Literal, Annotated, TypeAlias - -from ..._utils import PropertyInfo -from ..._models import BaseModel - -__all__ = [ - "CompletionListResponse", - "Data", - "DataChoice", - "DataChoiceMessage", - "DataChoiceMessageOpenAIUserMessageParam", - "DataChoiceMessageOpenAIUserMessageParamContentUnionMember1", - "DataChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", - "DataChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", - "DataChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", - "DataChoiceMessageOpenAISystemMessageParam", - "DataChoiceMessageOpenAISystemMessageParamContentUnionMember1", - "DataChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", - "DataChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", - "DataChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", - "DataChoiceMessageOpenAIAssistantMessageParam", - "DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1", - "DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", - "DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", - "DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", - "DataChoiceMessageOpenAIAssistantMessageParamToolCall", - "DataChoiceMessageOpenAIAssistantMessageParamToolCallFunction", - "DataChoiceMessageOpenAIToolMessageParam", - "DataChoiceMessageOpenAIToolMessageParamContentUnionMember1", - "DataChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", - "DataChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", - "DataChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", - "DataChoiceMessageOpenAIDeveloperMessageParam", - "DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1", - "DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", - "DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", - "DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", - "DataChoiceLogprobs", - "DataChoiceLogprobsContent", - "DataChoiceLogprobsContentTopLogprob", - "DataChoiceLogprobsRefusal", - "DataChoiceLogprobsRefusalTopLogprob", - "DataInputMessage", - "DataInputMessageOpenAIUserMessageParam", - "DataInputMessageOpenAIUserMessageParamContentUnionMember1", - "DataInputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", - "DataInputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", - "DataInputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", - "DataInputMessageOpenAISystemMessageParam", - "DataInputMessageOpenAISystemMessageParamContentUnionMember1", - "DataInputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", - "DataInputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", - 
"DataInputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", - "DataInputMessageOpenAIAssistantMessageParam", - "DataInputMessageOpenAIAssistantMessageParamContentUnionMember1", - "DataInputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", - "DataInputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", - "DataInputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", - "DataInputMessageOpenAIAssistantMessageParamToolCall", - "DataInputMessageOpenAIAssistantMessageParamToolCallFunction", - "DataInputMessageOpenAIToolMessageParam", - "DataInputMessageOpenAIToolMessageParamContentUnionMember1", - "DataInputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", - "DataInputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", - "DataInputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", - "DataInputMessageOpenAIDeveloperMessageParam", - "DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1", - "DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", - "DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", - "DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", -] - - -class DataChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): - text: str - - type: Literal["text"] - - -class DataChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( - BaseModel -): - url: str - - detail: Optional[str] = None - - -class DataChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): - image_url: ( - DataChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL - ) - - type: Literal["image_url"] - - -DataChoiceMessageOpenAIUserMessageParamContentUnionMember1: TypeAlias = Annotated[ - Union[ - DataChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, - DataChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, - ], - PropertyInfo(discriminator="type"), -] - - -class DataChoiceMessageOpenAIUserMessageParam(BaseModel): - content: Union[str, List[DataChoiceMessageOpenAIUserMessageParamContentUnionMember1]] - """The content of the message, which can include text and other media""" - - role: Literal["user"] - """Must be "user" to identify this as a user message""" - - name: Optional[str] = None - """(Optional) The name of the user message participant.""" - - -class DataChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): - text: str - - type: Literal["text"] - - -class DataChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( - BaseModel -): - url: str - - detail: Optional[str] = None - - -class DataChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): - image_url: ( - DataChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL - ) - - type: 
Literal["image_url"] - - -DataChoiceMessageOpenAISystemMessageParamContentUnionMember1: TypeAlias = Annotated[ - Union[ - DataChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, - DataChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, - ], - PropertyInfo(discriminator="type"), -] - - -class DataChoiceMessageOpenAISystemMessageParam(BaseModel): - content: Union[str, List[DataChoiceMessageOpenAISystemMessageParamContentUnionMember1]] - """The content of the "system prompt". - - If multiple system messages are provided, they are concatenated. The underlying - Llama Stack code may also add other system messages (for example, for formatting - tool definitions). - """ - - role: Literal["system"] - """Must be "system" to identify this as a system message""" - - name: Optional[str] = None - """(Optional) The name of the system message participant.""" - - -class DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam( - BaseModel -): - text: str - - type: Literal["text"] - - -class DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( - BaseModel -): - url: str - - detail: Optional[str] = None - - -class DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam( - BaseModel -): - image_url: ( - DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL - ) - - type: Literal["image_url"] - - -DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1: TypeAlias = Annotated[ - Union[ - DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, - DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, - ], - PropertyInfo(discriminator="type"), -] - - -class DataChoiceMessageOpenAIAssistantMessageParamToolCallFunction(BaseModel): - arguments: Optional[str] = None - - name: Optional[str] = None - - -class DataChoiceMessageOpenAIAssistantMessageParamToolCall(BaseModel): - type: Literal["function"] - - id: Optional[str] = None - - function: Optional[DataChoiceMessageOpenAIAssistantMessageParamToolCallFunction] = None - - index: Optional[int] = None - - -class DataChoiceMessageOpenAIAssistantMessageParam(BaseModel): - role: Literal["assistant"] - """Must be "assistant" to identify this as the model's response""" - - content: Union[str, List[DataChoiceMessageOpenAIAssistantMessageParamContentUnionMember1], None] = None - """The content of the model's response""" - - name: Optional[str] = None - """(Optional) The name of the assistant message participant.""" - - tool_calls: Optional[List[DataChoiceMessageOpenAIAssistantMessageParamToolCall]] = None - """List of tool calls. 
Each tool call is an OpenAIChatCompletionToolCall object.""" - - -class DataChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): - text: str - - type: Literal["text"] - - -class DataChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( - BaseModel -): - url: str - - detail: Optional[str] = None - - -class DataChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): - image_url: ( - DataChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL - ) - - type: Literal["image_url"] - - -DataChoiceMessageOpenAIToolMessageParamContentUnionMember1: TypeAlias = Annotated[ - Union[ - DataChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, - DataChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, - ], - PropertyInfo(discriminator="type"), -] - - -class DataChoiceMessageOpenAIToolMessageParam(BaseModel): - content: Union[str, List[DataChoiceMessageOpenAIToolMessageParamContentUnionMember1]] - """The response content from the tool""" - - role: Literal["tool"] - """Must be "tool" to identify this as a tool response""" - - tool_call_id: str - """Unique identifier for the tool call this response is for""" - - -class DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam( - BaseModel -): - text: str - - type: Literal["text"] - - -class DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( - BaseModel -): - url: str - - detail: Optional[str] = None - - -class DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam( - BaseModel -): - image_url: ( - DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL - ) - - type: Literal["image_url"] - - -DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1: TypeAlias = Annotated[ - Union[ - DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, - DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, - ], - PropertyInfo(discriminator="type"), -] - - -class DataChoiceMessageOpenAIDeveloperMessageParam(BaseModel): - content: Union[str, List[DataChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1]] - """The content of the developer message""" - - role: Literal["developer"] - """Must be "developer" to identify this as a developer message""" - - name: Optional[str] = None - """(Optional) The name of the developer message participant.""" - - -DataChoiceMessage: TypeAlias = Annotated[ - Union[ - DataChoiceMessageOpenAIUserMessageParam, - DataChoiceMessageOpenAISystemMessageParam, - DataChoiceMessageOpenAIAssistantMessageParam, - DataChoiceMessageOpenAIToolMessageParam, - DataChoiceMessageOpenAIDeveloperMessageParam, - ], - PropertyInfo(discriminator="role"), -] - - -class DataChoiceLogprobsContentTopLogprob(BaseModel): - token: str - - logprob: float - - bytes: Optional[List[int]] = None - - -class DataChoiceLogprobsContent(BaseModel): - token: str - - logprob: float - - top_logprobs: List[DataChoiceLogprobsContentTopLogprob] - - bytes: Optional[List[int]] = None - - -class DataChoiceLogprobsRefusalTopLogprob(BaseModel): - token: str - - logprob: float - - bytes: 
Optional[List[int]] = None - - -class DataChoiceLogprobsRefusal(BaseModel): - token: str - - logprob: float - - top_logprobs: List[DataChoiceLogprobsRefusalTopLogprob] - - bytes: Optional[List[int]] = None - - -class DataChoiceLogprobs(BaseModel): - content: Optional[List[DataChoiceLogprobsContent]] = None - """(Optional) The log probabilities for the tokens in the message""" - - refusal: Optional[List[DataChoiceLogprobsRefusal]] = None - """(Optional) The log probabilities for the tokens in the message""" - - -class DataChoice(BaseModel): - finish_reason: str - """The reason the model stopped generating""" - - index: int - """The index of the choice""" - - message: DataChoiceMessage - """The message from the model""" - - logprobs: Optional[DataChoiceLogprobs] = None - """(Optional) The log probabilities for the tokens in the message""" - - -class DataInputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): - text: str - - type: Literal["text"] - - -class DataInputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( - BaseModel -): - url: str - - detail: Optional[str] = None - - -class DataInputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): - image_url: ( - DataInputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL - ) - - type: Literal["image_url"] - - -DataInputMessageOpenAIUserMessageParamContentUnionMember1: TypeAlias = Annotated[ - Union[ - DataInputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, - DataInputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, - ], - PropertyInfo(discriminator="type"), -] - - -class DataInputMessageOpenAIUserMessageParam(BaseModel): - content: Union[str, List[DataInputMessageOpenAIUserMessageParamContentUnionMember1]] - """The content of the message, which can include text and other media""" - - role: Literal["user"] - """Must be "user" to identify this as a user message""" - - name: Optional[str] = None - """(Optional) The name of the user message participant.""" - - -class DataInputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): - text: str - - type: Literal["text"] - - -class DataInputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( - BaseModel -): - url: str - - detail: Optional[str] = None - - -class DataInputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): - image_url: ( - DataInputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL - ) - - type: Literal["image_url"] - - -DataInputMessageOpenAISystemMessageParamContentUnionMember1: TypeAlias = Annotated[ - Union[ - DataInputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, - DataInputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, - ], - PropertyInfo(discriminator="type"), -] - - -class DataInputMessageOpenAISystemMessageParam(BaseModel): - content: Union[str, List[DataInputMessageOpenAISystemMessageParamContentUnionMember1]] - """The content of the "system prompt". - - If multiple system messages are provided, they are concatenated. 
The underlying - Llama Stack code may also add other system messages (for example, for formatting - tool definitions). - """ - - role: Literal["system"] - """Must be "system" to identify this as a system message""" - - name: Optional[str] = None - """(Optional) The name of the system message participant.""" - - -class DataInputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): - text: str - - type: Literal["text"] - - -class DataInputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( - BaseModel -): - url: str - - detail: Optional[str] = None - - -class DataInputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam( - BaseModel -): - image_url: ( - DataInputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL - ) - - type: Literal["image_url"] - - -DataInputMessageOpenAIAssistantMessageParamContentUnionMember1: TypeAlias = Annotated[ - Union[ - DataInputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, - DataInputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, - ], - PropertyInfo(discriminator="type"), -] - - -class DataInputMessageOpenAIAssistantMessageParamToolCallFunction(BaseModel): - arguments: Optional[str] = None - - name: Optional[str] = None - - -class DataInputMessageOpenAIAssistantMessageParamToolCall(BaseModel): - type: Literal["function"] - - id: Optional[str] = None - - function: Optional[DataInputMessageOpenAIAssistantMessageParamToolCallFunction] = None - - index: Optional[int] = None - - -class DataInputMessageOpenAIAssistantMessageParam(BaseModel): - role: Literal["assistant"] - """Must be "assistant" to identify this as the model's response""" - - content: Union[str, List[DataInputMessageOpenAIAssistantMessageParamContentUnionMember1], None] = None - """The content of the model's response""" - - name: Optional[str] = None - """(Optional) The name of the assistant message participant.""" - - tool_calls: Optional[List[DataInputMessageOpenAIAssistantMessageParamToolCall]] = None - """List of tool calls. 
Each tool call is an OpenAIChatCompletionToolCall object.""" - - -class DataInputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): - text: str - - type: Literal["text"] - - -class DataInputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( - BaseModel -): - url: str - - detail: Optional[str] = None - - -class DataInputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): - image_url: ( - DataInputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL - ) - - type: Literal["image_url"] - - -DataInputMessageOpenAIToolMessageParamContentUnionMember1: TypeAlias = Annotated[ - Union[ - DataInputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, - DataInputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, - ], - PropertyInfo(discriminator="type"), -] - - -class DataInputMessageOpenAIToolMessageParam(BaseModel): - content: Union[str, List[DataInputMessageOpenAIToolMessageParamContentUnionMember1]] - """The response content from the tool""" - - role: Literal["tool"] - """Must be "tool" to identify this as a tool response""" - - tool_call_id: str - """Unique identifier for the tool call this response is for""" - - -class DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): - text: str - - type: Literal["text"] - - -class DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( - BaseModel -): - url: str - - detail: Optional[str] = None - - -class DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam( - BaseModel -): - image_url: ( - DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL - ) - - type: Literal["image_url"] - - -DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1: TypeAlias = Annotated[ - Union[ - DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, - DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, - ], - PropertyInfo(discriminator="type"), -] - - -class DataInputMessageOpenAIDeveloperMessageParam(BaseModel): - content: Union[str, List[DataInputMessageOpenAIDeveloperMessageParamContentUnionMember1]] - """The content of the developer message""" - - role: Literal["developer"] - """Must be "developer" to identify this as a developer message""" - - name: Optional[str] = None - """(Optional) The name of the developer message participant.""" - - -DataInputMessage: TypeAlias = Annotated[ - Union[ - DataInputMessageOpenAIUserMessageParam, - DataInputMessageOpenAISystemMessageParam, - DataInputMessageOpenAIAssistantMessageParam, - DataInputMessageOpenAIToolMessageParam, - DataInputMessageOpenAIDeveloperMessageParam, - ], - PropertyInfo(discriminator="role"), -] - - -class Data(BaseModel): - id: str - """The ID of the chat completion""" - - choices: List[DataChoice] - """List of choices""" - - created: int - """The Unix timestamp in seconds when the chat completion was created""" - - input_messages: List[DataInputMessage] - - model: str - """The model that was used to generate the chat completion""" - - object: Literal["chat.completion"] - """The object type, which will be "chat.completion" 
""" - - -class CompletionListResponse(BaseModel): - data: List[Data] - - first_id: str - - has_more: bool - - last_id: str - - object: Literal["list"] diff --git a/src/llama_stack_client/types/chat/completion_retrieve_response.py b/src/llama_stack_client/types/chat/completion_retrieve_response.py deleted file mode 100644 index 330c752d..00000000 --- a/src/llama_stack_client/types/chat/completion_retrieve_response.py +++ /dev/null @@ -1,626 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import List, Union, Optional -from typing_extensions import Literal, Annotated, TypeAlias - -from ..._utils import PropertyInfo -from ..._models import BaseModel - -__all__ = [ - "CompletionRetrieveResponse", - "Choice", - "ChoiceMessage", - "ChoiceMessageOpenAIUserMessageParam", - "ChoiceMessageOpenAIUserMessageParamContentUnionMember1", - "ChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", - "ChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", - "ChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", - "ChoiceMessageOpenAISystemMessageParam", - "ChoiceMessageOpenAISystemMessageParamContentUnionMember1", - "ChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", - "ChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", - "ChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", - "ChoiceMessageOpenAIAssistantMessageParam", - "ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1", - "ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", - "ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", - "ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", - "ChoiceMessageOpenAIAssistantMessageParamToolCall", - "ChoiceMessageOpenAIAssistantMessageParamToolCallFunction", - "ChoiceMessageOpenAIToolMessageParam", - "ChoiceMessageOpenAIToolMessageParamContentUnionMember1", - "ChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", - "ChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", - "ChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", - "ChoiceMessageOpenAIDeveloperMessageParam", - "ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1", - "ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", - "ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", - "ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", - "ChoiceLogprobs", - "ChoiceLogprobsContent", - "ChoiceLogprobsContentTopLogprob", - "ChoiceLogprobsRefusal", - "ChoiceLogprobsRefusalTopLogprob", - "InputMessage", - "InputMessageOpenAIUserMessageParam", - "InputMessageOpenAIUserMessageParamContentUnionMember1", - "InputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", - "InputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", - 
"InputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", - "InputMessageOpenAISystemMessageParam", - "InputMessageOpenAISystemMessageParamContentUnionMember1", - "InputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", - "InputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", - "InputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", - "InputMessageOpenAIAssistantMessageParam", - "InputMessageOpenAIAssistantMessageParamContentUnionMember1", - "InputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", - "InputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", - "InputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", - "InputMessageOpenAIAssistantMessageParamToolCall", - "InputMessageOpenAIAssistantMessageParamToolCallFunction", - "InputMessageOpenAIToolMessageParam", - "InputMessageOpenAIToolMessageParamContentUnionMember1", - "InputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", - "InputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", - "InputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", - "InputMessageOpenAIDeveloperMessageParam", - "InputMessageOpenAIDeveloperMessageParamContentUnionMember1", - "InputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam", - "InputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam", - "InputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL", -] - - -class ChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): - text: str - - type: Literal["text"] - - -class ChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( - BaseModel -): - url: str - - detail: Optional[str] = None - - -class ChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): - image_url: ChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL - - type: Literal["image_url"] - - -ChoiceMessageOpenAIUserMessageParamContentUnionMember1: TypeAlias = Annotated[ - Union[ - ChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, - ChoiceMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, - ], - PropertyInfo(discriminator="type"), -] - - -class ChoiceMessageOpenAIUserMessageParam(BaseModel): - content: Union[str, List[ChoiceMessageOpenAIUserMessageParamContentUnionMember1]] - """The content of the message, which can include text and other media""" - - role: Literal["user"] - """Must be "user" to identify this as a user message""" - - name: Optional[str] = None - """(Optional) The name of the user message participant.""" - - -class ChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): - text: str - - type: Literal["text"] - - -class ChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( - BaseModel -): - url: str - - detail: 
Optional[str] = None - - -class ChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): - image_url: ChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL - - type: Literal["image_url"] - - -ChoiceMessageOpenAISystemMessageParamContentUnionMember1: TypeAlias = Annotated[ - Union[ - ChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, - ChoiceMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, - ], - PropertyInfo(discriminator="type"), -] - - -class ChoiceMessageOpenAISystemMessageParam(BaseModel): - content: Union[str, List[ChoiceMessageOpenAISystemMessageParamContentUnionMember1]] - """The content of the "system prompt". - - If multiple system messages are provided, they are concatenated. The underlying - Llama Stack code may also add other system messages (for example, for formatting - tool definitions). - """ - - role: Literal["system"] - """Must be "system" to identify this as a system message""" - - name: Optional[str] = None - """(Optional) The name of the system message participant.""" - - -class ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): - text: str - - type: Literal["text"] - - -class ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( - BaseModel -): - url: str - - detail: Optional[str] = None - - -class ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): - image_url: ( - ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL - ) - - type: Literal["image_url"] - - -ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1: TypeAlias = Annotated[ - Union[ - ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, - ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, - ], - PropertyInfo(discriminator="type"), -] - - -class ChoiceMessageOpenAIAssistantMessageParamToolCallFunction(BaseModel): - arguments: Optional[str] = None - - name: Optional[str] = None - - -class ChoiceMessageOpenAIAssistantMessageParamToolCall(BaseModel): - type: Literal["function"] - - id: Optional[str] = None - - function: Optional[ChoiceMessageOpenAIAssistantMessageParamToolCallFunction] = None - - index: Optional[int] = None - - -class ChoiceMessageOpenAIAssistantMessageParam(BaseModel): - role: Literal["assistant"] - """Must be "assistant" to identify this as the model's response""" - - content: Union[str, List[ChoiceMessageOpenAIAssistantMessageParamContentUnionMember1], None] = None - """The content of the model's response""" - - name: Optional[str] = None - """(Optional) The name of the assistant message participant.""" - - tool_calls: Optional[List[ChoiceMessageOpenAIAssistantMessageParamToolCall]] = None - """List of tool calls. 
Each tool call is an OpenAIChatCompletionToolCall object.""" - - -class ChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): - text: str - - type: Literal["text"] - - -class ChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( - BaseModel -): - url: str - - detail: Optional[str] = None - - -class ChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): - image_url: ChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL - - type: Literal["image_url"] - - -ChoiceMessageOpenAIToolMessageParamContentUnionMember1: TypeAlias = Annotated[ - Union[ - ChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, - ChoiceMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, - ], - PropertyInfo(discriminator="type"), -] - - -class ChoiceMessageOpenAIToolMessageParam(BaseModel): - content: Union[str, List[ChoiceMessageOpenAIToolMessageParamContentUnionMember1]] - """The response content from the tool""" - - role: Literal["tool"] - """Must be "tool" to identify this as a tool response""" - - tool_call_id: str - """Unique identifier for the tool call this response is for""" - - -class ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): - text: str - - type: Literal["text"] - - -class ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( - BaseModel -): - url: str - - detail: Optional[str] = None - - -class ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): - image_url: ( - ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL - ) - - type: Literal["image_url"] - - -ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1: TypeAlias = Annotated[ - Union[ - ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, - ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, - ], - PropertyInfo(discriminator="type"), -] - - -class ChoiceMessageOpenAIDeveloperMessageParam(BaseModel): - content: Union[str, List[ChoiceMessageOpenAIDeveloperMessageParamContentUnionMember1]] - """The content of the developer message""" - - role: Literal["developer"] - """Must be "developer" to identify this as a developer message""" - - name: Optional[str] = None - """(Optional) The name of the developer message participant.""" - - -ChoiceMessage: TypeAlias = Annotated[ - Union[ - ChoiceMessageOpenAIUserMessageParam, - ChoiceMessageOpenAISystemMessageParam, - ChoiceMessageOpenAIAssistantMessageParam, - ChoiceMessageOpenAIToolMessageParam, - ChoiceMessageOpenAIDeveloperMessageParam, - ], - PropertyInfo(discriminator="role"), -] - - -class ChoiceLogprobsContentTopLogprob(BaseModel): - token: str - - logprob: float - - bytes: Optional[List[int]] = None - - -class ChoiceLogprobsContent(BaseModel): - token: str - - logprob: float - - top_logprobs: List[ChoiceLogprobsContentTopLogprob] - - bytes: Optional[List[int]] = None - - -class ChoiceLogprobsRefusalTopLogprob(BaseModel): - token: str - - logprob: float - - bytes: Optional[List[int]] = None - - -class ChoiceLogprobsRefusal(BaseModel): - token: str - - logprob: float - - top_logprobs: 
List[ChoiceLogprobsRefusalTopLogprob] - - bytes: Optional[List[int]] = None - - -class ChoiceLogprobs(BaseModel): - content: Optional[List[ChoiceLogprobsContent]] = None - """(Optional) The log probabilities for the tokens in the message""" - - refusal: Optional[List[ChoiceLogprobsRefusal]] = None - """(Optional) The log probabilities for the tokens in the message""" - - -class Choice(BaseModel): - finish_reason: str - """The reason the model stopped generating""" - - index: int - """The index of the choice""" - - message: ChoiceMessage - """The message from the model""" - - logprobs: Optional[ChoiceLogprobs] = None - """(Optional) The log probabilities for the tokens in the message""" - - -class InputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): - text: str - - type: Literal["text"] - - -class InputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL(BaseModel): - url: str - - detail: Optional[str] = None - - -class InputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): - image_url: InputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL - - type: Literal["image_url"] - - -InputMessageOpenAIUserMessageParamContentUnionMember1: TypeAlias = Annotated[ - Union[ - InputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, - InputMessageOpenAIUserMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, - ], - PropertyInfo(discriminator="type"), -] - - -class InputMessageOpenAIUserMessageParam(BaseModel): - content: Union[str, List[InputMessageOpenAIUserMessageParamContentUnionMember1]] - """The content of the message, which can include text and other media""" - - role: Literal["user"] - """Must be "user" to identify this as a user message""" - - name: Optional[str] = None - """(Optional) The name of the user message participant.""" - - -class InputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): - text: str - - type: Literal["text"] - - -class InputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( - BaseModel -): - url: str - - detail: Optional[str] = None - - -class InputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): - image_url: InputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL - - type: Literal["image_url"] - - -InputMessageOpenAISystemMessageParamContentUnionMember1: TypeAlias = Annotated[ - Union[ - InputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, - InputMessageOpenAISystemMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, - ], - PropertyInfo(discriminator="type"), -] - - -class InputMessageOpenAISystemMessageParam(BaseModel): - content: Union[str, List[InputMessageOpenAISystemMessageParamContentUnionMember1]] - """The content of the "system prompt". - - If multiple system messages are provided, they are concatenated. The underlying - Llama Stack code may also add other system messages (for example, for formatting - tool definitions). 
- """ - - role: Literal["system"] - """Must be "system" to identify this as a system message""" - - name: Optional[str] = None - """(Optional) The name of the system message participant.""" - - -class InputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): - text: str - - type: Literal["text"] - - -class InputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( - BaseModel -): - url: str - - detail: Optional[str] = None - - -class InputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): - image_url: ( - InputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL - ) - - type: Literal["image_url"] - - -InputMessageOpenAIAssistantMessageParamContentUnionMember1: TypeAlias = Annotated[ - Union[ - InputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, - InputMessageOpenAIAssistantMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, - ], - PropertyInfo(discriminator="type"), -] - - -class InputMessageOpenAIAssistantMessageParamToolCallFunction(BaseModel): - arguments: Optional[str] = None - - name: Optional[str] = None - - -class InputMessageOpenAIAssistantMessageParamToolCall(BaseModel): - type: Literal["function"] - - id: Optional[str] = None - - function: Optional[InputMessageOpenAIAssistantMessageParamToolCallFunction] = None - - index: Optional[int] = None - - -class InputMessageOpenAIAssistantMessageParam(BaseModel): - role: Literal["assistant"] - """Must be "assistant" to identify this as the model's response""" - - content: Union[str, List[InputMessageOpenAIAssistantMessageParamContentUnionMember1], None] = None - """The content of the model's response""" - - name: Optional[str] = None - """(Optional) The name of the assistant message participant.""" - - tool_calls: Optional[List[InputMessageOpenAIAssistantMessageParamToolCall]] = None - """List of tool calls. 
Each tool call is an OpenAIChatCompletionToolCall object.""" - - -class InputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): - text: str - - type: Literal["text"] - - -class InputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL(BaseModel): - url: str - - detail: Optional[str] = None - - -class InputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): - image_url: InputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL - - type: Literal["image_url"] - - -InputMessageOpenAIToolMessageParamContentUnionMember1: TypeAlias = Annotated[ - Union[ - InputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, - InputMessageOpenAIToolMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, - ], - PropertyInfo(discriminator="type"), -] - - -class InputMessageOpenAIToolMessageParam(BaseModel): - content: Union[str, List[InputMessageOpenAIToolMessageParamContentUnionMember1]] - """The response content from the tool""" - - role: Literal["tool"] - """Must be "tool" to identify this as a tool response""" - - tool_call_id: str - """Unique identifier for the tool call this response is for""" - - -class InputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam(BaseModel): - text: str - - type: Literal["text"] - - -class InputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL( - BaseModel -): - url: str - - detail: Optional[str] = None - - -class InputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam(BaseModel): - image_url: ( - InputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParamImageURL - ) - - type: Literal["image_url"] - - -InputMessageOpenAIDeveloperMessageParamContentUnionMember1: TypeAlias = Annotated[ - Union[ - InputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartTextParam, - InputMessageOpenAIDeveloperMessageParamContentUnionMember1OpenAIChatCompletionContentPartImageParam, - ], - PropertyInfo(discriminator="type"), -] - - -class InputMessageOpenAIDeveloperMessageParam(BaseModel): - content: Union[str, List[InputMessageOpenAIDeveloperMessageParamContentUnionMember1]] - """The content of the developer message""" - - role: Literal["developer"] - """Must be "developer" to identify this as a developer message""" - - name: Optional[str] = None - """(Optional) The name of the developer message participant.""" - - -InputMessage: TypeAlias = Annotated[ - Union[ - InputMessageOpenAIUserMessageParam, - InputMessageOpenAISystemMessageParam, - InputMessageOpenAIAssistantMessageParam, - InputMessageOpenAIToolMessageParam, - InputMessageOpenAIDeveloperMessageParam, - ], - PropertyInfo(discriminator="role"), -] - - -class CompletionRetrieveResponse(BaseModel): - id: str - """The ID of the chat completion""" - - choices: List[Choice] - """List of choices""" - - created: int - """The Unix timestamp in seconds when the chat completion was created""" - - input_messages: List[InputMessage] - - model: str - """The model that was used to generate the chat completion""" - - object: Literal["chat.completion"] - """The object type, which will be "chat.completion" """ diff --git a/src/llama_stack_client/types/chat_completion_chunk.py 
b/src/llama_stack_client/types/chat_completion_chunk.py deleted file mode 100644 index 7d74663a..00000000 --- a/src/llama_stack_client/types/chat_completion_chunk.py +++ /dev/null @@ -1,124 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import List, Optional -from typing_extensions import Literal - -from .._models import BaseModel - -__all__ = [ - "ChatCompletionChunk", - "Choice", - "ChoiceDelta", - "ChoiceDeltaToolCall", - "ChoiceDeltaToolCallFunction", - "ChoiceLogprobs", - "ChoiceLogprobsContent", - "ChoiceLogprobsContentTopLogprob", - "ChoiceLogprobsRefusal", - "ChoiceLogprobsRefusalTopLogprob", -] - - -class ChoiceDeltaToolCallFunction(BaseModel): - arguments: Optional[str] = None - - name: Optional[str] = None - - -class ChoiceDeltaToolCall(BaseModel): - type: Literal["function"] - - id: Optional[str] = None - - function: Optional[ChoiceDeltaToolCallFunction] = None - - index: Optional[int] = None - - -class ChoiceDelta(BaseModel): - content: Optional[str] = None - """(Optional) The content of the delta""" - - refusal: Optional[str] = None - """(Optional) The refusal of the delta""" - - role: Optional[str] = None - """(Optional) The role of the delta""" - - tool_calls: Optional[List[ChoiceDeltaToolCall]] = None - """(Optional) The tool calls of the delta""" - - -class ChoiceLogprobsContentTopLogprob(BaseModel): - token: str - - logprob: float - - bytes: Optional[List[int]] = None - - -class ChoiceLogprobsContent(BaseModel): - token: str - - logprob: float - - top_logprobs: List[ChoiceLogprobsContentTopLogprob] - - bytes: Optional[List[int]] = None - - -class ChoiceLogprobsRefusalTopLogprob(BaseModel): - token: str - - logprob: float - - bytes: Optional[List[int]] = None - - -class ChoiceLogprobsRefusal(BaseModel): - token: str - - logprob: float - - top_logprobs: List[ChoiceLogprobsRefusalTopLogprob] - - bytes: Optional[List[int]] = None - - -class ChoiceLogprobs(BaseModel): - content: Optional[List[ChoiceLogprobsContent]] = None - """(Optional) The log probabilities for the tokens in the message""" - - refusal: Optional[List[ChoiceLogprobsRefusal]] = None - """(Optional) The log probabilities for the tokens in the message""" - - -class Choice(BaseModel): - delta: ChoiceDelta - """The delta from the chunk""" - - finish_reason: str - """The reason the model stopped generating""" - - index: int - """The index of the choice""" - - logprobs: Optional[ChoiceLogprobs] = None - """(Optional) The log probabilities for the tokens in the message""" - - -class ChatCompletionChunk(BaseModel): - id: str - """The ID of the chat completion""" - - choices: List[Choice] - """List of choices""" - - created: int - """The Unix timestamp in seconds when the chat completion was created""" - - model: str - """The model that was used to generate the chat completion""" - - object: Literal["chat.completion.chunk"] - """The object type, which will be "chat.completion.chunk" """ diff --git a/src/llama_stack_client/types/chat_completion_response_stream_chunk.py b/src/llama_stack_client/types/chat_completion_response_stream_chunk.py deleted file mode 100644 index 3c236fd4..00000000 --- a/src/llama_stack_client/types/chat_completion_response_stream_chunk.py +++ /dev/null @@ -1,42 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
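The chat_completion_chunk.py module removed just above defined the shape of streamed chat-completion chunks (a delta per choice, optional tool calls, optional logprobs). As a reference for readers of this patch, here is a minimal sketch of a payload with that shape; all concrete values are hypothetical, and only the field names and nesting mirror the deleted model.

# Hypothetical streaming chunk matching the removed ChatCompletionChunk model.
chunk = {
    "id": "chatcmpl-abc123",
    "object": "chat.completion.chunk",
    "created": 1721333333,
    "model": "llama3.1-8b",  # hypothetical model id
    "choices": [
        {
            "index": 0,
            "finish_reason": "stop",  # reason the model stopped generating
            "delta": {
                "role": "assistant",
                "content": "Hello",
                # "tool_calls" and "refusal" are optional and omitted here
            },
            # "logprobs" is optional; when present it holds per-token
            # "content" and "refusal" entries, each with a top_logprobs list
        }
    ],
}

# A consumer would typically concatenate delta["content"] across chunks
# until a finish_reason is reported for the choice.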
- -from typing import List, Optional -from typing_extensions import Literal - -from .._models import BaseModel -from .token_log_probs import TokenLogProbs -from .shared.content_delta import ContentDelta - -__all__ = ["ChatCompletionResponseStreamChunk", "Event", "Metric"] - - -class Event(BaseModel): - delta: ContentDelta - """Content generated since last event. - - This can be one or more tokens, or a tool call. - """ - - event_type: Literal["start", "complete", "progress"] - """Type of the event""" - - logprobs: Optional[List[TokenLogProbs]] = None - """Optional log probabilities for generated tokens""" - - stop_reason: Optional[Literal["end_of_turn", "end_of_message", "out_of_tokens"]] = None - """Optional reason why generation stopped, if complete""" - - -class Metric(BaseModel): - metric: str - - value: float - - unit: Optional[str] = None - - -class ChatCompletionResponseStreamChunk(BaseModel): - event: Event - """The event containing the new content""" - - metrics: Optional[List[Metric]] = None diff --git a/src/llama_stack_client/types/completion_create_params.py b/src/llama_stack_client/types/completion_create_params.py deleted file mode 100644 index 50900e25..00000000 --- a/src/llama_stack_client/types/completion_create_params.py +++ /dev/null @@ -1,82 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Dict, List, Union, Iterable -from typing_extensions import Literal, Required, TypedDict - -__all__ = ["CompletionCreateParamsBase", "CompletionCreateParamsNonStreaming", "CompletionCreateParamsStreaming"] - - -class CompletionCreateParamsBase(TypedDict, total=False): - model: Required[str] - """The identifier of the model to use. - - The model must be registered with Llama Stack and available via the /models - endpoint. 
- """ - - prompt: Required[Union[str, List[str], Iterable[int], Iterable[Iterable[int]]]] - """The prompt to generate a completion for.""" - - best_of: int - """(Optional) The number of completions to generate.""" - - echo: bool - """(Optional) Whether to echo the prompt.""" - - frequency_penalty: float - """(Optional) The penalty for repeated tokens.""" - - guided_choice: List[str] - - logit_bias: Dict[str, float] - """(Optional) The logit bias to use.""" - - logprobs: bool - """(Optional) The log probabilities to use.""" - - max_tokens: int - """(Optional) The maximum number of tokens to generate.""" - - n: int - """(Optional) The number of completions to generate.""" - - presence_penalty: float - """(Optional) The penalty for repeated tokens.""" - - prompt_logprobs: int - - seed: int - """(Optional) The seed to use.""" - - stop: Union[str, List[str]] - """(Optional) The stop tokens to use.""" - - stream_options: Dict[str, Union[bool, float, str, Iterable[object], object, None]] - """(Optional) The stream options to use.""" - - suffix: str - """(Optional) The suffix that should be appended to the completion.""" - - temperature: float - """(Optional) The temperature to use.""" - - top_p: float - """(Optional) The top p to use.""" - - user: str - """(Optional) The user to use.""" - - -class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase, total=False): - stream: Literal[False] - """(Optional) Whether to stream the response.""" - - -class CompletionCreateParamsStreaming(CompletionCreateParamsBase): - stream: Required[Literal[True]] - """(Optional) Whether to stream the response.""" - - -CompletionCreateParams = Union[CompletionCreateParamsNonStreaming, CompletionCreateParamsStreaming] diff --git a/src/llama_stack_client/types/completion_create_response.py b/src/llama_stack_client/types/completion_create_response.py deleted file mode 100644 index 0c43e68a..00000000 --- a/src/llama_stack_client/types/completion_create_response.py +++ /dev/null @@ -1,86 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import List, Optional -from typing_extensions import Literal - -from .._models import BaseModel - -__all__ = [ - "CompletionCreateResponse", - "Choice", - "ChoiceLogprobs", - "ChoiceLogprobsContent", - "ChoiceLogprobsContentTopLogprob", - "ChoiceLogprobsRefusal", - "ChoiceLogprobsRefusalTopLogprob", -] - - -class ChoiceLogprobsContentTopLogprob(BaseModel): - token: str - - logprob: float - - bytes: Optional[List[int]] = None - - -class ChoiceLogprobsContent(BaseModel): - token: str - - logprob: float - - top_logprobs: List[ChoiceLogprobsContentTopLogprob] - - bytes: Optional[List[int]] = None - - -class ChoiceLogprobsRefusalTopLogprob(BaseModel): - token: str - - logprob: float - - bytes: Optional[List[int]] = None - - -class ChoiceLogprobsRefusal(BaseModel): - token: str - - logprob: float - - top_logprobs: List[ChoiceLogprobsRefusalTopLogprob] - - bytes: Optional[List[int]] = None - - -class ChoiceLogprobs(BaseModel): - content: Optional[List[ChoiceLogprobsContent]] = None - """(Optional) The log probabilities for the tokens in the message""" - - refusal: Optional[List[ChoiceLogprobsRefusal]] = None - """(Optional) The log probabilities for the tokens in the message""" - - -class Choice(BaseModel): - finish_reason: str - - index: int - - text: str - - logprobs: Optional[ChoiceLogprobs] = None - """ - The log probabilities for the tokens in the message from an OpenAI-compatible - chat completion response. 
- """ - - -class CompletionCreateResponse(BaseModel): - id: str - - choices: List[Choice] - - created: int - - model: str - - object: Literal["text_completion"] diff --git a/src/llama_stack_client/types/completion_response.py b/src/llama_stack_client/types/completion_response.py deleted file mode 100644 index 78254b28..00000000 --- a/src/llama_stack_client/types/completion_response.py +++ /dev/null @@ -1,30 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import List, Optional -from typing_extensions import Literal - -from .._models import BaseModel -from .token_log_probs import TokenLogProbs - -__all__ = ["CompletionResponse", "Metric"] - - -class Metric(BaseModel): - metric: str - - value: float - - unit: Optional[str] = None - - -class CompletionResponse(BaseModel): - content: str - """The generated completion text""" - - stop_reason: Literal["end_of_turn", "end_of_message", "out_of_tokens"] - """Reason why generation stopped""" - - logprobs: Optional[List[TokenLogProbs]] = None - """Optional log probabilities for generated tokens""" - - metrics: Optional[List[Metric]] = None diff --git a/src/llama_stack_client/types/create_embeddings_response.py b/src/llama_stack_client/types/create_embeddings_response.py deleted file mode 100644 index b5d04f1b..00000000 --- a/src/llama_stack_client/types/create_embeddings_response.py +++ /dev/null @@ -1,44 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import List, Union -from typing_extensions import Literal - -from .._models import BaseModel - -__all__ = ["CreateEmbeddingsResponse", "Data", "Usage"] - - -class Data(BaseModel): - embedding: Union[List[float], str] - """ - The embedding vector as a list of floats (when encoding_format="float") or as a - base64-encoded string (when encoding_format="base64") - """ - - index: int - """The index of the embedding in the input list""" - - object: Literal["embedding"] - """The object type, which will be "embedding" """ - - -class Usage(BaseModel): - prompt_tokens: int - """The number of tokens in the input""" - - total_tokens: int - """The total number of tokens used""" - - -class CreateEmbeddingsResponse(BaseModel): - data: List[Data] - """List of embedding data objects""" - - model: str - """The model that was used to generate the embeddings""" - - object: Literal["list"] - """The object type, which will be "list" """ - - usage: Usage - """Usage information""" diff --git a/src/llama_stack_client/types/dataset_iterrows_params.py b/src/llama_stack_client/types/dataset_iterrows_params.py deleted file mode 100644 index 99065312..00000000 --- a/src/llama_stack_client/types/dataset_iterrows_params.py +++ /dev/null @@ -1,15 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing_extensions import TypedDict - -__all__ = ["DatasetIterrowsParams"] - - -class DatasetIterrowsParams(TypedDict, total=False): - limit: int - """The number of rows to get.""" - - start_index: int - """Index into dataset for the first row to get. Get all rows if None.""" diff --git a/src/llama_stack_client/types/dataset_iterrows_response.py b/src/llama_stack_client/types/dataset_iterrows_response.py deleted file mode 100644 index 8681b018..00000000 --- a/src/llama_stack_client/types/dataset_iterrows_response.py +++ /dev/null @@ -1,18 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from typing import Dict, List, Union, Optional - -from .._models import BaseModel - -__all__ = ["DatasetIterrowsResponse"] - - -class DatasetIterrowsResponse(BaseModel): - data: List[Dict[str, Union[bool, float, str, List[object], object, None]]] - """The list of items for the current page""" - - has_more: bool - """Whether there are more items available after this set""" - - url: Optional[str] = None - """The URL for accessing this list""" diff --git a/src/llama_stack_client/types/dataset_list_response.py b/src/llama_stack_client/types/dataset_list_response.py deleted file mode 100644 index 42b27ab4..00000000 --- a/src/llama_stack_client/types/dataset_list_response.py +++ /dev/null @@ -1,64 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Dict, List, Union, Optional -from typing_extensions import Literal, Annotated, TypeAlias - -from .._utils import PropertyInfo -from .._models import BaseModel - -__all__ = [ - "DatasetListResponse", - "DatasetListResponseItem", - "DatasetListResponseItemSource", - "DatasetListResponseItemSourceUriDataSource", - "DatasetListResponseItemSourceRowsDataSource", -] - - -class DatasetListResponseItemSourceUriDataSource(BaseModel): - type: Literal["uri"] - - uri: str - """The dataset can be obtained from a URI. - - E.g. - "https://mywebsite.com/mydata.jsonl" - "lsfs://mydata.jsonl" - - "data:csv;base64,{base64_content}" - """ - - -class DatasetListResponseItemSourceRowsDataSource(BaseModel): - rows: List[Dict[str, Union[bool, float, str, List[object], object, None]]] - """The dataset is stored in rows. - - E.g. - [ {"messages": [{"role": "user", "content": "Hello, world!"}, {"role": - "assistant", "content": "Hello, world!"}]} ] - """ - - type: Literal["rows"] - - -DatasetListResponseItemSource: TypeAlias = Annotated[ - Union[DatasetListResponseItemSourceUriDataSource, DatasetListResponseItemSourceRowsDataSource], - PropertyInfo(discriminator="type"), -] - - -class DatasetListResponseItem(BaseModel): - identifier: str - - metadata: Dict[str, Union[bool, float, str, List[object], object, None]] - - provider_id: str - - purpose: Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"] - """Purpose of the dataset. Each purpose has a required input data schema.""" - - source: DatasetListResponseItemSource - """A dataset that can be obtained from a URI.""" - - type: Literal["dataset"] - - provider_resource_id: Optional[str] = None - - -DatasetListResponse: TypeAlias = List[DatasetListResponseItem] diff --git a/src/llama_stack_client/types/dataset_register_params.py b/src/llama_stack_client/types/dataset_register_params.py deleted file mode 100644 index 6fd5db3f..00000000 --- a/src/llama_stack_client/types/dataset_register_params.py +++ /dev/null @@ -1,69 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Dict, Union, Iterable -from typing_extensions import Literal, Required, TypeAlias, TypedDict - -__all__ = ["DatasetRegisterParams", "Source", "SourceUriDataSource", "SourceRowsDataSource"] - - -class DatasetRegisterParams(TypedDict, total=False): - purpose: Required[Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"]] - """The purpose of the dataset. - - One of: - "post-training/messages": The dataset contains a messages column with - list of messages for post-training. 
{ "messages": [ {"role": "user", "content": - "Hello, world!"}, {"role": "assistant", "content": "Hello, world!"}, ] } - - "eval/question-answer": The dataset contains a question column and an answer - column for evaluation. { "question": "What is the capital of France?", "answer": - "Paris" } - "eval/messages-answer": The dataset contains a messages column with - list of messages and an answer column for evaluation. { "messages": [ {"role": - "user", "content": "Hello, my name is John Doe."}, {"role": "assistant", - "content": "Hello, John Doe. How can I help you today?"}, {"role": "user", - "content": "What's my name?"}, ], "answer": "John Doe" } - """ - - source: Required[Source] - """The data source of the dataset. - - Ensure that the data source schema is compatible with the purpose of the - dataset. Examples: - { "type": "uri", "uri": - "https://mywebsite.com/mydata.jsonl" } - { "type": "uri", "uri": - "lsfs://mydata.jsonl" } - { "type": "uri", "uri": - "data:csv;base64,{base64_content}" } - { "type": "uri", "uri": - "huggingface://llamastack/simpleqa?split=train" } - { "type": "rows", "rows": [ - { "messages": [ {"role": "user", "content": "Hello, world!"}, {"role": - "assistant", "content": "Hello, world!"}, ] } ] } - """ - - dataset_id: str - """The ID of the dataset. If not provided, an ID will be generated.""" - - metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] - """The metadata for the dataset. - E.g. {"description": "My dataset"}.""" - - -class SourceUriDataSource(TypedDict, total=False): - type: Required[Literal["uri"]] - - uri: Required[str] - """The dataset can be obtained from a URI. - - E.g. - "https://mywebsite.com/mydata.jsonl" - "lsfs://mydata.jsonl" - - "data:csv;base64,{base64_content}" - """ - - -class SourceRowsDataSource(TypedDict, total=False): - rows: Required[Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]] - """The dataset is stored in rows. - - E.g. - [ {"messages": [{"role": "user", "content": "Hello, world!"}, {"role": - "assistant", "content": "Hello, world!"}]} ] - """ - - type: Required[Literal["rows"]] - - -Source: TypeAlias = Union[SourceUriDataSource, SourceRowsDataSource] diff --git a/src/llama_stack_client/types/dataset_register_response.py b/src/llama_stack_client/types/dataset_register_response.py deleted file mode 100644 index a79367bb..00000000 --- a/src/llama_stack_client/types/dataset_register_response.py +++ /dev/null @@ -1,52 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Dict, List, Union, Optional -from typing_extensions import Literal, Annotated, TypeAlias - -from .._utils import PropertyInfo -from .._models import BaseModel - -__all__ = ["DatasetRegisterResponse", "Source", "SourceUriDataSource", "SourceRowsDataSource"] - - -class SourceUriDataSource(BaseModel): - type: Literal["uri"] - - uri: str - """The dataset can be obtained from a URI. - - E.g. - "https://mywebsite.com/mydata.jsonl" - "lsfs://mydata.jsonl" - - "data:csv;base64,{base64_content}" - """ - - -class SourceRowsDataSource(BaseModel): - rows: List[Dict[str, Union[bool, float, str, List[object], object, None]]] - """The dataset is stored in rows. - - E.g. 
- [ {"messages": [{"role": "user", "content": "Hello, world!"}, {"role": - "assistant", "content": "Hello, world!"}]} ] - """ - - type: Literal["rows"] - - -Source: TypeAlias = Annotated[Union[SourceUriDataSource, SourceRowsDataSource], PropertyInfo(discriminator="type")] - - -class DatasetRegisterResponse(BaseModel): - identifier: str - - metadata: Dict[str, Union[bool, float, str, List[object], object, None]] - - provider_id: str - - purpose: Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"] - """Purpose of the dataset. Each purpose has a required input data schema.""" - - source: Source - """A dataset that can be obtained from a URI.""" - - type: Literal["dataset"] - - provider_resource_id: Optional[str] = None diff --git a/src/llama_stack_client/types/dataset_retrieve_response.py b/src/llama_stack_client/types/dataset_retrieve_response.py deleted file mode 100644 index ab96c387..00000000 --- a/src/llama_stack_client/types/dataset_retrieve_response.py +++ /dev/null @@ -1,52 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Dict, List, Union, Optional -from typing_extensions import Literal, Annotated, TypeAlias - -from .._utils import PropertyInfo -from .._models import BaseModel - -__all__ = ["DatasetRetrieveResponse", "Source", "SourceUriDataSource", "SourceRowsDataSource"] - - -class SourceUriDataSource(BaseModel): - type: Literal["uri"] - - uri: str - """The dataset can be obtained from a URI. - - E.g. - "https://mywebsite.com/mydata.jsonl" - "lsfs://mydata.jsonl" - - "data:csv;base64,{base64_content}" - """ - - -class SourceRowsDataSource(BaseModel): - rows: List[Dict[str, Union[bool, float, str, List[object], object, None]]] - """The dataset is stored in rows. - - E.g. - [ {"messages": [{"role": "user", "content": "Hello, world!"}, {"role": - "assistant", "content": "Hello, world!"}]} ] - """ - - type: Literal["rows"] - - -Source: TypeAlias = Annotated[Union[SourceUriDataSource, SourceRowsDataSource], PropertyInfo(discriminator="type")] - - -class DatasetRetrieveResponse(BaseModel): - identifier: str - - metadata: Dict[str, Union[bool, float, str, List[object], object, None]] - - provider_id: str - - purpose: Literal["post-training/messages", "eval/question-answer", "eval/messages-answer"] - """Purpose of the dataset. Each purpose has a required input data schema.""" - - source: Source - """A dataset that can be obtained from a URI.""" - - type: Literal["dataset"] - - provider_resource_id: Optional[str] = None diff --git a/src/llama_stack_client/types/delete_file_response.py b/src/llama_stack_client/types/delete_file_response.py deleted file mode 100644 index 2188556f..00000000 --- a/src/llama_stack_client/types/delete_file_response.py +++ /dev/null @@ -1,18 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from typing_extensions import Literal - -from .._models import BaseModel - -__all__ = ["DeleteFileResponse"] - - -class DeleteFileResponse(BaseModel): - id: str - """The file identifier that was deleted""" - - deleted: bool - """Whether the file was successfully deleted""" - - object: Literal["file"] - """The object type, which is always "file" """ diff --git a/src/llama_stack_client/types/embedding_create_params.py b/src/llama_stack_client/types/embedding_create_params.py deleted file mode 100644 index c87096eb..00000000 --- a/src/llama_stack_client/types/embedding_create_params.py +++ /dev/null @@ -1,41 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import List, Union -from typing_extensions import Required, TypedDict - -__all__ = ["EmbeddingCreateParams"] - - -class EmbeddingCreateParams(TypedDict, total=False): - input: Required[Union[str, List[str]]] - """Input text to embed, encoded as a string or array of strings. - - To embed multiple inputs in a single request, pass an array of strings. - """ - - model: Required[str] - """The identifier of the model to use. - - The model must be an embedding model registered with Llama Stack and available - via the /models endpoint. - """ - - dimensions: int - """(Optional) The number of dimensions the resulting output embeddings should have. - - Only supported in text-embedding-3 and later models. - """ - - encoding_format: str - """(Optional) The format to return the embeddings in. - - Can be either "float" or "base64". Defaults to "float". - """ - - user: str - """ - (Optional) A unique identifier representing your end-user, which can help OpenAI - to monitor and detect abuse. - """ diff --git a/src/llama_stack_client/types/embeddings_response.py b/src/llama_stack_client/types/embeddings_response.py deleted file mode 100644 index f36c6b97..00000000 --- a/src/llama_stack_client/types/embeddings_response.py +++ /dev/null @@ -1,16 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import List - -from .._models import BaseModel - -__all__ = ["EmbeddingsResponse"] - - -class EmbeddingsResponse(BaseModel): - embeddings: List[List[float]] - """List of embedding vectors, one per input content. - - Each embedding is a list of floats. The dimensionality of the embedding is - model-specific; you can check model metadata using /models/{model_id} - """ diff --git a/src/llama_stack_client/types/eval_candidate_param.py b/src/llama_stack_client/types/eval_candidate_param.py deleted file mode 100644 index be1b21c8..00000000 --- a/src/llama_stack_client/types/eval_candidate_param.py +++ /dev/null @@ -1,35 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -from typing import Union -from typing_extensions import Literal, Required, TypeAlias, TypedDict - -from .shared_params.agent_config import AgentConfig -from .shared_params.system_message import SystemMessage -from .shared_params.sampling_params import SamplingParams - -__all__ = ["EvalCandidateParam", "ModelCandidate", "AgentCandidate"] - - -class ModelCandidate(TypedDict, total=False): - model: Required[str] - """The model ID to evaluate.""" - - sampling_params: Required[SamplingParams] - """The sampling parameters for the model.""" - - type: Required[Literal["model"]] - - system_message: SystemMessage - """(Optional) The system message providing instructions or context to the model.""" - - -class AgentCandidate(TypedDict, total=False): - config: Required[AgentConfig] - """The configuration for the agent candidate.""" - - type: Required[Literal["agent"]] - - -EvalCandidateParam: TypeAlias = Union[ModelCandidate, AgentCandidate] diff --git a/src/llama_stack_client/types/eval_evaluate_rows_alpha_params.py b/src/llama_stack_client/types/eval_evaluate_rows_alpha_params.py deleted file mode 100644 index eb9443b0..00000000 --- a/src/llama_stack_client/types/eval_evaluate_rows_alpha_params.py +++ /dev/null @@ -1,21 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Dict, List, Union, Iterable -from typing_extensions import Required, TypedDict - -from .benchmark_config_param import BenchmarkConfigParam - -__all__ = ["EvalEvaluateRowsAlphaParams"] - - -class EvalEvaluateRowsAlphaParams(TypedDict, total=False): - benchmark_config: Required[BenchmarkConfigParam] - """The configuration for the benchmark.""" - - input_rows: Required[Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]] - """The rows to evaluate.""" - - scoring_functions: Required[List[str]] - """The scoring functions to use for the evaluation.""" diff --git a/src/llama_stack_client/types/eval_evaluate_rows_params.py b/src/llama_stack_client/types/eval_evaluate_rows_params.py deleted file mode 100644 index 37e7b978..00000000 --- a/src/llama_stack_client/types/eval_evaluate_rows_params.py +++ /dev/null @@ -1,21 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Dict, List, Union, Iterable -from typing_extensions import Required, TypedDict - -from .benchmark_config_param import BenchmarkConfigParam - -__all__ = ["EvalEvaluateRowsParams"] - - -class EvalEvaluateRowsParams(TypedDict, total=False): - benchmark_config: Required[BenchmarkConfigParam] - """The configuration for the benchmark.""" - - input_rows: Required[Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]] - """The rows to evaluate.""" - - scoring_functions: Required[List[str]] - """The scoring functions to use for the evaluation.""" diff --git a/src/llama_stack_client/types/eval_run_eval_alpha_params.py b/src/llama_stack_client/types/eval_run_eval_alpha_params.py deleted file mode 100644 index e07393b3..00000000 --- a/src/llama_stack_client/types/eval_run_eval_alpha_params.py +++ /dev/null @@ -1,14 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -from typing_extensions import Required, TypedDict - -from .benchmark_config_param import BenchmarkConfigParam - -__all__ = ["EvalRunEvalAlphaParams"] - - -class EvalRunEvalAlphaParams(TypedDict, total=False): - benchmark_config: Required[BenchmarkConfigParam] - """The configuration for the benchmark.""" diff --git a/src/llama_stack_client/types/eval_run_eval_params.py b/src/llama_stack_client/types/eval_run_eval_params.py deleted file mode 100644 index 33596fc2..00000000 --- a/src/llama_stack_client/types/eval_run_eval_params.py +++ /dev/null @@ -1,14 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing_extensions import Required, TypedDict - -from .benchmark_config_param import BenchmarkConfigParam - -__all__ = ["EvalRunEvalParams"] - - -class EvalRunEvalParams(TypedDict, total=False): - benchmark_config: Required[BenchmarkConfigParam] - """The configuration for the benchmark.""" diff --git a/src/llama_stack_client/types/evaluate_response.py b/src/llama_stack_client/types/evaluate_response.py deleted file mode 100644 index 8e463352..00000000 --- a/src/llama_stack_client/types/evaluate_response.py +++ /dev/null @@ -1,16 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Dict, List, Union - -from .._models import BaseModel -from .shared.scoring_result import ScoringResult - -__all__ = ["EvaluateResponse"] - - -class EvaluateResponse(BaseModel): - generations: List[Dict[str, Union[bool, float, str, List[object], object, None]]] - """The generations from the evaluation.""" - - scores: Dict[str, ScoringResult] - """The scores from the evaluation.""" diff --git a/src/llama_stack_client/types/event_param.py b/src/llama_stack_client/types/event_param.py deleted file mode 100644 index 500e4a24..00000000 --- a/src/llama_stack_client/types/event_param.py +++ /dev/null @@ -1,89 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -from typing import Dict, Union -from datetime import datetime -from typing_extensions import Literal, Required, Annotated, TypeAlias, TypedDict - -from .._utils import PropertyInfo - -__all__ = [ - "EventParam", - "UnstructuredLogEvent", - "MetricEvent", - "StructuredLogEvent", - "StructuredLogEventPayload", - "StructuredLogEventPayloadSpanStartPayload", - "StructuredLogEventPayloadSpanEndPayload", -] - - -class UnstructuredLogEvent(TypedDict, total=False): - message: Required[str] - - severity: Required[Literal["verbose", "debug", "info", "warn", "error", "critical"]] - - span_id: Required[str] - - timestamp: Required[Annotated[Union[str, datetime], PropertyInfo(format="iso8601")]] - - trace_id: Required[str] - - type: Required[Literal["unstructured_log"]] - - attributes: Dict[str, Union[str, float, bool, None]] - - -class MetricEvent(TypedDict, total=False): - metric: Required[str] - - span_id: Required[str] - - timestamp: Required[Annotated[Union[str, datetime], PropertyInfo(format="iso8601")]] - - trace_id: Required[str] - - type: Required[Literal["metric"]] - - unit: Required[str] - - value: Required[float] - - attributes: Dict[str, Union[str, float, bool, None]] - - -class StructuredLogEventPayloadSpanStartPayload(TypedDict, total=False): - name: Required[str] - - type: Required[Literal["span_start"]] - - parent_span_id: str - - -class StructuredLogEventPayloadSpanEndPayload(TypedDict, total=False): - status: Required[Literal["ok", "error"]] - - type: Required[Literal["span_end"]] - - -StructuredLogEventPayload: TypeAlias = Union[ - StructuredLogEventPayloadSpanStartPayload, StructuredLogEventPayloadSpanEndPayload -] - - -class StructuredLogEvent(TypedDict, total=False): - payload: Required[StructuredLogEventPayload] - - span_id: Required[str] - - timestamp: Required[Annotated[Union[str, datetime], PropertyInfo(format="iso8601")]] - - trace_id: Required[str] - - type: Required[Literal["structured_log"]] - - attributes: Dict[str, Union[str, float, bool, None]] - - -EventParam: TypeAlias = Union[UnstructuredLogEvent, MetricEvent, StructuredLogEvent] diff --git a/src/llama_stack_client/types/file.py b/src/llama_stack_client/types/file.py deleted file mode 100644 index 74ead6b7..00000000 --- a/src/llama_stack_client/types/file.py +++ /dev/null @@ -1,30 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing_extensions import Literal - -from .._models import BaseModel - -__all__ = ["File"] - - -class File(BaseModel): - id: str - """The file identifier, which can be referenced in the API endpoints""" - - bytes: int - """The size of the file, in bytes""" - - created_at: int - """The Unix timestamp (in seconds) for when the file was created""" - - expires_at: int - """The Unix timestamp (in seconds) for when the file expires""" - - filename: str - """The name of the file""" - - object: Literal["file"] - """The object type, which is always "file" """ - - purpose: Literal["assistants"] - """The intended purpose of the file""" diff --git a/src/llama_stack_client/types/file_create_params.py b/src/llama_stack_client/types/file_create_params.py deleted file mode 100644 index 8342aad2..00000000 --- a/src/llama_stack_client/types/file_create_params.py +++ /dev/null @@ -1,16 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -from typing_extensions import Literal, Required, TypedDict - -from .._types import FileTypes - -__all__ = ["FileCreateParams"] - - -class FileCreateParams(TypedDict, total=False): - file: Required[FileTypes] - - purpose: Required[Literal["assistants"]] - """Valid purpose values for OpenAI Files API.""" diff --git a/src/llama_stack_client/types/file_list_params.py b/src/llama_stack_client/types/file_list_params.py deleted file mode 100644 index 3f7d6ed5..00000000 --- a/src/llama_stack_client/types/file_list_params.py +++ /dev/null @@ -1,33 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing_extensions import Literal, TypedDict - -__all__ = ["FileListParams"] - - -class FileListParams(TypedDict, total=False): - after: str - """A cursor for use in pagination. - - `after` is an object ID that defines your place in the list. For instance, if - you make a list request and receive 100 objects, ending with obj_foo, your - subsequent call can include after=obj_foo in order to fetch the next page of the - list. - """ - - limit: int - """A limit on the number of objects to be returned. - - Limit can range between 1 and 10,000, and the default is 10,000. - """ - - order: Literal["asc", "desc"] - """Sort order by the `created_at` timestamp of the objects. - - `asc` for ascending order and `desc` for descending order. - """ - - purpose: Literal["assistants"] - """Only return files with the given purpose.""" diff --git a/src/llama_stack_client/types/health_info.py b/src/llama_stack_client/types/health_info.py deleted file mode 100644 index 3441ddd1..00000000 --- a/src/llama_stack_client/types/health_info.py +++ /dev/null @@ -1,11 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing_extensions import Literal - -from .._models import BaseModel - -__all__ = ["HealthInfo"] - - -class HealthInfo(BaseModel): - status: Literal["OK", "Error", "Not Implemented"] diff --git a/src/llama_stack_client/types/inference_batch_chat_completion_params.py b/src/llama_stack_client/types/inference_batch_chat_completion_params.py deleted file mode 100644 index b5da0f0e..00000000 --- a/src/llama_stack_client/types/inference_batch_chat_completion_params.py +++ /dev/null @@ -1,85 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Dict, Union, Iterable -from typing_extensions import Literal, Required, TypedDict - -from .shared_params.message import Message -from .shared_params.response_format import ResponseFormat -from .shared_params.sampling_params import SamplingParams -from .shared_params.tool_param_definition import ToolParamDefinition - -__all__ = ["InferenceBatchChatCompletionParams", "Logprobs", "ToolConfig", "Tool"] - - -class InferenceBatchChatCompletionParams(TypedDict, total=False): - messages_batch: Required[Iterable[Iterable[Message]]] - """The messages to generate completions for.""" - - model_id: Required[str] - """The identifier of the model to use. - - The model must be registered with Llama Stack and available via the /models - endpoint. - """ - - logprobs: Logprobs - """ - (Optional) If specified, log probabilities for each token position will be - returned. 
- """ - - response_format: ResponseFormat - """(Optional) Grammar specification for guided (structured) decoding.""" - - sampling_params: SamplingParams - """(Optional) Parameters to control the sampling strategy.""" - - tool_config: ToolConfig - """(Optional) Configuration for tool use.""" - - tools: Iterable[Tool] - """(Optional) List of tool definitions available to the model.""" - - -class Logprobs(TypedDict, total=False): - top_k: int - """How many tokens (for each position) to return log probabilities for.""" - - -class ToolConfig(TypedDict, total=False): - system_message_behavior: Literal["append", "replace"] - """(Optional) Config for how to override the default system prompt. - - - `SystemMessageBehavior.append`: Appends the provided system message to the - default system prompt. - `SystemMessageBehavior.replace`: Replaces the default - system prompt with the provided system message. The system message can include - the string '{{function_definitions}}' to indicate where the function - definitions should be inserted. - """ - - tool_choice: Union[Literal["auto", "required", "none"], str] - """(Optional) Whether tool use is automatic, required, or none. - - Can also specify a tool name to use a specific tool. Defaults to - ToolChoice.auto. - """ - - tool_prompt_format: Literal["json", "function_tag", "python_list"] - """(Optional) Instructs the model how to format tool calls. - - By default, Llama Stack will attempt to use a format that is best adapted to the - model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON - object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a - tag. - `ToolPromptFormat.python_list`: The tool calls - are output as Python syntax -- a list of function calls. - """ - - -class Tool(TypedDict, total=False): - tool_name: Required[Union[Literal["brave_search", "wolfram_alpha", "photogen", "code_interpreter"], str]] - - description: str - - parameters: Dict[str, ToolParamDefinition] diff --git a/src/llama_stack_client/types/inference_batch_chat_completion_response.py b/src/llama_stack_client/types/inference_batch_chat_completion_response.py deleted file mode 100644 index 84d6c425..00000000 --- a/src/llama_stack_client/types/inference_batch_chat_completion_response.py +++ /dev/null @@ -1,12 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import List - -from .._models import BaseModel -from .shared.chat_completion_response import ChatCompletionResponse - -__all__ = ["InferenceBatchChatCompletionResponse"] - - -class InferenceBatchChatCompletionResponse(BaseModel): - batch: List[ChatCompletionResponse] diff --git a/src/llama_stack_client/types/inference_batch_completion_params.py b/src/llama_stack_client/types/inference_batch_completion_params.py deleted file mode 100644 index d3db8e13..00000000 --- a/src/llama_stack_client/types/inference_batch_completion_params.py +++ /dev/null @@ -1,41 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -from typing import List -from typing_extensions import Required, TypedDict - -from .shared_params.response_format import ResponseFormat -from .shared_params.sampling_params import SamplingParams -from .shared_params.interleaved_content import InterleavedContent - -__all__ = ["InferenceBatchCompletionParams", "Logprobs"] - - -class InferenceBatchCompletionParams(TypedDict, total=False): - content_batch: Required[List[InterleavedContent]] - """The content to generate completions for.""" - - model_id: Required[str] - """The identifier of the model to use. - - The model must be registered with Llama Stack and available via the /models - endpoint. - """ - - logprobs: Logprobs - """ - (Optional) If specified, log probabilities for each token position will be - returned. - """ - - response_format: ResponseFormat - """(Optional) Grammar specification for guided (structured) decoding.""" - - sampling_params: SamplingParams - """(Optional) Parameters to control the sampling strategy.""" - - -class Logprobs(TypedDict, total=False): - top_k: int - """How many tokens (for each position) to return log probabilities for.""" diff --git a/src/llama_stack_client/types/inference_chat_completion_params.py b/src/llama_stack_client/types/inference_chat_completion_params.py deleted file mode 100644 index 746d3dee..00000000 --- a/src/llama_stack_client/types/inference_chat_completion_params.py +++ /dev/null @@ -1,134 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Dict, Union, Iterable -from typing_extensions import Literal, Required, TypedDict - -from .shared_params.message import Message -from .shared_params.response_format import ResponseFormat -from .shared_params.sampling_params import SamplingParams -from .shared_params.tool_param_definition import ToolParamDefinition - -__all__ = [ - "InferenceChatCompletionParamsBase", - "Logprobs", - "ToolConfig", - "Tool", - "InferenceChatCompletionParamsNonStreaming", - "InferenceChatCompletionParamsStreaming", -] - - -class InferenceChatCompletionParamsBase(TypedDict, total=False): - messages: Required[Iterable[Message]] - """List of messages in the conversation.""" - - model_id: Required[str] - """The identifier of the model to use. - - The model must be registered with Llama Stack and available via the /models - endpoint. - """ - - logprobs: Logprobs - """ - (Optional) If specified, log probabilities for each token position will be - returned. - """ - - response_format: ResponseFormat - """(Optional) Grammar specification for guided (structured) decoding. - - There are two options: - `ResponseFormat.json_schema`: The grammar is a JSON - schema. Most providers support this format. - `ResponseFormat.grammar`: The - grammar is a BNF grammar. This format is more flexible, but not all providers - support it. - """ - - sampling_params: SamplingParams - """Parameters to control the sampling strategy.""" - - tool_choice: Literal["auto", "required", "none"] - """(Optional) Whether tool use is required or automatic. - - Defaults to ToolChoice.auto. .. deprecated:: Use tool_config instead. - """ - - tool_config: ToolConfig - """(Optional) Configuration for tool use.""" - - tool_prompt_format: Literal["json", "function_tag", "python_list"] - """(Optional) Instructs the model how to format tool calls. - - By default, Llama Stack will attempt to use a format that is best adapted to the - model. 
- `ToolPromptFormat.json`: The tool calls are formatted as a JSON - object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a - tag. - `ToolPromptFormat.python_list`: The tool calls - are output as Python syntax -- a list of function calls. .. deprecated:: Use - tool_config instead. - """ - - tools: Iterable[Tool] - """(Optional) List of tool definitions available to the model.""" - - -class Logprobs(TypedDict, total=False): - top_k: int - """How many tokens (for each position) to return log probabilities for.""" - - -class ToolConfig(TypedDict, total=False): - system_message_behavior: Literal["append", "replace"] - """(Optional) Config for how to override the default system prompt. - - - `SystemMessageBehavior.append`: Appends the provided system message to the - default system prompt. - `SystemMessageBehavior.replace`: Replaces the default - system prompt with the provided system message. The system message can include - the string '{{function_definitions}}' to indicate where the function - definitions should be inserted. - """ - - tool_choice: Union[Literal["auto", "required", "none"], str] - """(Optional) Whether tool use is automatic, required, or none. - - Can also specify a tool name to use a specific tool. Defaults to - ToolChoice.auto. - """ - - tool_prompt_format: Literal["json", "function_tag", "python_list"] - """(Optional) Instructs the model how to format tool calls. - - By default, Llama Stack will attempt to use a format that is best adapted to the - model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON - object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a - tag. - `ToolPromptFormat.python_list`: The tool calls - are output as Python syntax -- a list of function calls. - """ - - -class Tool(TypedDict, total=False): - tool_name: Required[Union[Literal["brave_search", "wolfram_alpha", "photogen", "code_interpreter"], str]] - - description: str - - parameters: Dict[str, ToolParamDefinition] - - -class InferenceChatCompletionParamsNonStreaming(InferenceChatCompletionParamsBase, total=False): - stream: Literal[False] - """(Optional) If True, generate an SSE event stream of the response. - - Defaults to False. - """ - - -class InferenceChatCompletionParamsStreaming(InferenceChatCompletionParamsBase): - stream: Required[Literal[True]] - """(Optional) If True, generate an SSE event stream of the response. - - Defaults to False. - """ - - -InferenceChatCompletionParams = Union[InferenceChatCompletionParamsNonStreaming, InferenceChatCompletionParamsStreaming] diff --git a/src/llama_stack_client/types/inference_completion_params.py b/src/llama_stack_client/types/inference_completion_params.py deleted file mode 100644 index c122f017..00000000 --- a/src/llama_stack_client/types/inference_completion_params.py +++ /dev/null @@ -1,65 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -from typing import Union -from typing_extensions import Literal, Required, TypedDict - -from .shared_params.response_format import ResponseFormat -from .shared_params.sampling_params import SamplingParams -from .shared_params.interleaved_content import InterleavedContent - -__all__ = [ - "InferenceCompletionParamsBase", - "Logprobs", - "InferenceCompletionParamsNonStreaming", - "InferenceCompletionParamsStreaming", -] - - -class InferenceCompletionParamsBase(TypedDict, total=False): - content: Required[InterleavedContent] - """The content to generate a completion for.""" - - model_id: Required[str] - """The identifier of the model to use. - - The model must be registered with Llama Stack and available via the /models - endpoint. - """ - - logprobs: Logprobs - """ - (Optional) If specified, log probabilities for each token position will be - returned. - """ - - response_format: ResponseFormat - """(Optional) Grammar specification for guided (structured) decoding.""" - - sampling_params: SamplingParams - """(Optional) Parameters to control the sampling strategy.""" - - -class Logprobs(TypedDict, total=False): - top_k: int - """How many tokens (for each position) to return log probabilities for.""" - - -class InferenceCompletionParamsNonStreaming(InferenceCompletionParamsBase, total=False): - stream: Literal[False] - """(Optional) If True, generate an SSE event stream of the response. - - Defaults to False. - """ - - -class InferenceCompletionParamsStreaming(InferenceCompletionParamsBase): - stream: Required[Literal[True]] - """(Optional) If True, generate an SSE event stream of the response. - - Defaults to False. - """ - - -InferenceCompletionParams = Union[InferenceCompletionParamsNonStreaming, InferenceCompletionParamsStreaming] diff --git a/src/llama_stack_client/types/inference_embeddings_params.py b/src/llama_stack_client/types/inference_embeddings_params.py deleted file mode 100644 index 7bf5339f..00000000 --- a/src/llama_stack_client/types/inference_embeddings_params.py +++ /dev/null @@ -1,45 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import List, Union, Iterable -from typing_extensions import Literal, Required, TypedDict - -from .shared_params.interleaved_content_item import InterleavedContentItem - -__all__ = ["InferenceEmbeddingsParams"] - - -class InferenceEmbeddingsParams(TypedDict, total=False): - contents: Required[Union[List[str], Iterable[InterleavedContentItem]]] - """List of contents to generate embeddings for. - - Each content can be a string or an InterleavedContentItem (and hence can be - multimodal). The behavior depends on the model and provider. Some models may - only support text. - """ - - model_id: Required[str] - """The identifier of the model to use. - - The model must be an embedding model registered with Llama Stack and available - via the /models endpoint. - """ - - output_dimension: int - """(Optional) Output dimensionality for the embeddings. - - Only supported by Matryoshka models. - """ - - task_type: Literal["query", "document"] - """ - (Optional) How is the embedding being used? This is only supported by asymmetric - embedding models. - """ - - text_truncation: Literal["none", "start", "end"] - """ - (Optional) Config for how to truncate text for embedding when text is longer - than the model's max sequence length. 
- """ diff --git a/src/llama_stack_client/types/inference_step.py b/src/llama_stack_client/types/inference_step.py deleted file mode 100644 index 2aecb193..00000000 --- a/src/llama_stack_client/types/inference_step.py +++ /dev/null @@ -1,32 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Optional -from datetime import datetime -from typing_extensions import Literal - -from pydantic import Field as FieldInfo - -from .._models import BaseModel -from .shared.completion_message import CompletionMessage - -__all__ = ["InferenceStep"] - - -class InferenceStep(BaseModel): - api_model_response: CompletionMessage = FieldInfo(alias="model_response") - """The response from the LLM.""" - - step_id: str - """The ID of the step.""" - - step_type: Literal["inference"] - """Type of the step in an agent turn.""" - - turn_id: str - """The ID of the turn.""" - - completed_at: Optional[datetime] = None - """The time the step completed.""" - - started_at: Optional[datetime] = None - """The time the step started.""" diff --git a/src/llama_stack_client/types/job.py b/src/llama_stack_client/types/job.py deleted file mode 100644 index 4953b3bf..00000000 --- a/src/llama_stack_client/types/job.py +++ /dev/null @@ -1,13 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing_extensions import Literal - -from .._models import BaseModel - -__all__ = ["Job"] - - -class Job(BaseModel): - job_id: str - - status: Literal["completed", "in_progress", "failed", "scheduled", "cancelled"] diff --git a/src/llama_stack_client/types/list_benchmarks_response.py b/src/llama_stack_client/types/list_benchmarks_response.py deleted file mode 100644 index f265f130..00000000 --- a/src/llama_stack_client/types/list_benchmarks_response.py +++ /dev/null @@ -1,10 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from .._models import BaseModel -from .benchmark_list_response import BenchmarkListResponse - -__all__ = ["ListBenchmarksResponse"] - - -class ListBenchmarksResponse(BaseModel): - data: BenchmarkListResponse diff --git a/src/llama_stack_client/types/list_datasets_response.py b/src/llama_stack_client/types/list_datasets_response.py deleted file mode 100644 index 5a897f78..00000000 --- a/src/llama_stack_client/types/list_datasets_response.py +++ /dev/null @@ -1,10 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from .._models import BaseModel -from .dataset_list_response import DatasetListResponse - -__all__ = ["ListDatasetsResponse"] - - -class ListDatasetsResponse(BaseModel): - data: DatasetListResponse diff --git a/src/llama_stack_client/types/list_files_response.py b/src/llama_stack_client/types/list_files_response.py deleted file mode 100644 index cbb7d514..00000000 --- a/src/llama_stack_client/types/list_files_response.py +++ /dev/null @@ -1,23 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from typing import List -from typing_extensions import Literal - -from .file import File -from .._models import BaseModel - -__all__ = ["ListFilesResponse"] - - -class ListFilesResponse(BaseModel): - data: List[File] - """List of file objects""" - - first_id: str - - has_more: bool - - last_id: str - - object: Literal["list"] - """The object type, which is always "list" """ diff --git a/src/llama_stack_client/types/list_models_response.py b/src/llama_stack_client/types/list_models_response.py deleted file mode 100644 index a36896b8..00000000 --- a/src/llama_stack_client/types/list_models_response.py +++ /dev/null @@ -1,10 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from .._models import BaseModel -from .model_list_response import ModelListResponse - -__all__ = ["ListModelsResponse"] - - -class ListModelsResponse(BaseModel): - data: ModelListResponse diff --git a/src/llama_stack_client/types/list_post_training_jobs_response.py b/src/llama_stack_client/types/list_post_training_jobs_response.py deleted file mode 100644 index 09d16628..00000000 --- a/src/llama_stack_client/types/list_post_training_jobs_response.py +++ /dev/null @@ -1,15 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import List - -from .._models import BaseModel - -__all__ = ["ListPostTrainingJobsResponse", "Data"] - - -class Data(BaseModel): - job_uuid: str - - -class ListPostTrainingJobsResponse(BaseModel): - data: List[Data] diff --git a/src/llama_stack_client/types/list_providers_response.py b/src/llama_stack_client/types/list_providers_response.py deleted file mode 100644 index 4904c0b1..00000000 --- a/src/llama_stack_client/types/list_providers_response.py +++ /dev/null @@ -1,10 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from .._models import BaseModel -from .provider_list_response import ProviderListResponse - -__all__ = ["ListProvidersResponse"] - - -class ListProvidersResponse(BaseModel): - data: ProviderListResponse diff --git a/src/llama_stack_client/types/list_routes_response.py b/src/llama_stack_client/types/list_routes_response.py deleted file mode 100644 index 59e8392b..00000000 --- a/src/llama_stack_client/types/list_routes_response.py +++ /dev/null @@ -1,10 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from .._models import BaseModel -from .route_list_response import RouteListResponse - -__all__ = ["ListRoutesResponse"] - - -class ListRoutesResponse(BaseModel): - data: RouteListResponse diff --git a/src/llama_stack_client/types/list_scoring_functions_response.py b/src/llama_stack_client/types/list_scoring_functions_response.py deleted file mode 100644 index 2c044ba1..00000000 --- a/src/llama_stack_client/types/list_scoring_functions_response.py +++ /dev/null @@ -1,10 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from .._models import BaseModel -from .scoring_function_list_response import ScoringFunctionListResponse - -__all__ = ["ListScoringFunctionsResponse"] - - -class ListScoringFunctionsResponse(BaseModel): - data: ScoringFunctionListResponse diff --git a/src/llama_stack_client/types/list_shields_response.py b/src/llama_stack_client/types/list_shields_response.py deleted file mode 100644 index fabbc9da..00000000 --- a/src/llama_stack_client/types/list_shields_response.py +++ /dev/null @@ -1,10 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from .._models import BaseModel -from .shield_list_response import ShieldListResponse - -__all__ = ["ListShieldsResponse"] - - -class ListShieldsResponse(BaseModel): - data: ShieldListResponse diff --git a/src/llama_stack_client/types/list_tool_groups_response.py b/src/llama_stack_client/types/list_tool_groups_response.py deleted file mode 100644 index 6433b164..00000000 --- a/src/llama_stack_client/types/list_tool_groups_response.py +++ /dev/null @@ -1,10 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from .._models import BaseModel -from .toolgroup_list_response import ToolgroupListResponse - -__all__ = ["ListToolGroupsResponse"] - - -class ListToolGroupsResponse(BaseModel): - data: ToolgroupListResponse diff --git a/src/llama_stack_client/types/list_tools_response.py b/src/llama_stack_client/types/list_tools_response.py deleted file mode 100644 index c9b4ec6b..00000000 --- a/src/llama_stack_client/types/list_tools_response.py +++ /dev/null @@ -1,10 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from .._models import BaseModel -from .tool_list_response import ToolListResponse - -__all__ = ["ListToolsResponse"] - - -class ListToolsResponse(BaseModel): - data: ToolListResponse diff --git a/src/llama_stack_client/types/list_vector_dbs_response.py b/src/llama_stack_client/types/list_vector_dbs_response.py deleted file mode 100644 index fede6c42..00000000 --- a/src/llama_stack_client/types/list_vector_dbs_response.py +++ /dev/null @@ -1,10 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from .._models import BaseModel -from .vector_db_list_response import VectorDBListResponse - -__all__ = ["ListVectorDBsResponse"] - - -class ListVectorDBsResponse(BaseModel): - data: VectorDBListResponse diff --git a/src/llama_stack_client/types/list_vector_stores_response.py b/src/llama_stack_client/types/list_vector_stores_response.py deleted file mode 100644 index c79fd895..00000000 --- a/src/llama_stack_client/types/list_vector_stores_response.py +++ /dev/null @@ -1,20 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import List, Optional - -from .._models import BaseModel -from .vector_store import VectorStore - -__all__ = ["ListVectorStoresResponse"] - - -class ListVectorStoresResponse(BaseModel): - data: List[VectorStore] - - has_more: bool - - object: str - - first_id: Optional[str] = None - - last_id: Optional[str] = None diff --git a/src/llama_stack_client/types/memory_retrieval_step.py b/src/llama_stack_client/types/memory_retrieval_step.py deleted file mode 100644 index 887e9986..00000000 --- a/src/llama_stack_client/types/memory_retrieval_step.py +++ /dev/null @@ -1,33 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from typing import Optional -from datetime import datetime -from typing_extensions import Literal - -from .._models import BaseModel -from .shared.interleaved_content import InterleavedContent - -__all__ = ["MemoryRetrievalStep"] - - -class MemoryRetrievalStep(BaseModel): - inserted_context: InterleavedContent - """The context retrieved from the vector databases.""" - - step_id: str - """The ID of the step.""" - - step_type: Literal["memory_retrieval"] - """Type of the step in an agent turn.""" - - turn_id: str - """The ID of the turn.""" - - vector_db_ids: str - """The IDs of the vector databases to retrieve context from.""" - - completed_at: Optional[datetime] = None - """The time the step completed.""" - - started_at: Optional[datetime] = None - """The time the step started.""" diff --git a/src/llama_stack_client/types/model.py b/src/llama_stack_client/types/model.py deleted file mode 100644 index dea24d53..00000000 --- a/src/llama_stack_client/types/model.py +++ /dev/null @@ -1,24 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Dict, List, Union, Optional -from typing_extensions import Literal - -from pydantic import Field as FieldInfo - -from .._models import BaseModel - -__all__ = ["Model"] - - -class Model(BaseModel): - identifier: str - - metadata: Dict[str, Union[bool, float, str, List[object], object, None]] - - api_model_type: Literal["llm", "embedding"] = FieldInfo(alias="model_type") - - provider_id: str - - type: Literal["model"] - - provider_resource_id: Optional[str] = None diff --git a/src/llama_stack_client/types/model_register_params.py b/src/llama_stack_client/types/model_register_params.py deleted file mode 100644 index fb1d9fb6..00000000 --- a/src/llama_stack_client/types/model_register_params.py +++ /dev/null @@ -1,25 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Dict, Union, Iterable -from typing_extensions import Literal, Required, TypedDict - -__all__ = ["ModelRegisterParams"] - - -class ModelRegisterParams(TypedDict, total=False): - model_id: Required[str] - """The identifier of the model to register.""" - - metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] - """Any additional metadata for this model.""" - - model_type: Literal["llm", "embedding"] - """The type of model to register.""" - - provider_id: str - """The identifier of the provider.""" - - provider_model_id: str - """The identifier of the model in the provider.""" diff --git a/src/llama_stack_client/types/post_training/__init__.py b/src/llama_stack_client/types/post_training/__init__.py deleted file mode 100644 index d5472d43..00000000 --- a/src/llama_stack_client/types/post_training/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -from .job_cancel_params import JobCancelParams as JobCancelParams -from .job_list_response import JobListResponse as JobListResponse -from .job_status_params import JobStatusParams as JobStatusParams -from .job_status_response import JobStatusResponse as JobStatusResponse -from .job_artifacts_params import JobArtifactsParams as JobArtifactsParams -from .job_artifacts_response import JobArtifactsResponse as JobArtifactsResponse diff --git a/src/llama_stack_client/types/post_training/job_artifacts_params.py b/src/llama_stack_client/types/post_training/job_artifacts_params.py deleted file mode 100644 index 851ebf5f..00000000 --- a/src/llama_stack_client/types/post_training/job_artifacts_params.py +++ /dev/null @@ -1,12 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing_extensions import Required, TypedDict - -__all__ = ["JobArtifactsParams"] - - -class JobArtifactsParams(TypedDict, total=False): - job_uuid: Required[str] - """The UUID of the job to get the artifacts of.""" diff --git a/src/llama_stack_client/types/post_training/job_artifacts_response.py b/src/llama_stack_client/types/post_training/job_artifacts_response.py deleted file mode 100644 index 0fb98c6c..00000000 --- a/src/llama_stack_client/types/post_training/job_artifacts_response.py +++ /dev/null @@ -1,13 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import List - -from ..._models import BaseModel - -__all__ = ["JobArtifactsResponse"] - - -class JobArtifactsResponse(BaseModel): - checkpoints: List[object] - - job_uuid: str diff --git a/src/llama_stack_client/types/post_training/job_cancel_params.py b/src/llama_stack_client/types/post_training/job_cancel_params.py deleted file mode 100644 index 3a976e87..00000000 --- a/src/llama_stack_client/types/post_training/job_cancel_params.py +++ /dev/null @@ -1,12 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing_extensions import Required, TypedDict - -__all__ = ["JobCancelParams"] - - -class JobCancelParams(TypedDict, total=False): - job_uuid: Required[str] - """The UUID of the job to cancel.""" diff --git a/src/llama_stack_client/types/post_training/job_list_response.py b/src/llama_stack_client/types/post_training/job_list_response.py deleted file mode 100644 index cb42da2d..00000000 --- a/src/llama_stack_client/types/post_training/job_list_response.py +++ /dev/null @@ -1,15 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import List -from typing_extensions import TypeAlias - -from ..._models import BaseModel - -__all__ = ["JobListResponse", "JobListResponseItem"] - - -class JobListResponseItem(BaseModel): - job_uuid: str - - -JobListResponse: TypeAlias = List[JobListResponseItem] diff --git a/src/llama_stack_client/types/post_training/job_status_params.py b/src/llama_stack_client/types/post_training/job_status_params.py deleted file mode 100644 index d5e040e0..00000000 --- a/src/llama_stack_client/types/post_training/job_status_params.py +++ /dev/null @@ -1,12 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -from typing_extensions import Required, TypedDict - -__all__ = ["JobStatusParams"] - - -class JobStatusParams(TypedDict, total=False): - job_uuid: Required[str] - """The UUID of the job to get the status of.""" diff --git a/src/llama_stack_client/types/post_training/job_status_response.py b/src/llama_stack_client/types/post_training/job_status_response.py deleted file mode 100644 index 5ba60a6a..00000000 --- a/src/llama_stack_client/types/post_training/job_status_response.py +++ /dev/null @@ -1,25 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Dict, List, Union, Optional -from datetime import datetime -from typing_extensions import Literal - -from ..._models import BaseModel - -__all__ = ["JobStatusResponse"] - - -class JobStatusResponse(BaseModel): - checkpoints: List[object] - - job_uuid: str - - status: Literal["completed", "in_progress", "failed", "scheduled", "cancelled"] - - completed_at: Optional[datetime] = None - - resources_allocated: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None - - scheduled_at: Optional[datetime] = None - - started_at: Optional[datetime] = None diff --git a/src/llama_stack_client/types/post_training_job.py b/src/llama_stack_client/types/post_training_job.py deleted file mode 100644 index d0ba5fce..00000000 --- a/src/llama_stack_client/types/post_training_job.py +++ /dev/null @@ -1,9 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from .._models import BaseModel - -__all__ = ["PostTrainingJob"] - - -class PostTrainingJob(BaseModel): - job_uuid: str diff --git a/src/llama_stack_client/types/post_training_preference_optimize_params.py b/src/llama_stack_client/types/post_training_preference_optimize_params.py deleted file mode 100644 index f7d998eb..00000000 --- a/src/llama_stack_client/types/post_training_preference_optimize_params.py +++ /dev/null @@ -1,99 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -from typing import Dict, Union, Iterable -from typing_extensions import Literal, Required, TypedDict - -__all__ = [ - "PostTrainingPreferenceOptimizeParams", - "AlgorithmConfig", - "TrainingConfig", - "TrainingConfigDataConfig", - "TrainingConfigEfficiencyConfig", - "TrainingConfigOptimizerConfig", -] - - -class PostTrainingPreferenceOptimizeParams(TypedDict, total=False): - algorithm_config: Required[AlgorithmConfig] - """The algorithm configuration.""" - - finetuned_model: Required[str] - """The model to fine-tune.""" - - hyperparam_search_config: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - """The hyperparam search configuration.""" - - job_uuid: Required[str] - """The UUID of the job to create.""" - - logger_config: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - """The logger configuration.""" - - training_config: Required[TrainingConfig] - """The training configuration.""" - - -class AlgorithmConfig(TypedDict, total=False): - epsilon: Required[float] - - gamma: Required[float] - - reward_clip: Required[float] - - reward_scale: Required[float] - - -class TrainingConfigDataConfig(TypedDict, total=False): - batch_size: Required[int] - - data_format: Required[Literal["instruct", "dialog"]] - - dataset_id: Required[str] - - shuffle: Required[bool] - - packed: bool - - train_on_input: bool - - validation_dataset_id: str - - -class TrainingConfigEfficiencyConfig(TypedDict, total=False): - enable_activation_checkpointing: bool - - enable_activation_offloading: bool - - fsdp_cpu_offload: bool - - memory_efficient_fsdp_wrap: bool - - -class TrainingConfigOptimizerConfig(TypedDict, total=False): - lr: Required[float] - - num_warmup_steps: Required[int] - - optimizer_type: Required[Literal["adam", "adamw", "sgd"]] - - weight_decay: Required[float] - - -class TrainingConfig(TypedDict, total=False): - gradient_accumulation_steps: Required[int] - - max_steps_per_epoch: Required[int] - - n_epochs: Required[int] - - data_config: TrainingConfigDataConfig - - dtype: str - - efficiency_config: TrainingConfigEfficiencyConfig - - max_validation_steps: int - - optimizer_config: TrainingConfigOptimizerConfig diff --git a/src/llama_stack_client/types/post_training_supervised_fine_tune_params.py b/src/llama_stack_client/types/post_training_supervised_fine_tune_params.py deleted file mode 100644 index 596ec18b..00000000 --- a/src/llama_stack_client/types/post_training_supervised_fine_tune_params.py +++ /dev/null @@ -1,93 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -from typing import Dict, Union, Iterable -from typing_extensions import Literal, Required, TypedDict - -from .algorithm_config_param import AlgorithmConfigParam - -__all__ = [ - "PostTrainingSupervisedFineTuneParams", - "TrainingConfig", - "TrainingConfigDataConfig", - "TrainingConfigEfficiencyConfig", - "TrainingConfigOptimizerConfig", -] - - -class PostTrainingSupervisedFineTuneParams(TypedDict, total=False): - hyperparam_search_config: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - """The hyperparam search configuration.""" - - job_uuid: Required[str] - """The UUID of the job to create.""" - - logger_config: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - """The logger configuration.""" - - training_config: Required[TrainingConfig] - """The training configuration.""" - - algorithm_config: AlgorithmConfigParam - """The algorithm configuration.""" - - checkpoint_dir: str - """The directory to save checkpoint(s) to.""" - - model: str - """The model to fine-tune.""" - - -class TrainingConfigDataConfig(TypedDict, total=False): - batch_size: Required[int] - - data_format: Required[Literal["instruct", "dialog"]] - - dataset_id: Required[str] - - shuffle: Required[bool] - - packed: bool - - train_on_input: bool - - validation_dataset_id: str - - -class TrainingConfigEfficiencyConfig(TypedDict, total=False): - enable_activation_checkpointing: bool - - enable_activation_offloading: bool - - fsdp_cpu_offload: bool - - memory_efficient_fsdp_wrap: bool - - -class TrainingConfigOptimizerConfig(TypedDict, total=False): - lr: Required[float] - - num_warmup_steps: Required[int] - - optimizer_type: Required[Literal["adam", "adamw", "sgd"]] - - weight_decay: Required[float] - - -class TrainingConfig(TypedDict, total=False): - gradient_accumulation_steps: Required[int] - - max_steps_per_epoch: Required[int] - - n_epochs: Required[int] - - data_config: TrainingConfigDataConfig - - dtype: str - - efficiency_config: TrainingConfigEfficiencyConfig - - max_validation_steps: int - - optimizer_config: TrainingConfigOptimizerConfig diff --git a/src/llama_stack_client/types/provider_info.py b/src/llama_stack_client/types/provider_info.py deleted file mode 100644 index c9c748cc..00000000 --- a/src/llama_stack_client/types/provider_info.py +++ /dev/null @@ -1,19 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Dict, List, Union - -from .._models import BaseModel - -__all__ = ["ProviderInfo"] - - -class ProviderInfo(BaseModel): - api: str - - config: Dict[str, Union[bool, float, str, List[object], object, None]] - - health: Dict[str, Union[bool, float, str, List[object], object, None]] - - provider_id: str - - provider_type: str diff --git a/src/llama_stack_client/types/provider_list_response.py b/src/llama_stack_client/types/provider_list_response.py deleted file mode 100644 index cdbc96f7..00000000 --- a/src/llama_stack_client/types/provider_list_response.py +++ /dev/null @@ -1,10 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from typing import List -from typing_extensions import TypeAlias - -from .provider_info import ProviderInfo - -__all__ = ["ProviderListResponse"] - -ProviderListResponse: TypeAlias = List[ProviderInfo] diff --git a/src/llama_stack_client/types/query_chunks_response.py b/src/llama_stack_client/types/query_chunks_response.py deleted file mode 100644 index 97c1927c..00000000 --- a/src/llama_stack_client/types/query_chunks_response.py +++ /dev/null @@ -1,80 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Dict, List, Union, Optional - -from .._models import BaseModel -from .shared.interleaved_content import InterleavedContent - -__all__ = ["QueryChunksResponse", "Chunk", "ChunkChunkMetadata"] - - -class ChunkChunkMetadata(BaseModel): - chunk_embedding_dimension: Optional[int] = None - """The dimension of the embedding vector for the chunk.""" - - chunk_embedding_model: Optional[str] = None - """The embedding model used to create the chunk's embedding.""" - - chunk_id: Optional[str] = None - """The ID of the chunk. - - If not set, it will be generated based on the document ID and content. - """ - - chunk_tokenizer: Optional[str] = None - """The tokenizer used to create the chunk. Default is Tiktoken.""" - - chunk_window: Optional[str] = None - """The window of the chunk, which can be used to group related chunks together.""" - - content_token_count: Optional[int] = None - """The number of tokens in the content of the chunk.""" - - created_timestamp: Optional[int] = None - """An optional timestamp indicating when the chunk was created.""" - - document_id: Optional[str] = None - """The ID of the document this chunk belongs to.""" - - metadata_token_count: Optional[int] = None - """The number of tokens in the metadata of the chunk.""" - - source: Optional[str] = None - """The source of the content, such as a URL, file path, or other identifier.""" - - updated_timestamp: Optional[int] = None - """An optional timestamp indicating when the chunk was last updated.""" - - -class Chunk(BaseModel): - content: InterleavedContent - """ - The content of the chunk, which can be interleaved text, images, or other types. - """ - - metadata: Dict[str, Union[bool, float, str, List[object], object, None]] - """ - Metadata associated with the chunk that will be used in the model context during - inference. - """ - - chunk_metadata: Optional[ChunkChunkMetadata] = None - """Metadata for the chunk that will NOT be used in the context during inference. - - The `chunk_metadata` is required backend functionality. - """ - - embedding: Optional[List[float]] = None - """Optional embedding for the chunk. If not provided, it will be computed later.""" - - stored_chunk_id: Optional[str] = None - """The chunk ID that is stored in the vector database. - - Used for backend functionality. - """ - - -class QueryChunksResponse(BaseModel): - chunks: List[Chunk] - - scores: List[float] diff --git a/src/llama_stack_client/types/query_condition_param.py b/src/llama_stack_client/types/query_condition_param.py deleted file mode 100644 index 37c48093..00000000 --- a/src/llama_stack_client/types/query_condition_param.py +++ /dev/null @@ -1,16 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -from typing import Union, Iterable -from typing_extensions import Literal, Required, TypedDict - -__all__ = ["QueryConditionParam"] - - -class QueryConditionParam(TypedDict, total=False): - key: Required[str] - - op: Required[Literal["eq", "ne", "gt", "lt"]] - - value: Required[Union[bool, float, str, Iterable[object], object, None]] diff --git a/src/llama_stack_client/types/query_spans_response.py b/src/llama_stack_client/types/query_spans_response.py deleted file mode 100644 index 488a4331..00000000 --- a/src/llama_stack_client/types/query_spans_response.py +++ /dev/null @@ -1,10 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from .._models import BaseModel -from .telemetry_query_spans_response import TelemetryQuerySpansResponse - -__all__ = ["QuerySpansResponse"] - - -class QuerySpansResponse(BaseModel): - data: TelemetryQuerySpansResponse diff --git a/src/llama_stack_client/types/response_create_params.py b/src/llama_stack_client/types/response_create_params.py deleted file mode 100644 index 471d8b21..00000000 --- a/src/llama_stack_client/types/response_create_params.py +++ /dev/null @@ -1,348 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Dict, List, Union, Iterable -from typing_extensions import Literal, Required, TypeAlias, TypedDict - -__all__ = [ - "ResponseCreateParamsBase", - "InputUnionMember1", - "InputUnionMember1OpenAIResponseOutputMessageWebSearchToolCall", - "InputUnionMember1OpenAIResponseOutputMessageFileSearchToolCall", - "InputUnionMember1OpenAIResponseOutputMessageFunctionToolCall", - "InputUnionMember1OpenAIResponseInputFunctionToolCallOutput", - "InputUnionMember1OpenAIResponseMessage", - "InputUnionMember1OpenAIResponseMessageContentUnionMember1", - "InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText", - "InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage", - "InputUnionMember1OpenAIResponseMessageContentUnionMember2", - "InputUnionMember1OpenAIResponseMessageContentUnionMember2Annotation", - "InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation", - "InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation", - "InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation", - "InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath", - "Text", - "TextFormat", - "Tool", - "ToolOpenAIResponseInputToolWebSearch", - "ToolOpenAIResponseInputToolFileSearch", - "ToolOpenAIResponseInputToolFileSearchRankingOptions", - "ToolOpenAIResponseInputToolFunction", - "ToolOpenAIResponseInputToolMcp", - "ToolOpenAIResponseInputToolMcpRequireApproval", - "ToolOpenAIResponseInputToolMcpRequireApprovalApprovalFilter", - "ToolOpenAIResponseInputToolMcpAllowedTools", - "ToolOpenAIResponseInputToolMcpAllowedToolsAllowedToolsFilter", - "ResponseCreateParamsNonStreaming", - "ResponseCreateParamsStreaming", -] - - -class ResponseCreateParamsBase(TypedDict, total=False): - input: Required[Union[str, Iterable[InputUnionMember1]]] - """Input message(s) to create the response.""" - - model: Required[str] - """The underlying LLM used for completions.""" - - instructions: str - - max_infer_iters: int - - previous_response_id: 
str - """ - (Optional) if specified, the new response will be a continuation of the previous - response. This can be used to easily fork-off new responses from existing - responses. - """ - - store: bool - - temperature: float - - text: Text - - tools: Iterable[Tool] - - -class InputUnionMember1OpenAIResponseOutputMessageWebSearchToolCall(TypedDict, total=False): - id: Required[str] - - status: Required[str] - - type: Required[Literal["web_search_call"]] - - -class InputUnionMember1OpenAIResponseOutputMessageFileSearchToolCall(TypedDict, total=False): - id: Required[str] - - queries: Required[List[str]] - - status: Required[str] - - type: Required[Literal["file_search_call"]] - - results: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - - -class InputUnionMember1OpenAIResponseOutputMessageFunctionToolCall(TypedDict, total=False): - arguments: Required[str] - - call_id: Required[str] - - name: Required[str] - - type: Required[Literal["function_call"]] - - id: str - - status: str - - -class InputUnionMember1OpenAIResponseInputFunctionToolCallOutput(TypedDict, total=False): - call_id: Required[str] - - output: Required[str] - - type: Required[Literal["function_call_output"]] - - id: str - - status: str - - -class InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText( - TypedDict, total=False -): - text: Required[str] - - type: Required[Literal["input_text"]] - - -class InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage( - TypedDict, total=False -): - detail: Required[Literal["low", "high", "auto"]] - - type: Required[Literal["input_image"]] - - image_url: str - - -InputUnionMember1OpenAIResponseMessageContentUnionMember1: TypeAlias = Union[ - InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText, - InputUnionMember1OpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage, -] - - -class InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation( - TypedDict, total=False -): - file_id: Required[str] - - filename: Required[str] - - index: Required[int] - - type: Required[Literal["file_citation"]] - - -class InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation( - TypedDict, total=False -): - end_index: Required[int] - - start_index: Required[int] - - title: Required[str] - - type: Required[Literal["url_citation"]] - - url: Required[str] - - -class InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation( - TypedDict, total=False -): - container_id: Required[str] - - end_index: Required[int] - - file_id: Required[str] - - filename: Required[str] - - start_index: Required[int] - - type: Required[Literal["container_file_citation"]] - - -class InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath( - TypedDict, total=False -): - file_id: Required[str] - - index: Required[int] - - type: Required[Literal["file_path"]] - - -InputUnionMember1OpenAIResponseMessageContentUnionMember2Annotation: TypeAlias = Union[ - InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation, - InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation, - InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation, - 
InputUnionMember1OpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath, -] - - -class InputUnionMember1OpenAIResponseMessageContentUnionMember2(TypedDict, total=False): - annotations: Required[Iterable[InputUnionMember1OpenAIResponseMessageContentUnionMember2Annotation]] - - text: Required[str] - - type: Required[Literal["output_text"]] - - -class InputUnionMember1OpenAIResponseMessage(TypedDict, total=False): - content: Required[ - Union[ - str, - Iterable[InputUnionMember1OpenAIResponseMessageContentUnionMember1], - Iterable[InputUnionMember1OpenAIResponseMessageContentUnionMember2], - ] - ] - - role: Required[Literal["system", "developer", "user", "assistant"]] - - type: Required[Literal["message"]] - - id: str - - status: str - - -InputUnionMember1: TypeAlias = Union[ - InputUnionMember1OpenAIResponseOutputMessageWebSearchToolCall, - InputUnionMember1OpenAIResponseOutputMessageFileSearchToolCall, - InputUnionMember1OpenAIResponseOutputMessageFunctionToolCall, - InputUnionMember1OpenAIResponseInputFunctionToolCallOutput, - InputUnionMember1OpenAIResponseMessage, -] - - -class TextFormat(TypedDict, total=False): - type: Required[Literal["text", "json_schema", "json_object"]] - """Must be "text", "json_schema", or "json_object" to identify the format type""" - - description: str - """(Optional) A description of the response format. Only used for json_schema.""" - - name: str - """The name of the response format. Only used for json_schema.""" - - schema: Dict[str, Union[bool, float, str, Iterable[object], object, None]] - """The JSON schema the response should conform to. - - In a Python SDK, this is often a `pydantic` model. Only used for json_schema. - """ - - strict: bool - """(Optional) Whether to strictly enforce the JSON schema. - - If true, the response must match the schema exactly. Only used for json_schema. 
- """ - - -class Text(TypedDict, total=False): - format: TextFormat - """Configuration for Responses API text format.""" - - -class ToolOpenAIResponseInputToolWebSearch(TypedDict, total=False): - type: Required[Literal["web_search", "web_search_preview", "web_search_preview_2025_03_11"]] - - search_context_size: str - - -class ToolOpenAIResponseInputToolFileSearchRankingOptions(TypedDict, total=False): - ranker: str - - score_threshold: float - - -class ToolOpenAIResponseInputToolFileSearch(TypedDict, total=False): - type: Required[Literal["file_search"]] - - vector_store_ids: Required[List[str]] - - filters: Dict[str, Union[bool, float, str, Iterable[object], object, None]] - - max_num_results: int - - ranking_options: ToolOpenAIResponseInputToolFileSearchRankingOptions - - -class ToolOpenAIResponseInputToolFunction(TypedDict, total=False): - name: Required[str] - - type: Required[Literal["function"]] - - description: str - - parameters: Dict[str, Union[bool, float, str, Iterable[object], object, None]] - - strict: bool - - -class ToolOpenAIResponseInputToolMcpRequireApprovalApprovalFilter(TypedDict, total=False): - always: List[str] - - never: List[str] - - -ToolOpenAIResponseInputToolMcpRequireApproval: TypeAlias = Union[ - Literal["always", "never"], ToolOpenAIResponseInputToolMcpRequireApprovalApprovalFilter -] - - -class ToolOpenAIResponseInputToolMcpAllowedToolsAllowedToolsFilter(TypedDict, total=False): - tool_names: List[str] - - -ToolOpenAIResponseInputToolMcpAllowedTools: TypeAlias = Union[ - List[str], ToolOpenAIResponseInputToolMcpAllowedToolsAllowedToolsFilter -] - - -class ToolOpenAIResponseInputToolMcp(TypedDict, total=False): - require_approval: Required[ToolOpenAIResponseInputToolMcpRequireApproval] - - server_label: Required[str] - - server_url: Required[str] - - type: Required[Literal["mcp"]] - - allowed_tools: ToolOpenAIResponseInputToolMcpAllowedTools - - headers: Dict[str, Union[bool, float, str, Iterable[object], object, None]] - - -Tool: TypeAlias = Union[ - ToolOpenAIResponseInputToolWebSearch, - ToolOpenAIResponseInputToolFileSearch, - ToolOpenAIResponseInputToolFunction, - ToolOpenAIResponseInputToolMcp, -] - - -class ResponseCreateParamsNonStreaming(ResponseCreateParamsBase, total=False): - stream: Literal[False] - - -class ResponseCreateParamsStreaming(ResponseCreateParamsBase): - stream: Required[Literal[True]] - - -ResponseCreateParams = Union[ResponseCreateParamsNonStreaming, ResponseCreateParamsStreaming] diff --git a/src/llama_stack_client/types/response_list_params.py b/src/llama_stack_client/types/response_list_params.py deleted file mode 100644 index ca13adfa..00000000 --- a/src/llama_stack_client/types/response_list_params.py +++ /dev/null @@ -1,21 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -from typing_extensions import Literal, TypedDict - -__all__ = ["ResponseListParams"] - - -class ResponseListParams(TypedDict, total=False): - after: str - """The ID of the last response to return.""" - - limit: int - """The number of responses to return.""" - - model: str - """The model to filter responses by.""" - - order: Literal["asc", "desc"] - """The order to sort responses by when sorted by created_at ('asc' or 'desc').""" diff --git a/src/llama_stack_client/types/response_list_response.py b/src/llama_stack_client/types/response_list_response.py deleted file mode 100644 index d46213ef..00000000 --- a/src/llama_stack_client/types/response_list_response.py +++ /dev/null @@ -1,473 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Dict, List, Union, Optional -from typing_extensions import Literal, Annotated, TypeAlias - -from pydantic import Field as FieldInfo - -from .._utils import PropertyInfo -from .._models import BaseModel - -__all__ = [ - "ResponseListResponse", - "Data", - "DataInput", - "DataInputOpenAIResponseOutputMessageWebSearchToolCall", - "DataInputOpenAIResponseOutputMessageFileSearchToolCall", - "DataInputOpenAIResponseOutputMessageFunctionToolCall", - "DataInputOpenAIResponseInputFunctionToolCallOutput", - "DataInputOpenAIResponseMessage", - "DataInputOpenAIResponseMessageContentUnionMember1", - "DataInputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText", - "DataInputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage", - "DataInputOpenAIResponseMessageContentUnionMember2", - "DataInputOpenAIResponseMessageContentUnionMember2Annotation", - "DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation", - "DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation", - "DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation", - "DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath", - "DataOutput", - "DataOutputOpenAIResponseMessage", - "DataOutputOpenAIResponseMessageContentUnionMember1", - "DataOutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText", - "DataOutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage", - "DataOutputOpenAIResponseMessageContentUnionMember2", - "DataOutputOpenAIResponseMessageContentUnionMember2Annotation", - "DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation", - "DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation", - "DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation", - "DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath", - "DataOutputOpenAIResponseOutputMessageWebSearchToolCall", - "DataOutputOpenAIResponseOutputMessageFileSearchToolCall", - "DataOutputOpenAIResponseOutputMessageFunctionToolCall", - "DataOutputOpenAIResponseOutputMessageMcpCall", - "DataOutputOpenAIResponseOutputMessageMcpListTools", - "DataOutputOpenAIResponseOutputMessageMcpListToolsTool", - "DataText", - "DataTextFormat", - "DataError", -] - - -class DataInputOpenAIResponseOutputMessageWebSearchToolCall(BaseModel): - id: str - - status: str - - type: Literal["web_search_call"] - - -class 
DataInputOpenAIResponseOutputMessageFileSearchToolCall(BaseModel): - id: str - - queries: List[str] - - status: str - - type: Literal["file_search_call"] - - results: Optional[List[Dict[str, Union[bool, float, str, List[object], object, None]]]] = None - - -class DataInputOpenAIResponseOutputMessageFunctionToolCall(BaseModel): - arguments: str - - call_id: str - - name: str - - type: Literal["function_call"] - - id: Optional[str] = None - - status: Optional[str] = None - - -class DataInputOpenAIResponseInputFunctionToolCallOutput(BaseModel): - call_id: str - - output: str - - type: Literal["function_call_output"] - - id: Optional[str] = None - - status: Optional[str] = None - - -class DataInputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText(BaseModel): - text: str - - type: Literal["input_text"] - - -class DataInputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage(BaseModel): - detail: Literal["low", "high", "auto"] - - type: Literal["input_image"] - - image_url: Optional[str] = None - - -DataInputOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[ - Union[ - DataInputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText, - DataInputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage, - ], - PropertyInfo(discriminator="type"), -] - - -class DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation(BaseModel): - file_id: str - - filename: str - - index: int - - type: Literal["file_citation"] - - -class DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation(BaseModel): - end_index: int - - start_index: int - - title: str - - type: Literal["url_citation"] - - url: str - - -class DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation( - BaseModel -): - container_id: str - - end_index: int - - file_id: str - - filename: str - - start_index: int - - type: Literal["container_file_citation"] - - -class DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath(BaseModel): - file_id: str - - index: int - - type: Literal["file_path"] - - -DataInputOpenAIResponseMessageContentUnionMember2Annotation: TypeAlias = Annotated[ - Union[ - DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation, - DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation, - DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation, - DataInputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath, - ], - PropertyInfo(discriminator="type"), -] - - -class DataInputOpenAIResponseMessageContentUnionMember2(BaseModel): - annotations: List[DataInputOpenAIResponseMessageContentUnionMember2Annotation] - - text: str - - type: Literal["output_text"] - - -class DataInputOpenAIResponseMessage(BaseModel): - content: Union[ - str, - List[DataInputOpenAIResponseMessageContentUnionMember1], - List[DataInputOpenAIResponseMessageContentUnionMember2], - ] - - role: Literal["system", "developer", "user", "assistant"] - - type: Literal["message"] - - id: Optional[str] = None - - status: Optional[str] = None - - -DataInput: TypeAlias = Union[ - DataInputOpenAIResponseOutputMessageWebSearchToolCall, - DataInputOpenAIResponseOutputMessageFileSearchToolCall, - DataInputOpenAIResponseOutputMessageFunctionToolCall, - 
DataInputOpenAIResponseInputFunctionToolCallOutput, - DataInputOpenAIResponseMessage, -] - - -class DataOutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText(BaseModel): - text: str - - type: Literal["input_text"] - - -class DataOutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage(BaseModel): - detail: Literal["low", "high", "auto"] - - type: Literal["input_image"] - - image_url: Optional[str] = None - - -DataOutputOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[ - Union[ - DataOutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText, - DataOutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage, - ], - PropertyInfo(discriminator="type"), -] - - -class DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation(BaseModel): - file_id: str - - filename: str - - index: int - - type: Literal["file_citation"] - - -class DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation(BaseModel): - end_index: int - - start_index: int - - title: str - - type: Literal["url_citation"] - - url: str - - -class DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation( - BaseModel -): - container_id: str - - end_index: int - - file_id: str - - filename: str - - start_index: int - - type: Literal["container_file_citation"] - - -class DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath(BaseModel): - file_id: str - - index: int - - type: Literal["file_path"] - - -DataOutputOpenAIResponseMessageContentUnionMember2Annotation: TypeAlias = Annotated[ - Union[ - DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation, - DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation, - DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation, - DataOutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath, - ], - PropertyInfo(discriminator="type"), -] - - -class DataOutputOpenAIResponseMessageContentUnionMember2(BaseModel): - annotations: List[DataOutputOpenAIResponseMessageContentUnionMember2Annotation] - - text: str - - type: Literal["output_text"] - - -class DataOutputOpenAIResponseMessage(BaseModel): - content: Union[ - str, - List[DataOutputOpenAIResponseMessageContentUnionMember1], - List[DataOutputOpenAIResponseMessageContentUnionMember2], - ] - - role: Literal["system", "developer", "user", "assistant"] - - type: Literal["message"] - - id: Optional[str] = None - - status: Optional[str] = None - - -class DataOutputOpenAIResponseOutputMessageWebSearchToolCall(BaseModel): - id: str - - status: str - - type: Literal["web_search_call"] - - -class DataOutputOpenAIResponseOutputMessageFileSearchToolCall(BaseModel): - id: str - - queries: List[str] - - status: str - - type: Literal["file_search_call"] - - results: Optional[List[Dict[str, Union[bool, float, str, List[object], object, None]]]] = None - - -class DataOutputOpenAIResponseOutputMessageFunctionToolCall(BaseModel): - arguments: str - - call_id: str - - name: str - - type: Literal["function_call"] - - id: Optional[str] = None - - status: Optional[str] = None - - -class DataOutputOpenAIResponseOutputMessageMcpCall(BaseModel): - id: str - - arguments: str - - name: str - - server_label: str - - type: Literal["mcp_call"] - - 
error: Optional[str] = None - - output: Optional[str] = None - - -class DataOutputOpenAIResponseOutputMessageMcpListToolsTool(BaseModel): - input_schema: Dict[str, Union[bool, float, str, List[object], object, None]] - - name: str - - description: Optional[str] = None - - -class DataOutputOpenAIResponseOutputMessageMcpListTools(BaseModel): - id: str - - server_label: str - - tools: List[DataOutputOpenAIResponseOutputMessageMcpListToolsTool] - - type: Literal["mcp_list_tools"] - - -DataOutput: TypeAlias = Annotated[ - Union[ - DataOutputOpenAIResponseMessage, - DataOutputOpenAIResponseOutputMessageWebSearchToolCall, - DataOutputOpenAIResponseOutputMessageFileSearchToolCall, - DataOutputOpenAIResponseOutputMessageFunctionToolCall, - DataOutputOpenAIResponseOutputMessageMcpCall, - DataOutputOpenAIResponseOutputMessageMcpListTools, - ], - PropertyInfo(discriminator="type"), -] - - -class DataTextFormat(BaseModel): - type: Literal["text", "json_schema", "json_object"] - """Must be "text", "json_schema", or "json_object" to identify the format type""" - - description: Optional[str] = None - """(Optional) A description of the response format. Only used for json_schema.""" - - name: Optional[str] = None - """The name of the response format. Only used for json_schema.""" - - schema_: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = FieldInfo( - alias="schema", default=None - ) - """The JSON schema the response should conform to. - - In a Python SDK, this is often a `pydantic` model. Only used for json_schema. - """ - - strict: Optional[bool] = None - """(Optional) Whether to strictly enforce the JSON schema. - - If true, the response must match the schema exactly. Only used for json_schema. - """ - - -class DataText(BaseModel): - format: Optional[DataTextFormat] = None - """Configuration for Responses API text format.""" - - -class DataError(BaseModel): - code: str - - message: str - - -class Data(BaseModel): - id: str - - created_at: int - - input: List[DataInput] - - model: str - - object: Literal["response"] - - output: List[DataOutput] - - parallel_tool_calls: bool - - status: str - - text: DataText - - error: Optional[DataError] = None - - previous_response_id: Optional[str] = None - - temperature: Optional[float] = None - - top_p: Optional[float] = None - - truncation: Optional[str] = None - - user: Optional[str] = None - - -class ResponseListResponse(BaseModel): - data: List[Data] - - first_id: str - - has_more: bool - - last_id: str - - object: Literal["list"] diff --git a/src/llama_stack_client/types/response_object.py b/src/llama_stack_client/types/response_object.py deleted file mode 100644 index e4b313d3..00000000 --- a/src/llama_stack_client/types/response_object.py +++ /dev/null @@ -1,290 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from typing import Dict, List, Union, Optional -from typing_extensions import Literal, Annotated, TypeAlias - -from pydantic import Field as FieldInfo - -from .._utils import PropertyInfo -from .._models import BaseModel - -__all__ = [ - "ResponseObject", - "Output", - "OutputOpenAIResponseMessage", - "OutputOpenAIResponseMessageContentUnionMember1", - "OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText", - "OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage", - "OutputOpenAIResponseMessageContentUnionMember2", - "OutputOpenAIResponseMessageContentUnionMember2Annotation", - "OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation", - "OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation", - "OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation", - "OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath", - "OutputOpenAIResponseOutputMessageWebSearchToolCall", - "OutputOpenAIResponseOutputMessageFileSearchToolCall", - "OutputOpenAIResponseOutputMessageFunctionToolCall", - "OutputOpenAIResponseOutputMessageMcpCall", - "OutputOpenAIResponseOutputMessageMcpListTools", - "OutputOpenAIResponseOutputMessageMcpListToolsTool", - "Text", - "TextFormat", - "Error", -] - - -class OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText(BaseModel): - text: str - - type: Literal["input_text"] - - -class OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage(BaseModel): - detail: Literal["low", "high", "auto"] - - type: Literal["input_image"] - - image_url: Optional[str] = None - - -OutputOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[ - Union[ - OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText, - OutputOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage, - ], - PropertyInfo(discriminator="type"), -] - - -class OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation(BaseModel): - file_id: str - - filename: str - - index: int - - type: Literal["file_citation"] - - -class OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation(BaseModel): - end_index: int - - start_index: int - - title: str - - type: Literal["url_citation"] - - url: str - - -class OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation(BaseModel): - container_id: str - - end_index: int - - file_id: str - - filename: str - - start_index: int - - type: Literal["container_file_citation"] - - -class OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath(BaseModel): - file_id: str - - index: int - - type: Literal["file_path"] - - -OutputOpenAIResponseMessageContentUnionMember2Annotation: TypeAlias = Annotated[ - Union[ - OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation, - OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation, - OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation, - OutputOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath, - ], - PropertyInfo(discriminator="type"), -] - - -class OutputOpenAIResponseMessageContentUnionMember2(BaseModel): - annotations: 
List[OutputOpenAIResponseMessageContentUnionMember2Annotation] - - text: str - - type: Literal["output_text"] - - -class OutputOpenAIResponseMessage(BaseModel): - content: Union[ - str, List[OutputOpenAIResponseMessageContentUnionMember1], List[OutputOpenAIResponseMessageContentUnionMember2] - ] - - role: Literal["system", "developer", "user", "assistant"] - - type: Literal["message"] - - id: Optional[str] = None - - status: Optional[str] = None - - -class OutputOpenAIResponseOutputMessageWebSearchToolCall(BaseModel): - id: str - - status: str - - type: Literal["web_search_call"] - - -class OutputOpenAIResponseOutputMessageFileSearchToolCall(BaseModel): - id: str - - queries: List[str] - - status: str - - type: Literal["file_search_call"] - - results: Optional[List[Dict[str, Union[bool, float, str, List[object], object, None]]]] = None - - -class OutputOpenAIResponseOutputMessageFunctionToolCall(BaseModel): - arguments: str - - call_id: str - - name: str - - type: Literal["function_call"] - - id: Optional[str] = None - - status: Optional[str] = None - - -class OutputOpenAIResponseOutputMessageMcpCall(BaseModel): - id: str - - arguments: str - - name: str - - server_label: str - - type: Literal["mcp_call"] - - error: Optional[str] = None - - output: Optional[str] = None - - -class OutputOpenAIResponseOutputMessageMcpListToolsTool(BaseModel): - input_schema: Dict[str, Union[bool, float, str, List[object], object, None]] - - name: str - - description: Optional[str] = None - - -class OutputOpenAIResponseOutputMessageMcpListTools(BaseModel): - id: str - - server_label: str - - tools: List[OutputOpenAIResponseOutputMessageMcpListToolsTool] - - type: Literal["mcp_list_tools"] - - -Output: TypeAlias = Annotated[ - Union[ - OutputOpenAIResponseMessage, - OutputOpenAIResponseOutputMessageWebSearchToolCall, - OutputOpenAIResponseOutputMessageFileSearchToolCall, - OutputOpenAIResponseOutputMessageFunctionToolCall, - OutputOpenAIResponseOutputMessageMcpCall, - OutputOpenAIResponseOutputMessageMcpListTools, - ], - PropertyInfo(discriminator="type"), -] - - -class TextFormat(BaseModel): - type: Literal["text", "json_schema", "json_object"] - """Must be "text", "json_schema", or "json_object" to identify the format type""" - - description: Optional[str] = None - """(Optional) A description of the response format. Only used for json_schema.""" - - name: Optional[str] = None - """The name of the response format. Only used for json_schema.""" - - schema_: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = FieldInfo( - alias="schema", default=None - ) - """The JSON schema the response should conform to. - - In a Python SDK, this is often a `pydantic` model. Only used for json_schema. - """ - - strict: Optional[bool] = None - """(Optional) Whether to strictly enforce the JSON schema. - - If true, the response must match the schema exactly. Only used for json_schema. 
- """ - - -class Text(BaseModel): - format: Optional[TextFormat] = None - """Configuration for Responses API text format.""" - - -class Error(BaseModel): - code: str - - message: str - - -class ResponseObject(BaseModel): - @property - def output_text(self) -> str: - texts: List[str] = [] - for output in self.output: - if output.type == "message": - for content in output.content: - if content.type == "output_text": - texts.append(content.text) - return "".join(texts) - - id: str - - created_at: int - - model: str - - object: Literal["response"] - - output: List[Output] - - parallel_tool_calls: bool - - status: str - - text: Text - - error: Optional[Error] = None - - previous_response_id: Optional[str] = None - - temperature: Optional[float] = None - - top_p: Optional[float] = None - - truncation: Optional[str] = None - - user: Optional[str] = None diff --git a/src/llama_stack_client/types/response_object_stream.py b/src/llama_stack_client/types/response_object_stream.py deleted file mode 100644 index 311ad6bd..00000000 --- a/src/llama_stack_client/types/response_object_stream.py +++ /dev/null @@ -1,677 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Dict, List, Union, Optional -from typing_extensions import Literal, Annotated, TypeAlias - -from .._utils import PropertyInfo -from .._models import BaseModel -from .response_object import ResponseObject - -__all__ = [ - "ResponseObjectStream", - "OpenAIResponseObjectStreamResponseCreated", - "OpenAIResponseObjectStreamResponseOutputItemAdded", - "OpenAIResponseObjectStreamResponseOutputItemAddedItem", - "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessage", - "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1", - "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText", - "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage", - "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2", - "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2Annotation", - "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation", - "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation", - "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation", - "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath", - "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageWebSearchToolCall", - "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageFileSearchToolCall", - "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageFunctionToolCall", - "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageMcpCall", - "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageMcpListTools", - "OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageMcpListToolsTool", - "OpenAIResponseObjectStreamResponseOutputItemDone", - 
"OpenAIResponseObjectStreamResponseOutputItemDoneItem", - "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessage", - "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1", - "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText", - "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage", - "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2", - "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2Annotation", - "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation", - "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation", - "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation", - "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath", - "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageWebSearchToolCall", - "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageFileSearchToolCall", - "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageFunctionToolCall", - "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageMcpCall", - "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageMcpListTools", - "OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageMcpListToolsTool", - "OpenAIResponseObjectStreamResponseOutputTextDelta", - "OpenAIResponseObjectStreamResponseOutputTextDone", - "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta", - "OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone", - "OpenAIResponseObjectStreamResponseWebSearchCallInProgress", - "OpenAIResponseObjectStreamResponseWebSearchCallSearching", - "OpenAIResponseObjectStreamResponseWebSearchCallCompleted", - "OpenAIResponseObjectStreamResponseMcpListToolsInProgress", - "OpenAIResponseObjectStreamResponseMcpListToolsFailed", - "OpenAIResponseObjectStreamResponseMcpListToolsCompleted", - "OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta", - "OpenAIResponseObjectStreamResponseMcpCallArgumentsDone", - "OpenAIResponseObjectStreamResponseMcpCallInProgress", - "OpenAIResponseObjectStreamResponseMcpCallFailed", - "OpenAIResponseObjectStreamResponseMcpCallCompleted", - "OpenAIResponseObjectStreamResponseCompleted", -] - - -class OpenAIResponseObjectStreamResponseCreated(BaseModel): - response: ResponseObject - - type: Literal["response.created"] - - -class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText( - BaseModel -): - text: str - - type: Literal["input_text"] - - -class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage( - BaseModel -): - detail: Literal["low", "high", "auto"] - - type: Literal["input_image"] - - image_url: Optional[str] = None - - -OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1: TypeAlias = 
Annotated[ - Union[ - OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText, - OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage, - ], - PropertyInfo(discriminator="type"), -] - - -class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation( - BaseModel -): - file_id: str - - filename: str - - index: int - - type: Literal["file_citation"] - - -class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation( - BaseModel -): - end_index: int - - start_index: int - - title: str - - type: Literal["url_citation"] - - url: str - - -class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation( - BaseModel -): - container_id: str - - end_index: int - - file_id: str - - filename: str - - start_index: int - - type: Literal["container_file_citation"] - - -class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath( - BaseModel -): - file_id: str - - index: int - - type: Literal["file_path"] - - -OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2Annotation: TypeAlias = Annotated[ - Union[ - OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation, - OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation, - OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation, - OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath, - ], - PropertyInfo(discriminator="type"), -] - - -class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2(BaseModel): - annotations: List[ - OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2Annotation - ] - - text: str - - type: Literal["output_text"] - - -class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessage(BaseModel): - content: Union[ - str, - List[OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember1], - List[OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessageContentUnionMember2], - ] - - role: Literal["system", "developer", "user", "assistant"] - - type: Literal["message"] - - id: Optional[str] = None - - status: Optional[str] = None - - -class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageWebSearchToolCall(BaseModel): - id: str - - status: str - - type: Literal["web_search_call"] - - -class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageFileSearchToolCall(BaseModel): - id: str - - queries: List[str] - - status: str - - type: Literal["file_search_call"] - - results: Optional[List[Dict[str, Union[bool, float, str, List[object], object, None]]]] = None - - -class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageFunctionToolCall(BaseModel): - 
arguments: str - - call_id: str - - name: str - - type: Literal["function_call"] - - id: Optional[str] = None - - status: Optional[str] = None - - -class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageMcpCall(BaseModel): - id: str - - arguments: str - - name: str - - server_label: str - - type: Literal["mcp_call"] - - error: Optional[str] = None - - output: Optional[str] = None - - -class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageMcpListToolsTool(BaseModel): - input_schema: Dict[str, Union[bool, float, str, List[object], object, None]] - - name: str - - description: Optional[str] = None - - -class OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageMcpListTools(BaseModel): - id: str - - server_label: str - - tools: List[OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageMcpListToolsTool] - - type: Literal["mcp_list_tools"] - - -OpenAIResponseObjectStreamResponseOutputItemAddedItem: TypeAlias = Annotated[ - Union[ - OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseMessage, - OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageWebSearchToolCall, - OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageFileSearchToolCall, - OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageFunctionToolCall, - OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageMcpCall, - OpenAIResponseObjectStreamResponseOutputItemAddedItemOpenAIResponseOutputMessageMcpListTools, - ], - PropertyInfo(discriminator="type"), -] - - -class OpenAIResponseObjectStreamResponseOutputItemAdded(BaseModel): - item: OpenAIResponseObjectStreamResponseOutputItemAddedItem - """Corresponds to the various Message types in the Responses API. - - They are all under one type because the Responses API gives them all the same - "type" value, and there is no way to tell them apart in certain scenarios. 
- """ - - output_index: int - - response_id: str - - sequence_number: int - - type: Literal["response.output_item.added"] - - -class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText( - BaseModel -): - text: str - - type: Literal["input_text"] - - -class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage( - BaseModel -): - detail: Literal["low", "high", "auto"] - - type: Literal["input_image"] - - image_url: Optional[str] = None - - -OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[ - Union[ - OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText, - OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage, - ], - PropertyInfo(discriminator="type"), -] - - -class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation( - BaseModel -): - file_id: str - - filename: str - - index: int - - type: Literal["file_citation"] - - -class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation( - BaseModel -): - end_index: int - - start_index: int - - title: str - - type: Literal["url_citation"] - - url: str - - -class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation( - BaseModel -): - container_id: str - - end_index: int - - file_id: str - - filename: str - - start_index: int - - type: Literal["container_file_citation"] - - -class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath( - BaseModel -): - file_id: str - - index: int - - type: Literal["file_path"] - - -OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2Annotation: TypeAlias = Annotated[ - Union[ - OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation, - OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation, - OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation, - OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath, - ], - PropertyInfo(discriminator="type"), -] - - -class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2(BaseModel): - annotations: List[ - OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2Annotation - ] - - text: str - - type: Literal["output_text"] - - -class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessage(BaseModel): - content: Union[ - str, - List[OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember1], - List[OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessageContentUnionMember2], - ] - - role: Literal["system", "developer", "user", "assistant"] - - type: 
Literal["message"] - - id: Optional[str] = None - - status: Optional[str] = None - - -class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageWebSearchToolCall(BaseModel): - id: str - - status: str - - type: Literal["web_search_call"] - - -class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageFileSearchToolCall(BaseModel): - id: str - - queries: List[str] - - status: str - - type: Literal["file_search_call"] - - results: Optional[List[Dict[str, Union[bool, float, str, List[object], object, None]]]] = None - - -class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageFunctionToolCall(BaseModel): - arguments: str - - call_id: str - - name: str - - type: Literal["function_call"] - - id: Optional[str] = None - - status: Optional[str] = None - - -class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageMcpCall(BaseModel): - id: str - - arguments: str - - name: str - - server_label: str - - type: Literal["mcp_call"] - - error: Optional[str] = None - - output: Optional[str] = None - - -class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageMcpListToolsTool(BaseModel): - input_schema: Dict[str, Union[bool, float, str, List[object], object, None]] - - name: str - - description: Optional[str] = None - - -class OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageMcpListTools(BaseModel): - id: str - - server_label: str - - tools: List[OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageMcpListToolsTool] - - type: Literal["mcp_list_tools"] - - -OpenAIResponseObjectStreamResponseOutputItemDoneItem: TypeAlias = Annotated[ - Union[ - OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseMessage, - OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageWebSearchToolCall, - OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageFileSearchToolCall, - OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageFunctionToolCall, - OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageMcpCall, - OpenAIResponseObjectStreamResponseOutputItemDoneItemOpenAIResponseOutputMessageMcpListTools, - ], - PropertyInfo(discriminator="type"), -] - - -class OpenAIResponseObjectStreamResponseOutputItemDone(BaseModel): - item: OpenAIResponseObjectStreamResponseOutputItemDoneItem - """Corresponds to the various Message types in the Responses API. - - They are all under one type because the Responses API gives them all the same - "type" value, and there is no way to tell them apart in certain scenarios. 
- """ - - output_index: int - - response_id: str - - sequence_number: int - - type: Literal["response.output_item.done"] - - -class OpenAIResponseObjectStreamResponseOutputTextDelta(BaseModel): - content_index: int - - delta: str - - item_id: str - - output_index: int - - sequence_number: int - - type: Literal["response.output_text.delta"] - - -class OpenAIResponseObjectStreamResponseOutputTextDone(BaseModel): - content_index: int - - item_id: str - - output_index: int - - sequence_number: int - - text: str - - type: Literal["response.output_text.done"] - - -class OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta(BaseModel): - delta: str - - item_id: str - - output_index: int - - sequence_number: int - - type: Literal["response.function_call_arguments.delta"] - - -class OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone(BaseModel): - arguments: str - - item_id: str - - output_index: int - - sequence_number: int - - type: Literal["response.function_call_arguments.done"] - - -class OpenAIResponseObjectStreamResponseWebSearchCallInProgress(BaseModel): - item_id: str - - output_index: int - - sequence_number: int - - type: Literal["response.web_search_call.in_progress"] - - -class OpenAIResponseObjectStreamResponseWebSearchCallSearching(BaseModel): - item_id: str - - output_index: int - - sequence_number: int - - type: Literal["response.web_search_call.searching"] - - -class OpenAIResponseObjectStreamResponseWebSearchCallCompleted(BaseModel): - item_id: str - - output_index: int - - sequence_number: int - - type: Literal["response.web_search_call.completed"] - - -class OpenAIResponseObjectStreamResponseMcpListToolsInProgress(BaseModel): - sequence_number: int - - type: Literal["response.mcp_list_tools.in_progress"] - - -class OpenAIResponseObjectStreamResponseMcpListToolsFailed(BaseModel): - sequence_number: int - - type: Literal["response.mcp_list_tools.failed"] - - -class OpenAIResponseObjectStreamResponseMcpListToolsCompleted(BaseModel): - sequence_number: int - - type: Literal["response.mcp_list_tools.completed"] - - -class OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta(BaseModel): - delta: str - - item_id: str - - output_index: int - - sequence_number: int - - type: Literal["response.mcp_call.arguments.delta"] - - -class OpenAIResponseObjectStreamResponseMcpCallArgumentsDone(BaseModel): - arguments: str - - item_id: str - - output_index: int - - sequence_number: int - - type: Literal["response.mcp_call.arguments.done"] - - -class OpenAIResponseObjectStreamResponseMcpCallInProgress(BaseModel): - item_id: str - - output_index: int - - sequence_number: int - - type: Literal["response.mcp_call.in_progress"] - - -class OpenAIResponseObjectStreamResponseMcpCallFailed(BaseModel): - sequence_number: int - - type: Literal["response.mcp_call.failed"] - - -class OpenAIResponseObjectStreamResponseMcpCallCompleted(BaseModel): - sequence_number: int - - type: Literal["response.mcp_call.completed"] - - -class OpenAIResponseObjectStreamResponseCompleted(BaseModel): - response: ResponseObject - - type: Literal["response.completed"] - - -ResponseObjectStream: TypeAlias = Annotated[ - Union[ - OpenAIResponseObjectStreamResponseCreated, - OpenAIResponseObjectStreamResponseOutputItemAdded, - OpenAIResponseObjectStreamResponseOutputItemDone, - OpenAIResponseObjectStreamResponseOutputTextDelta, - OpenAIResponseObjectStreamResponseOutputTextDone, - OpenAIResponseObjectStreamResponseFunctionCallArgumentsDelta, - OpenAIResponseObjectStreamResponseFunctionCallArgumentsDone, - 
OpenAIResponseObjectStreamResponseWebSearchCallInProgress, - OpenAIResponseObjectStreamResponseWebSearchCallSearching, - OpenAIResponseObjectStreamResponseWebSearchCallCompleted, - OpenAIResponseObjectStreamResponseMcpListToolsInProgress, - OpenAIResponseObjectStreamResponseMcpListToolsFailed, - OpenAIResponseObjectStreamResponseMcpListToolsCompleted, - OpenAIResponseObjectStreamResponseMcpCallArgumentsDelta, - OpenAIResponseObjectStreamResponseMcpCallArgumentsDone, - OpenAIResponseObjectStreamResponseMcpCallInProgress, - OpenAIResponseObjectStreamResponseMcpCallFailed, - OpenAIResponseObjectStreamResponseMcpCallCompleted, - OpenAIResponseObjectStreamResponseCompleted, - ], - PropertyInfo(discriminator="type"), -] diff --git a/src/llama_stack_client/types/responses/__init__.py b/src/llama_stack_client/types/responses/__init__.py deleted file mode 100644 index cb934712..00000000 --- a/src/llama_stack_client/types/responses/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from .input_item_list_params import InputItemListParams as InputItemListParams -from .input_item_list_response import InputItemListResponse as InputItemListResponse diff --git a/src/llama_stack_client/types/responses/input_item_list_params.py b/src/llama_stack_client/types/responses/input_item_list_params.py deleted file mode 100644 index a78bd215..00000000 --- a/src/llama_stack_client/types/responses/input_item_list_params.py +++ /dev/null @@ -1,28 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import List -from typing_extensions import Literal, TypedDict - -__all__ = ["InputItemListParams"] - - -class InputItemListParams(TypedDict, total=False): - after: str - """An item ID to list items after, used for pagination.""" - - before: str - """An item ID to list items before, used for pagination.""" - - include: List[str] - """Additional fields to include in the response.""" - - limit: int - """A limit on the number of objects to be returned. - - Limit can range between 1 and 100, and the default is 20. - """ - - order: Literal["asc", "desc"] - """The order to return the input items in. Default is desc.""" diff --git a/src/llama_stack_client/types/responses/input_item_list_response.py b/src/llama_stack_client/types/responses/input_item_list_response.py deleted file mode 100644 index aadcd9f2..00000000 --- a/src/llama_stack_client/types/responses/input_item_list_response.py +++ /dev/null @@ -1,187 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
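[Editor's note: a minimal sketch of how the pagination fields of the InputItemListParams TypedDict deleted just above were meant to be filled in. The item ID is a made-up placeholder, not a real cursor value.]

# Sketch only: keys mirror InputItemListParams (all optional, total=False).
input_item_list_params = {
    "limit": 20,          # between 1 and 100; the documented default is 20
    "order": "desc",      # "asc" or "desc"; default is desc
    "after": "item_123",  # placeholder item ID used as a pagination cursor
}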
- -from typing import Dict, List, Union, Optional -from typing_extensions import Literal, Annotated, TypeAlias - -from ..._utils import PropertyInfo -from ..._models import BaseModel - -__all__ = [ - "InputItemListResponse", - "Data", - "DataOpenAIResponseOutputMessageWebSearchToolCall", - "DataOpenAIResponseOutputMessageFileSearchToolCall", - "DataOpenAIResponseOutputMessageFunctionToolCall", - "DataOpenAIResponseInputFunctionToolCallOutput", - "DataOpenAIResponseMessage", - "DataOpenAIResponseMessageContentUnionMember1", - "DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText", - "DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage", - "DataOpenAIResponseMessageContentUnionMember2", - "DataOpenAIResponseMessageContentUnionMember2Annotation", - "DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation", - "DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation", - "DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation", - "DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath", -] - - -class DataOpenAIResponseOutputMessageWebSearchToolCall(BaseModel): - id: str - - status: str - - type: Literal["web_search_call"] - - -class DataOpenAIResponseOutputMessageFileSearchToolCall(BaseModel): - id: str - - queries: List[str] - - status: str - - type: Literal["file_search_call"] - - results: Optional[List[Dict[str, Union[bool, float, str, List[object], object, None]]]] = None - - -class DataOpenAIResponseOutputMessageFunctionToolCall(BaseModel): - arguments: str - - call_id: str - - name: str - - type: Literal["function_call"] - - id: Optional[str] = None - - status: Optional[str] = None - - -class DataOpenAIResponseInputFunctionToolCallOutput(BaseModel): - call_id: str - - output: str - - type: Literal["function_call_output"] - - id: Optional[str] = None - - status: Optional[str] = None - - -class DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText(BaseModel): - text: str - - type: Literal["input_text"] - - -class DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage(BaseModel): - detail: Literal["low", "high", "auto"] - - type: Literal["input_image"] - - image_url: Optional[str] = None - - -DataOpenAIResponseMessageContentUnionMember1: TypeAlias = Annotated[ - Union[ - DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentText, - DataOpenAIResponseMessageContentUnionMember1OpenAIResponseInputMessageContentImage, - ], - PropertyInfo(discriminator="type"), -] - - -class DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation(BaseModel): - file_id: str - - filename: str - - index: int - - type: Literal["file_citation"] - - -class DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation(BaseModel): - end_index: int - - start_index: int - - title: str - - type: Literal["url_citation"] - - url: str - - -class DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation(BaseModel): - container_id: str - - end_index: int - - file_id: str - - filename: str - - start_index: int - - type: Literal["container_file_citation"] - - -class DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath(BaseModel): - file_id: str - - index: int - - type: Literal["file_path"] - - 
-DataOpenAIResponseMessageContentUnionMember2Annotation: TypeAlias = Annotated[ - Union[ - DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFileCitation, - DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationCitation, - DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationContainerFileCitation, - DataOpenAIResponseMessageContentUnionMember2AnnotationOpenAIResponseAnnotationFilePath, - ], - PropertyInfo(discriminator="type"), -] - - -class DataOpenAIResponseMessageContentUnionMember2(BaseModel): - annotations: List[DataOpenAIResponseMessageContentUnionMember2Annotation] - - text: str - - type: Literal["output_text"] - - -class DataOpenAIResponseMessage(BaseModel): - content: Union[ - str, List[DataOpenAIResponseMessageContentUnionMember1], List[DataOpenAIResponseMessageContentUnionMember2] - ] - - role: Literal["system", "developer", "user", "assistant"] - - type: Literal["message"] - - id: Optional[str] = None - - status: Optional[str] = None - - -Data: TypeAlias = Union[ - DataOpenAIResponseOutputMessageWebSearchToolCall, - DataOpenAIResponseOutputMessageFileSearchToolCall, - DataOpenAIResponseOutputMessageFunctionToolCall, - DataOpenAIResponseInputFunctionToolCallOutput, - DataOpenAIResponseMessage, -] - - -class InputItemListResponse(BaseModel): - data: List[Data] - - object: Literal["list"] diff --git a/src/llama_stack_client/types/route_info.py b/src/llama_stack_client/types/route_info.py deleted file mode 100644 index 3d8880f1..00000000 --- a/src/llama_stack_client/types/route_info.py +++ /dev/null @@ -1,15 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import List - -from .._models import BaseModel - -__all__ = ["RouteInfo"] - - -class RouteInfo(BaseModel): - method: str - - provider_types: List[str] - - route: str diff --git a/src/llama_stack_client/types/route_list_response.py b/src/llama_stack_client/types/route_list_response.py deleted file mode 100644 index cec8e0e1..00000000 --- a/src/llama_stack_client/types/route_list_response.py +++ /dev/null @@ -1,10 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import List -from typing_extensions import TypeAlias - -from .route_info import RouteInfo - -__all__ = ["RouteListResponse"] - -RouteListResponse: TypeAlias = List[RouteInfo] diff --git a/src/llama_stack_client/types/run_shield_response.py b/src/llama_stack_client/types/run_shield_response.py deleted file mode 100644 index 1dbdf5a0..00000000 --- a/src/llama_stack_client/types/run_shield_response.py +++ /dev/null @@ -1,12 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Optional - -from .._models import BaseModel -from .shared.safety_violation import SafetyViolation - -__all__ = ["RunShieldResponse"] - - -class RunShieldResponse(BaseModel): - violation: Optional[SafetyViolation] = None diff --git a/src/llama_stack_client/types/safety_run_shield_params.py b/src/llama_stack_client/types/safety_run_shield_params.py deleted file mode 100644 index 764be674..00000000 --- a/src/llama_stack_client/types/safety_run_shield_params.py +++ /dev/null @@ -1,21 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
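[Editor's note: a hedged sketch of consuming the InputItemListResponse deleted above. Every member of the Data union carries a `type` literal, so a caller can branch on it; `page` is an assumed, already-retrieved InputItemListResponse instance.]

def print_input_items(page):
    # page.data is a List[Data]; only fields defined in the removed models are used.
    for item in page.data:
        if item.type == "message":
            print(item.role, item.content)
        elif item.type == "function_call":
            print(item.name, item.arguments)
        elif item.type == "function_call_output":
            print(item.call_id, item.output)
        elif item.type in ("web_search_call", "file_search_call"):
            print(item.type, item.status)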
- -from __future__ import annotations - -from typing import Dict, Union, Iterable -from typing_extensions import Required, TypedDict - -from .shared_params.message import Message - -__all__ = ["SafetyRunShieldParams"] - - -class SafetyRunShieldParams(TypedDict, total=False): - messages: Required[Iterable[Message]] - """The messages to run the shield on.""" - - params: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - """The parameters of the shield.""" - - shield_id: Required[str] - """The identifier of the shield to run.""" diff --git a/src/llama_stack_client/types/scoring_fn.py b/src/llama_stack_client/types/scoring_fn.py deleted file mode 100644 index 3569cb44..00000000 --- a/src/llama_stack_client/types/scoring_fn.py +++ /dev/null @@ -1,28 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Dict, List, Union, Optional -from typing_extensions import Literal - -from .._models import BaseModel -from .scoring_fn_params import ScoringFnParams -from .shared.return_type import ReturnType - -__all__ = ["ScoringFn"] - - -class ScoringFn(BaseModel): - identifier: str - - metadata: Dict[str, Union[bool, float, str, List[object], object, None]] - - provider_id: str - - return_type: ReturnType - - type: Literal["scoring_function"] - - description: Optional[str] = None - - params: Optional[ScoringFnParams] = None - - provider_resource_id: Optional[str] = None diff --git a/src/llama_stack_client/types/scoring_fn_params.py b/src/llama_stack_client/types/scoring_fn_params.py deleted file mode 100644 index a46b46f5..00000000 --- a/src/llama_stack_client/types/scoring_fn_params.py +++ /dev/null @@ -1,41 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import List, Union, Optional -from typing_extensions import Literal, Annotated, TypeAlias - -from .._utils import PropertyInfo -from .._models import BaseModel - -__all__ = ["ScoringFnParams", "LlmAsJudgeScoringFnParams", "RegexParserScoringFnParams", "BasicScoringFnParams"] - - -class LlmAsJudgeScoringFnParams(BaseModel): - aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] - - judge_model: str - - judge_score_regexes: List[str] - - type: Literal["llm_as_judge"] - - prompt_template: Optional[str] = None - - -class RegexParserScoringFnParams(BaseModel): - aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] - - parsing_regexes: List[str] - - type: Literal["regex_parser"] - - -class BasicScoringFnParams(BaseModel): - aggregation_functions: List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] - - type: Literal["basic"] - - -ScoringFnParams: TypeAlias = Annotated[ - Union[LlmAsJudgeScoringFnParams, RegexParserScoringFnParams, BasicScoringFnParams], - PropertyInfo(discriminator="type"), -] diff --git a/src/llama_stack_client/types/scoring_fn_params_param.py b/src/llama_stack_client/types/scoring_fn_params_param.py deleted file mode 100644 index b404bc89..00000000 --- a/src/llama_stack_client/types/scoring_fn_params_param.py +++ /dev/null @@ -1,43 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
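[Editor's note: a minimal sketch of the request-body shape described by the SafetyRunShieldParams TypedDict deleted above; the shield ID is a placeholder, not a registered shield.]

run_shield_body = {
    "shield_id": "my-shield",                                # placeholder identifier
    "messages": [
        {"role": "user", "content": "Is this text safe?"},   # UserMessage shape
    ],
    "params": {},                                            # shield-specific parameters
}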
- -from __future__ import annotations - -from typing import List, Union -from typing_extensions import Literal, Required, TypeAlias, TypedDict - -__all__ = ["ScoringFnParamsParam", "LlmAsJudgeScoringFnParams", "RegexParserScoringFnParams", "BasicScoringFnParams"] - - -class LlmAsJudgeScoringFnParams(TypedDict, total=False): - aggregation_functions: Required[ - List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] - ] - - judge_model: Required[str] - - judge_score_regexes: Required[List[str]] - - type: Required[Literal["llm_as_judge"]] - - prompt_template: str - - -class RegexParserScoringFnParams(TypedDict, total=False): - aggregation_functions: Required[ - List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] - ] - - parsing_regexes: Required[List[str]] - - type: Required[Literal["regex_parser"]] - - -class BasicScoringFnParams(TypedDict, total=False): - aggregation_functions: Required[ - List[Literal["average", "weighted_average", "median", "categorical_count", "accuracy"]] - ] - - type: Required[Literal["basic"]] - - -ScoringFnParamsParam: TypeAlias = Union[LlmAsJudgeScoringFnParams, RegexParserScoringFnParams, BasicScoringFnParams] diff --git a/src/llama_stack_client/types/scoring_function_list_response.py b/src/llama_stack_client/types/scoring_function_list_response.py deleted file mode 100644 index bad85a54..00000000 --- a/src/llama_stack_client/types/scoring_function_list_response.py +++ /dev/null @@ -1,10 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import List -from typing_extensions import TypeAlias - -from .scoring_fn import ScoringFn - -__all__ = ["ScoringFunctionListResponse"] - -ScoringFunctionListResponse: TypeAlias = List[ScoringFn] diff --git a/src/llama_stack_client/types/scoring_function_register_params.py b/src/llama_stack_client/types/scoring_function_register_params.py deleted file mode 100644 index c9932710..00000000 --- a/src/llama_stack_client/types/scoring_function_register_params.py +++ /dev/null @@ -1,32 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing_extensions import Required, TypedDict - -from .scoring_fn_params_param import ScoringFnParamsParam -from .shared_params.return_type import ReturnType - -__all__ = ["ScoringFunctionRegisterParams"] - - -class ScoringFunctionRegisterParams(TypedDict, total=False): - description: Required[str] - """The description of the scoring function.""" - - return_type: Required[ReturnType] - - scoring_fn_id: Required[str] - """The ID of the scoring function to register.""" - - params: ScoringFnParamsParam - """ - The parameters for the scoring function for benchmark eval, these can be - overridden for app eval. - """ - - provider_id: str - """The ID of the provider to use for the scoring function.""" - - provider_scoring_fn_id: str - """The ID of the provider scoring function to use for the scoring function.""" diff --git a/src/llama_stack_client/types/scoring_score_batch_params.py b/src/llama_stack_client/types/scoring_score_batch_params.py deleted file mode 100644 index 28dfa86e..00000000 --- a/src/llama_stack_client/types/scoring_score_batch_params.py +++ /dev/null @@ -1,21 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
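[Editor's note: a minimal sketch of an LlmAsJudgeScoringFnParams payload as specified by the TypedDict above; the judge model name, regex, and prompt are illustrative only.]

llm_as_judge_params = {
    "type": "llm_as_judge",
    "judge_model": "example-judge-model",               # placeholder model id
    "judge_score_regexes": [r"Score:\s*(\d+)"],          # how to extract a score from judge output
    "aggregation_functions": ["average"],                # one of the allowed aggregation literals
    "prompt_template": "Rate the answer from 0 to 10.",  # optional
}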
- -from __future__ import annotations - -from typing import Dict, Optional -from typing_extensions import Required, TypedDict - -from .scoring_fn_params_param import ScoringFnParamsParam - -__all__ = ["ScoringScoreBatchParams"] - - -class ScoringScoreBatchParams(TypedDict, total=False): - dataset_id: Required[str] - """The ID of the dataset to score.""" - - save_results_dataset: Required[bool] - """Whether to save the results to a dataset.""" - - scoring_functions: Required[Dict[str, Optional[ScoringFnParamsParam]]] - """The scoring functions to use for the scoring.""" diff --git a/src/llama_stack_client/types/scoring_score_batch_response.py b/src/llama_stack_client/types/scoring_score_batch_response.py deleted file mode 100644 index da124b1e..00000000 --- a/src/llama_stack_client/types/scoring_score_batch_response.py +++ /dev/null @@ -1,14 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Dict, Optional - -from .._models import BaseModel -from .shared.scoring_result import ScoringResult - -__all__ = ["ScoringScoreBatchResponse"] - - -class ScoringScoreBatchResponse(BaseModel): - results: Dict[str, ScoringResult] - - dataset_id: Optional[str] = None diff --git a/src/llama_stack_client/types/scoring_score_params.py b/src/llama_stack_client/types/scoring_score_params.py deleted file mode 100644 index baac066e..00000000 --- a/src/llama_stack_client/types/scoring_score_params.py +++ /dev/null @@ -1,18 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Dict, Union, Iterable, Optional -from typing_extensions import Required, TypedDict - -from .scoring_fn_params_param import ScoringFnParamsParam - -__all__ = ["ScoringScoreParams"] - - -class ScoringScoreParams(TypedDict, total=False): - input_rows: Required[Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]]] - """The rows to score.""" - - scoring_functions: Required[Dict[str, Optional[ScoringFnParamsParam]]] - """The scoring functions to use for the scoring.""" diff --git a/src/llama_stack_client/types/scoring_score_response.py b/src/llama_stack_client/types/scoring_score_response.py deleted file mode 100644 index 083416e5..00000000 --- a/src/llama_stack_client/types/scoring_score_response.py +++ /dev/null @@ -1,13 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Dict - -from .._models import BaseModel -from .shared.scoring_result import ScoringResult - -__all__ = ["ScoringScoreResponse"] - - -class ScoringScoreResponse(BaseModel): - results: Dict[str, ScoringResult] - """A map of scoring function name to ScoringResult.""" diff --git a/src/llama_stack_client/types/shared/__init__.py b/src/llama_stack_client/types/shared/__init__.py deleted file mode 100644 index 0fe46810..00000000 --- a/src/llama_stack_client/types/shared/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
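[Editor's note: a hedged sketch of reading the ScoringScoreResponse deleted above; `score_response` is an assumed, already-obtained instance, and `results` maps each scoring function name to a ScoringResult.]

def summarize_scores(score_response):
    for fn_name, result in score_response.results.items():
        print(fn_name, result.aggregated_results)   # map of metric name to aggregated value
        for row in result.score_rows:               # one dict per scored input row
            print(row)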
- -from .message import Message as Message -from .document import Document as Document -from .tool_call import ToolCall as ToolCall -from .param_type import ParamType as ParamType -from .return_type import ReturnType as ReturnType -from .agent_config import AgentConfig as AgentConfig -from .query_config import QueryConfig as QueryConfig -from .query_result import QueryResult as QueryResult -from .user_message import UserMessage as UserMessage -from .content_delta import ContentDelta as ContentDelta -from .scoring_result import ScoringResult as ScoringResult -from .system_message import SystemMessage as SystemMessage -from .response_format import ResponseFormat as ResponseFormat -from .sampling_params import SamplingParams as SamplingParams -from .batch_completion import BatchCompletion as BatchCompletion -from .safety_violation import SafetyViolation as SafetyViolation -from .completion_message import CompletionMessage as CompletionMessage -from .interleaved_content import InterleavedContent as InterleavedContent -from .tool_call_or_string import ToolCallOrString as ToolCallOrString -from .tool_param_definition import ToolParamDefinition as ToolParamDefinition -from .tool_response_message import ToolResponseMessage as ToolResponseMessage -from .query_generator_config import QueryGeneratorConfig as QueryGeneratorConfig -from .chat_completion_response import ChatCompletionResponse as ChatCompletionResponse -from .interleaved_content_item import InterleavedContentItem as InterleavedContentItem diff --git a/src/llama_stack_client/types/shared/agent_config.py b/src/llama_stack_client/types/shared/agent_config.py deleted file mode 100644 index eb116159..00000000 --- a/src/llama_stack_client/types/shared/agent_config.py +++ /dev/null @@ -1,92 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Dict, List, Union, Optional -from typing_extensions import Literal, TypeAlias - -from ..._models import BaseModel -from ..tool_def import ToolDef -from .response_format import ResponseFormat -from .sampling_params import SamplingParams - -__all__ = ["AgentConfig", "ToolConfig", "Toolgroup", "ToolgroupAgentToolGroupWithArgs"] - - -class ToolConfig(BaseModel): - system_message_behavior: Optional[Literal["append", "replace"]] = None - """(Optional) Config for how to override the default system prompt. - - - `SystemMessageBehavior.append`: Appends the provided system message to the - default system prompt. - `SystemMessageBehavior.replace`: Replaces the default - system prompt with the provided system message. The system message can include - the string '{{function_definitions}}' to indicate where the function - definitions should be inserted. - """ - - tool_choice: Union[Literal["auto", "required", "none"], str, None] = None - """(Optional) Whether tool use is automatic, required, or none. - - Can also specify a tool name to use a specific tool. Defaults to - ToolChoice.auto. - """ - - tool_prompt_format: Optional[Literal["json", "function_tag", "python_list"]] = None - """(Optional) Instructs the model how to format tool calls. - - By default, Llama Stack will attempt to use a format that is best adapted to the - model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON - object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a - tag. - `ToolPromptFormat.python_list`: The tool calls - are output as Python syntax -- a list of function calls. 
- """ - - -class ToolgroupAgentToolGroupWithArgs(BaseModel): - args: Dict[str, Union[bool, float, str, List[object], object, None]] - - name: str - - -Toolgroup: TypeAlias = Union[str, ToolgroupAgentToolGroupWithArgs] - - -class AgentConfig(BaseModel): - instructions: str - """The system instructions for the agent""" - - model: str - """The model identifier to use for the agent""" - - client_tools: Optional[List[ToolDef]] = None - - enable_session_persistence: Optional[bool] = None - """Optional flag indicating whether session data has to be persisted""" - - input_shields: Optional[List[str]] = None - - max_infer_iters: Optional[int] = None - - name: Optional[str] = None - """Optional name for the agent, used in telemetry and identification""" - - output_shields: Optional[List[str]] = None - - response_format: Optional[ResponseFormat] = None - """Optional response format configuration""" - - sampling_params: Optional[SamplingParams] = None - """Sampling parameters.""" - - tool_choice: Optional[Literal["auto", "required", "none"]] = None - """Whether tool use is required or automatic. - - This is a hint to the model which may not be followed. It depends on the - Instruction Following capabilities of the model. - """ - - tool_config: Optional[ToolConfig] = None - """Configuration for tool use.""" - - tool_prompt_format: Optional[Literal["json", "function_tag", "python_list"]] = None - """Prompt format for calling custom / zero shot tools.""" - - toolgroups: Optional[List[Toolgroup]] = None diff --git a/src/llama_stack_client/types/shared/batch_completion.py b/src/llama_stack_client/types/shared/batch_completion.py deleted file mode 100644 index 547884d1..00000000 --- a/src/llama_stack_client/types/shared/batch_completion.py +++ /dev/null @@ -1,12 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import List - -from ..._models import BaseModel -from ..completion_response import CompletionResponse - -__all__ = ["BatchCompletion"] - - -class BatchCompletion(BaseModel): - batch: List[CompletionResponse] diff --git a/src/llama_stack_client/types/shared/chat_completion_response.py b/src/llama_stack_client/types/shared/chat_completion_response.py deleted file mode 100644 index 20dd1ecc..00000000 --- a/src/llama_stack_client/types/shared/chat_completion_response.py +++ /dev/null @@ -1,27 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import List, Optional - -from ..._models import BaseModel -from ..token_log_probs import TokenLogProbs -from .completion_message import CompletionMessage - -__all__ = ["ChatCompletionResponse", "Metric"] - - -class Metric(BaseModel): - metric: str - - value: float - - unit: Optional[str] = None - - -class ChatCompletionResponse(BaseModel): - completion_message: CompletionMessage - """The complete response message""" - - logprobs: Optional[List[TokenLogProbs]] = None - """Optional log probabilities for generated tokens""" - - metrics: Optional[List[Metric]] = None diff --git a/src/llama_stack_client/types/shared/completion_message.py b/src/llama_stack_client/types/shared/completion_message.py deleted file mode 100644 index 61c10a5b..00000000 --- a/src/llama_stack_client/types/shared/completion_message.py +++ /dev/null @@ -1,31 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
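[Editor's note: a hedged sketch of inspecting the ChatCompletionResponse / CompletionMessage models removed in this patch; `chat_response` is an assumed, already-obtained instance.]

def inspect_chat_response(chat_response):
    msg = chat_response.completion_message
    print(msg.stop_reason)                 # "end_of_turn" | "end_of_message" | "out_of_tokens"
    print(msg.content)                     # InterleavedContent: a string or content items
    for call in msg.tool_calls or []:      # tool_calls is optional
        print(call.tool_name, call.arguments)
    for metric in chat_response.metrics or []:
        print(metric.metric, metric.value, metric.unit)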
- -from typing import List, Optional -from typing_extensions import Literal - -from ..._models import BaseModel -from .tool_call import ToolCall -from .interleaved_content import InterleavedContent - -__all__ = ["CompletionMessage"] - - -class CompletionMessage(BaseModel): - content: InterleavedContent - """The content of the model's response""" - - role: Literal["assistant"] - """Must be "assistant" to identify this as the model's response""" - - stop_reason: Literal["end_of_turn", "end_of_message", "out_of_tokens"] - """Reason why the model stopped generating. - - Options are: - `StopReason.end_of_turn`: The model finished generating the - entire response. - `StopReason.end_of_message`: The model finished generating - but generated a partial response -- usually, a tool call. The user may call the - tool and continue the conversation with the tool's response. - - `StopReason.out_of_tokens`: The model ran out of token budget. - """ - - tool_calls: Optional[List[ToolCall]] = None - """List of tool calls. Each tool call is a ToolCall object.""" diff --git a/src/llama_stack_client/types/shared/content_delta.py b/src/llama_stack_client/types/shared/content_delta.py deleted file mode 100644 index ae036ad8..00000000 --- a/src/llama_stack_client/types/shared/content_delta.py +++ /dev/null @@ -1,33 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Union -from typing_extensions import Literal, Annotated, TypeAlias - -from ..._utils import PropertyInfo -from ..._models import BaseModel -from .tool_call_or_string import ToolCallOrString - -__all__ = ["ContentDelta", "TextDelta", "ImageDelta", "ToolCallDelta"] - - -class TextDelta(BaseModel): - text: str - - type: Literal["text"] - - -class ImageDelta(BaseModel): - image: str - - type: Literal["image"] - - -class ToolCallDelta(BaseModel): - parse_status: Literal["started", "in_progress", "failed", "succeeded"] - - tool_call: ToolCallOrString - - type: Literal["tool_call"] - - -ContentDelta: TypeAlias = Annotated[Union[TextDelta, ImageDelta, ToolCallDelta], PropertyInfo(discriminator="type")] diff --git a/src/llama_stack_client/types/shared/document.py b/src/llama_stack_client/types/shared/document.py deleted file mode 100644 index 67704232..00000000 --- a/src/llama_stack_client/types/shared/document.py +++ /dev/null @@ -1,71 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Dict, List, Union, Optional -from typing_extensions import Literal, TypeAlias - -from ..._models import BaseModel -from .interleaved_content_item import InterleavedContentItem - -__all__ = [ - "Document", - "Content", - "ContentImageContentItem", - "ContentImageContentItemImage", - "ContentImageContentItemImageURL", - "ContentTextContentItem", - "ContentURL", -] - - -class ContentImageContentItemImageURL(BaseModel): - uri: str - - -class ContentImageContentItemImage(BaseModel): - data: Optional[str] = None - """base64 encoded image data as string""" - - url: Optional[ContentImageContentItemImageURL] = None - """A URL of the image or data URL in the format of data:image/{type};base64,{data}. - - Note that URL could have length limits. - """ - - -class ContentImageContentItem(BaseModel): - image: ContentImageContentItemImage - """Image as a base64 encoded string or an URL""" - - type: Literal["image"] - """Discriminator type of the content item. 
Always "image" """ - - -class ContentTextContentItem(BaseModel): - text: str - """Text content""" - - type: Literal["text"] - """Discriminator type of the content item. Always "text" """ - - -class ContentURL(BaseModel): - uri: str - - -Content: TypeAlias = Union[ - str, ContentImageContentItem, ContentTextContentItem, List[InterleavedContentItem], ContentURL -] - - -class Document(BaseModel): - content: Content - """The content of the document.""" - - document_id: str - """The unique identifier for the document.""" - - metadata: Dict[str, Union[bool, float, str, List[object], object, None]] - """Additional metadata for the document.""" - - mime_type: Optional[str] = None - """The MIME type of the document.""" diff --git a/src/llama_stack_client/types/shared/interleaved_content.py b/src/llama_stack_client/types/shared/interleaved_content.py deleted file mode 100644 index dc496150..00000000 --- a/src/llama_stack_client/types/shared/interleaved_content.py +++ /dev/null @@ -1,49 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import List, Union, Optional -from typing_extensions import Literal, TypeAlias - -from ..._models import BaseModel -from .interleaved_content_item import InterleavedContentItem - -__all__ = [ - "InterleavedContent", - "ImageContentItem", - "ImageContentItemImage", - "ImageContentItemImageURL", - "TextContentItem", -] - - -class ImageContentItemImageURL(BaseModel): - uri: str - - -class ImageContentItemImage(BaseModel): - data: Optional[str] = None - """base64 encoded image data as string""" - - url: Optional[ImageContentItemImageURL] = None - """A URL of the image or data URL in the format of data:image/{type};base64,{data}. - - Note that URL could have length limits. - """ - - -class ImageContentItem(BaseModel): - image: ImageContentItemImage - """Image as a base64 encoded string or an URL""" - - type: Literal["image"] - """Discriminator type of the content item. Always "image" """ - - -class TextContentItem(BaseModel): - text: str - """Text content""" - - type: Literal["text"] - """Discriminator type of the content item. Always "text" """ - - -InterleavedContent: TypeAlias = Union[str, ImageContentItem, TextContentItem, List[InterleavedContentItem]] diff --git a/src/llama_stack_client/types/shared/interleaved_content_item.py b/src/llama_stack_client/types/shared/interleaved_content_item.py deleted file mode 100644 index 8a3238b8..00000000 --- a/src/llama_stack_client/types/shared/interleaved_content_item.py +++ /dev/null @@ -1,51 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Union, Optional -from typing_extensions import Literal, Annotated, TypeAlias - -from ..._utils import PropertyInfo -from ..._models import BaseModel - -__all__ = [ - "InterleavedContentItem", - "ImageContentItem", - "ImageContentItemImage", - "ImageContentItemImageURL", - "TextContentItem", -] - - -class ImageContentItemImageURL(BaseModel): - uri: str - - -class ImageContentItemImage(BaseModel): - data: Optional[str] = None - """base64 encoded image data as string""" - - url: Optional[ImageContentItemImageURL] = None - """A URL of the image or data URL in the format of data:image/{type};base64,{data}. - - Note that URL could have length limits. - """ - - -class ImageContentItem(BaseModel): - image: ImageContentItemImage - """Image as a base64 encoded string or an URL""" - - type: Literal["image"] - """Discriminator type of the content item. 
Always "image" """ - - -class TextContentItem(BaseModel): - text: str - """Text content""" - - type: Literal["text"] - """Discriminator type of the content item. Always "text" """ - - -InterleavedContentItem: TypeAlias = Annotated[ - Union[ImageContentItem, TextContentItem], PropertyInfo(discriminator="type") -] diff --git a/src/llama_stack_client/types/shared/message.py b/src/llama_stack_client/types/shared/message.py deleted file mode 100644 index 1da117ee..00000000 --- a/src/llama_stack_client/types/shared/message.py +++ /dev/null @@ -1,16 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Union -from typing_extensions import Annotated, TypeAlias - -from ..._utils import PropertyInfo -from .user_message import UserMessage -from .system_message import SystemMessage -from .completion_message import CompletionMessage -from .tool_response_message import ToolResponseMessage - -__all__ = ["Message"] - -Message: TypeAlias = Annotated[ - Union[UserMessage, SystemMessage, ToolResponseMessage, CompletionMessage], PropertyInfo(discriminator="role") -] diff --git a/src/llama_stack_client/types/shared/param_type.py b/src/llama_stack_client/types/shared/param_type.py deleted file mode 100644 index 2fed6df2..00000000 --- a/src/llama_stack_client/types/shared/param_type.py +++ /dev/null @@ -1,78 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Union -from typing_extensions import Literal, Annotated, TypeAlias - -from ..._utils import PropertyInfo -from ..._models import BaseModel - -__all__ = [ - "ParamType", - "StringType", - "NumberType", - "BooleanType", - "ArrayType", - "ObjectType", - "JsonType", - "UnionType", - "ChatCompletionInputType", - "CompletionInputType", - "AgentTurnInputType", -] - - -class StringType(BaseModel): - type: Literal["string"] - - -class NumberType(BaseModel): - type: Literal["number"] - - -class BooleanType(BaseModel): - type: Literal["boolean"] - - -class ArrayType(BaseModel): - type: Literal["array"] - - -class ObjectType(BaseModel): - type: Literal["object"] - - -class JsonType(BaseModel): - type: Literal["json"] - - -class UnionType(BaseModel): - type: Literal["union"] - - -class ChatCompletionInputType(BaseModel): - type: Literal["chat_completion_input"] - - -class CompletionInputType(BaseModel): - type: Literal["completion_input"] - - -class AgentTurnInputType(BaseModel): - type: Literal["agent_turn_input"] - - -ParamType: TypeAlias = Annotated[ - Union[ - StringType, - NumberType, - BooleanType, - ArrayType, - ObjectType, - JsonType, - UnionType, - ChatCompletionInputType, - CompletionInputType, - AgentTurnInputType, - ], - PropertyInfo(discriminator="type"), -] diff --git a/src/llama_stack_client/types/shared/query_config.py b/src/llama_stack_client/types/shared/query_config.py deleted file mode 100644 index 3628efbf..00000000 --- a/src/llama_stack_client/types/shared/query_config.py +++ /dev/null @@ -1,65 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Union, Optional -from typing_extensions import Literal, Annotated, TypeAlias - -from ..._utils import PropertyInfo -from ..._models import BaseModel -from .query_generator_config import QueryGeneratorConfig - -__all__ = ["QueryConfig", "Ranker", "RankerRrfRanker", "RankerWeightedRanker"] - - -class RankerRrfRanker(BaseModel): - impact_factor: float - """The impact factor for RRF scoring. 
- - Higher values give more weight to higher-ranked results. Must be greater than 0. - Default of 60 is from the original RRF paper (Cormack et al., 2009). - """ - - type: Literal["rrf"] - """The type of ranker, always "rrf" """ - - -class RankerWeightedRanker(BaseModel): - alpha: float - """Weight factor between 0 and 1. - - 0 means only use keyword scores, 1 means only use vector scores, values in - between blend both scores. - """ - - type: Literal["weighted"] - """The type of ranker, always "weighted" """ - - -Ranker: TypeAlias = Annotated[Union[RankerRrfRanker, RankerWeightedRanker], PropertyInfo(discriminator="type")] - - -class QueryConfig(BaseModel): - chunk_template: str - """Template for formatting each retrieved chunk in the context. - - Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk - content string), {metadata} (chunk metadata dict). Default: "Result - {index}\nContent: {chunk.content}\nMetadata: {metadata}\n" - """ - - max_chunks: int - """Maximum number of chunks to retrieve.""" - - max_tokens_in_context: int - """Maximum number of tokens in the context.""" - - query_generator_config: QueryGeneratorConfig - """Configuration for the query generator.""" - - mode: Optional[str] = None - """Search mode for retrieval—either "vector", "keyword", or "hybrid". - - Default "vector". - """ - - ranker: Optional[Ranker] = None - """Configuration for the ranker to use in hybrid search. Defaults to RRF ranker.""" diff --git a/src/llama_stack_client/types/shared/query_generator_config.py b/src/llama_stack_client/types/shared/query_generator_config.py deleted file mode 100644 index 559fca7d..00000000 --- a/src/llama_stack_client/types/shared/query_generator_config.py +++ /dev/null @@ -1,28 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Union -from typing_extensions import Literal, Annotated, TypeAlias - -from ..._utils import PropertyInfo -from ..._models import BaseModel - -__all__ = ["QueryGeneratorConfig", "DefaultRagQueryGeneratorConfig", "LlmragQueryGeneratorConfig"] - - -class DefaultRagQueryGeneratorConfig(BaseModel): - separator: str - - type: Literal["default"] - - -class LlmragQueryGeneratorConfig(BaseModel): - model: str - - template: str - - type: Literal["llm"] - - -QueryGeneratorConfig: TypeAlias = Annotated[ - Union[DefaultRagQueryGeneratorConfig, LlmragQueryGeneratorConfig], PropertyInfo(discriminator="type") -] diff --git a/src/llama_stack_client/types/shared/query_result.py b/src/llama_stack_client/types/shared/query_result.py deleted file mode 100644 index c0a1d44c..00000000 --- a/src/llama_stack_client/types/shared/query_result.py +++ /dev/null @@ -1,15 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Dict, List, Union, Optional - -from ..._models import BaseModel -from .interleaved_content import InterleavedContent - -__all__ = ["QueryResult"] - - -class QueryResult(BaseModel): - metadata: Dict[str, Union[bool, float, str, List[object], object, None]] - - content: Optional[InterleavedContent] = None - """A image content item""" diff --git a/src/llama_stack_client/types/shared/response_format.py b/src/llama_stack_client/types/shared/response_format.py deleted file mode 100644 index 537df8d5..00000000 --- a/src/llama_stack_client/types/shared/response_format.py +++ /dev/null @@ -1,33 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
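[Editor's note: a hedged sketch of a QueryConfig-shaped payload for hybrid retrieval, using only fields defined in the removed models; the chunk template and RRF impact factor follow the documented defaults, the other limits are illustrative.]

query_config = {
    "query_generator_config": {"type": "default", "separator": " "},
    "chunk_template": "Result {index}\nContent: {chunk.content}\nMetadata: {metadata}\n",
    "max_chunks": 5,
    "max_tokens_in_context": 2048,
    "mode": "hybrid",                                   # "vector", "keyword", or "hybrid"
    "ranker": {"type": "rrf", "impact_factor": 60.0},   # 60 is the documented RRF default
}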
- -from typing import Dict, List, Union -from typing_extensions import Literal, Annotated, TypeAlias - -from ..._utils import PropertyInfo -from ..._models import BaseModel - -__all__ = ["ResponseFormat", "JsonSchemaResponseFormat", "GrammarResponseFormat"] - - -class JsonSchemaResponseFormat(BaseModel): - json_schema: Dict[str, Union[bool, float, str, List[object], object, None]] - """The JSON schema the response should conform to. - - In a Python SDK, this is often a `pydantic` model. - """ - - type: Literal["json_schema"] - """Must be "json_schema" to identify this format type""" - - -class GrammarResponseFormat(BaseModel): - bnf: Dict[str, Union[bool, float, str, List[object], object, None]] - """The BNF grammar specification the response should conform to""" - - type: Literal["grammar"] - """Must be "grammar" to identify this format type""" - - -ResponseFormat: TypeAlias = Annotated[ - Union[JsonSchemaResponseFormat, GrammarResponseFormat], PropertyInfo(discriminator="type") -] diff --git a/src/llama_stack_client/types/shared/return_type.py b/src/llama_stack_client/types/shared/return_type.py deleted file mode 100644 index 542f9e5c..00000000 --- a/src/llama_stack_client/types/shared/return_type.py +++ /dev/null @@ -1,22 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing_extensions import Literal - -from ..._models import BaseModel - -__all__ = ["ReturnType"] - - -class ReturnType(BaseModel): - type: Literal[ - "string", - "number", - "boolean", - "array", - "object", - "json", - "union", - "chat_completion_input", - "completion_input", - "agent_turn_input", - ] diff --git a/src/llama_stack_client/types/shared/safety_violation.py b/src/llama_stack_client/types/shared/safety_violation.py deleted file mode 100644 index e3c94312..00000000 --- a/src/llama_stack_client/types/shared/safety_violation.py +++ /dev/null @@ -1,16 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Dict, List, Union, Optional -from typing_extensions import Literal - -from ..._models import BaseModel - -__all__ = ["SafetyViolation"] - - -class SafetyViolation(BaseModel): - metadata: Dict[str, Union[bool, float, str, List[object], object, None]] - - violation_level: Literal["info", "warn", "error"] - - user_message: Optional[str] = None diff --git a/src/llama_stack_client/types/shared/sampling_params.py b/src/llama_stack_client/types/shared/sampling_params.py deleted file mode 100644 index 7ce2211e..00000000 --- a/src/llama_stack_client/types/shared/sampling_params.py +++ /dev/null @@ -1,64 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
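[Editor's note: a minimal sketch of the two ResponseFormat variants modelled above; the JSON schema and BNF payloads are made-up examples, not ones shipped with the SDK.]

json_schema_format = {
    "type": "json_schema",
    "json_schema": {
        "type": "object",
        "properties": {"answer": {"type": "string"}},
        "required": ["answer"],
    },
}

grammar_format = {
    "type": "grammar",
    "bnf": {"root": '"yes" | "no"'},   # placeholder grammar specification
}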
- -from typing import List, Union, Optional -from typing_extensions import Literal, Annotated, TypeAlias - -from ..._utils import PropertyInfo -from ..._models import BaseModel - -__all__ = [ - "SamplingParams", - "Strategy", - "StrategyGreedySamplingStrategy", - "StrategyTopPSamplingStrategy", - "StrategyTopKSamplingStrategy", -] - - -class StrategyGreedySamplingStrategy(BaseModel): - type: Literal["greedy"] - - -class StrategyTopPSamplingStrategy(BaseModel): - type: Literal["top_p"] - - temperature: Optional[float] = None - - top_p: Optional[float] = None - - -class StrategyTopKSamplingStrategy(BaseModel): - top_k: int - - type: Literal["top_k"] - - -Strategy: TypeAlias = Annotated[ - Union[StrategyGreedySamplingStrategy, StrategyTopPSamplingStrategy, StrategyTopKSamplingStrategy], - PropertyInfo(discriminator="type"), -] - - -class SamplingParams(BaseModel): - strategy: Strategy - """The sampling strategy.""" - - max_tokens: Optional[int] = None - """The maximum number of tokens that can be generated in the completion. - - The token count of your prompt plus max_tokens cannot exceed the model's context - length. - """ - - repetition_penalty: Optional[float] = None - """Number between -2.0 and 2.0. - - Positive values penalize new tokens based on whether they appear in the text so - far, increasing the model's likelihood to talk about new topics. - """ - - stop: Optional[List[str]] = None - """Up to 4 sequences where the API will stop generating further tokens. - - The returned text will not contain the stop sequence. - """ diff --git a/src/llama_stack_client/types/shared/scoring_result.py b/src/llama_stack_client/types/shared/scoring_result.py deleted file mode 100644 index 61ad9b54..00000000 --- a/src/llama_stack_client/types/shared/scoring_result.py +++ /dev/null @@ -1,15 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Dict, List, Union - -from ..._models import BaseModel - -__all__ = ["ScoringResult"] - - -class ScoringResult(BaseModel): - aggregated_results: Dict[str, Union[bool, float, str, List[object], object, None]] - """Map of metric name to aggregated value""" - - score_rows: List[Dict[str, Union[bool, float, str, List[object], object, None]]] - """The scoring result for each row. Each row is a map of column name to value.""" diff --git a/src/llama_stack_client/types/shared/system_message.py b/src/llama_stack_client/types/shared/system_message.py deleted file mode 100644 index a854e019..00000000 --- a/src/llama_stack_client/types/shared/system_message.py +++ /dev/null @@ -1,21 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing_extensions import Literal - -from ..._models import BaseModel -from .interleaved_content import InterleavedContent - -__all__ = ["SystemMessage"] - - -class SystemMessage(BaseModel): - content: InterleavedContent - """The content of the "system prompt". - - If multiple system messages are provided, they are concatenated. The underlying - Llama Stack code may also add other system messages (for example, for formatting - tool definitions). - """ - - role: Literal["system"] - """Must be "system" to identify this as a system message""" diff --git a/src/llama_stack_client/types/shared/tool_call.py b/src/llama_stack_client/types/shared/tool_call.py deleted file mode 100644 index b9301d75..00000000 --- a/src/llama_stack_client/types/shared/tool_call.py +++ /dev/null @@ -1,26 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. 
See CONTRIBUTING.md for details. - -from typing import Dict, List, Union, Optional -from typing_extensions import Literal - -from ..._models import BaseModel - -__all__ = ["ToolCall"] - - -class ToolCall(BaseModel): - arguments: Union[ - str, - Dict[ - str, - Union[ - str, float, bool, List[Union[str, float, bool, None]], Dict[str, Union[str, float, bool, None]], None - ], - ], - ] - - call_id: str - - tool_name: Union[Literal["brave_search", "wolfram_alpha", "photogen", "code_interpreter"], str] - - arguments_json: Optional[str] = None diff --git a/src/llama_stack_client/types/shared/tool_call_or_string.py b/src/llama_stack_client/types/shared/tool_call_or_string.py deleted file mode 100644 index f52a0d98..00000000 --- a/src/llama_stack_client/types/shared/tool_call_or_string.py +++ /dev/null @@ -1,10 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Union -from typing_extensions import TypeAlias - -from .tool_call import ToolCall - -__all__ = ["ToolCallOrString"] - -ToolCallOrString: TypeAlias = Union[str, ToolCall] diff --git a/src/llama_stack_client/types/shared/tool_param_definition.py b/src/llama_stack_client/types/shared/tool_param_definition.py deleted file mode 100644 index 1466c1f9..00000000 --- a/src/llama_stack_client/types/shared/tool_param_definition.py +++ /dev/null @@ -1,17 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import List, Union, Optional - -from ..._models import BaseModel - -__all__ = ["ToolParamDefinition"] - - -class ToolParamDefinition(BaseModel): - param_type: str - - default: Union[bool, float, str, List[object], object, None] = None - - description: Optional[str] = None - - required: Optional[bool] = None diff --git a/src/llama_stack_client/types/shared/tool_response_message.py b/src/llama_stack_client/types/shared/tool_response_message.py deleted file mode 100644 index c7b8f21f..00000000 --- a/src/llama_stack_client/types/shared/tool_response_message.py +++ /dev/null @@ -1,19 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing_extensions import Literal - -from ..._models import BaseModel -from .interleaved_content import InterleavedContent - -__all__ = ["ToolResponseMessage"] - - -class ToolResponseMessage(BaseModel): - call_id: str - """Unique identifier for the tool call this response is for""" - - content: InterleavedContent - """The response content from the tool""" - - role: Literal["tool"] - """Must be "tool" to identify this as a tool response""" diff --git a/src/llama_stack_client/types/shared/user_message.py b/src/llama_stack_client/types/shared/user_message.py deleted file mode 100644 index 2a89fbba..00000000 --- a/src/llama_stack_client/types/shared/user_message.py +++ /dev/null @@ -1,23 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Optional -from typing_extensions import Literal - -from ..._models import BaseModel -from .interleaved_content import InterleavedContent - -__all__ = ["UserMessage"] - - -class UserMessage(BaseModel): - content: InterleavedContent - """The content of the message, which can include text and other media""" - - role: Literal["user"] - """Must be "user" to identify this as a user message""" - - context: Optional[InterleavedContent] = None - """(Optional) This field is used internally by Llama Stack to pass RAG context. 
- - This field may be removed in the API in the future. - """ diff --git a/src/llama_stack_client/types/shared_params/__init__.py b/src/llama_stack_client/types/shared_params/__init__.py deleted file mode 100644 index bd623812..00000000 --- a/src/llama_stack_client/types/shared_params/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from .message import Message as Message -from .document import Document as Document -from .tool_call import ToolCall as ToolCall -from .return_type import ReturnType as ReturnType -from .agent_config import AgentConfig as AgentConfig -from .query_config import QueryConfig as QueryConfig -from .user_message import UserMessage as UserMessage -from .system_message import SystemMessage as SystemMessage -from .response_format import ResponseFormat as ResponseFormat -from .sampling_params import SamplingParams as SamplingParams -from .completion_message import CompletionMessage as CompletionMessage -from .interleaved_content import InterleavedContent as InterleavedContent -from .tool_param_definition import ToolParamDefinition as ToolParamDefinition -from .tool_response_message import ToolResponseMessage as ToolResponseMessage -from .query_generator_config import QueryGeneratorConfig as QueryGeneratorConfig -from .interleaved_content_item import InterleavedContentItem as InterleavedContentItem diff --git a/src/llama_stack_client/types/shared_params/agent_config.py b/src/llama_stack_client/types/shared_params/agent_config.py deleted file mode 100644 index 5cebec3f..00000000 --- a/src/llama_stack_client/types/shared_params/agent_config.py +++ /dev/null @@ -1,93 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Dict, List, Union, Iterable -from typing_extensions import Literal, Required, TypeAlias, TypedDict - -from ..tool_def_param import ToolDefParam -from .response_format import ResponseFormat -from .sampling_params import SamplingParams - -__all__ = ["AgentConfig", "ToolConfig", "Toolgroup", "ToolgroupAgentToolGroupWithArgs"] - - -class ToolConfig(TypedDict, total=False): - system_message_behavior: Literal["append", "replace"] - """(Optional) Config for how to override the default system prompt. - - - `SystemMessageBehavior.append`: Appends the provided system message to the - default system prompt. - `SystemMessageBehavior.replace`: Replaces the default - system prompt with the provided system message. The system message can include - the string '{{function_definitions}}' to indicate where the function - definitions should be inserted. - """ - - tool_choice: Union[Literal["auto", "required", "none"], str] - """(Optional) Whether tool use is automatic, required, or none. - - Can also specify a tool name to use a specific tool. Defaults to - ToolChoice.auto. - """ - - tool_prompt_format: Literal["json", "function_tag", "python_list"] - """(Optional) Instructs the model how to format tool calls. - - By default, Llama Stack will attempt to use a format that is best adapted to the - model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON - object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a - tag. - `ToolPromptFormat.python_list`: The tool calls - are output as Python syntax -- a list of function calls. 
- """ - - -class ToolgroupAgentToolGroupWithArgs(TypedDict, total=False): - args: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - - name: Required[str] - - -Toolgroup: TypeAlias = Union[str, ToolgroupAgentToolGroupWithArgs] - - -class AgentConfig(TypedDict, total=False): - instructions: Required[str] - """The system instructions for the agent""" - - model: Required[str] - """The model identifier to use for the agent""" - - client_tools: Iterable[ToolDefParam] - - enable_session_persistence: bool - """Optional flag indicating whether session data has to be persisted""" - - input_shields: List[str] - - max_infer_iters: int - - name: str - """Optional name for the agent, used in telemetry and identification""" - - output_shields: List[str] - - response_format: ResponseFormat - """Optional response format configuration""" - - sampling_params: SamplingParams - """Sampling parameters.""" - - tool_choice: Literal["auto", "required", "none"] - """Whether tool use is required or automatic. - - This is a hint to the model which may not be followed. It depends on the - Instruction Following capabilities of the model. - """ - - tool_config: ToolConfig - """Configuration for tool use.""" - - tool_prompt_format: Literal["json", "function_tag", "python_list"] - """Prompt format for calling custom / zero shot tools.""" - - toolgroups: List[Toolgroup] diff --git a/src/llama_stack_client/types/shared_params/completion_message.py b/src/llama_stack_client/types/shared_params/completion_message.py deleted file mode 100644 index 43b2529e..00000000 --- a/src/llama_stack_client/types/shared_params/completion_message.py +++ /dev/null @@ -1,32 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Iterable -from typing_extensions import Literal, Required, TypedDict - -from .tool_call import ToolCall -from .interleaved_content import InterleavedContent - -__all__ = ["CompletionMessage"] - - -class CompletionMessage(TypedDict, total=False): - content: Required[InterleavedContent] - """The content of the model's response""" - - role: Required[Literal["assistant"]] - """Must be "assistant" to identify this as the model's response""" - - stop_reason: Required[Literal["end_of_turn", "end_of_message", "out_of_tokens"]] - """Reason why the model stopped generating. - - Options are: - `StopReason.end_of_turn`: The model finished generating the - entire response. - `StopReason.end_of_message`: The model finished generating - but generated a partial response -- usually, a tool call. The user may call the - tool and continue the conversation with the tool's response. - - `StopReason.out_of_tokens`: The model ran out of token budget. - """ - - tool_calls: Iterable[ToolCall] - """List of tool calls. Each tool call is a ToolCall object.""" diff --git a/src/llama_stack_client/types/shared_params/document.py b/src/llama_stack_client/types/shared_params/document.py deleted file mode 100644 index 78564cfa..00000000 --- a/src/llama_stack_client/types/shared_params/document.py +++ /dev/null @@ -1,72 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
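[Editor's note: a hedged sketch of an AgentConfig request body as specified by the TypedDict above, with a sampling strategy shaped like the shared SamplingParams model; the model ID, shield name, and toolgroup name are placeholders.]

agent_config = {
    "model": "example-model-id",                      # required
    "instructions": "You are a helpful assistant.",   # required system instructions
    "input_shields": ["my-safety-shield"],            # placeholder shield identifier
    "max_infer_iters": 4,
    "sampling_params": {
        "strategy": {"type": "top_p", "temperature": 0.7, "top_p": 0.9},
        "max_tokens": 512,
    },
    "toolgroups": ["my-toolgroup"],                   # a toolgroup may be given as a plain string
}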
- -from __future__ import annotations - -from typing import Dict, Union, Iterable -from typing_extensions import Literal, Required, TypeAlias, TypedDict - -from .interleaved_content_item import InterleavedContentItem - -__all__ = [ - "Document", - "Content", - "ContentImageContentItem", - "ContentImageContentItemImage", - "ContentImageContentItemImageURL", - "ContentTextContentItem", - "ContentURL", -] - - -class ContentImageContentItemImageURL(TypedDict, total=False): - uri: Required[str] - - -class ContentImageContentItemImage(TypedDict, total=False): - data: str - """base64 encoded image data as string""" - - url: ContentImageContentItemImageURL - """A URL of the image or data URL in the format of data:image/{type};base64,{data}. - - Note that URL could have length limits. - """ - - -class ContentImageContentItem(TypedDict, total=False): - image: Required[ContentImageContentItemImage] - """Image as a base64 encoded string or an URL""" - - type: Required[Literal["image"]] - """Discriminator type of the content item. Always "image" """ - - -class ContentTextContentItem(TypedDict, total=False): - text: Required[str] - """Text content""" - - type: Required[Literal["text"]] - """Discriminator type of the content item. Always "text" """ - - -class ContentURL(TypedDict, total=False): - uri: Required[str] - - -Content: TypeAlias = Union[ - str, ContentImageContentItem, ContentTextContentItem, Iterable[InterleavedContentItem], ContentURL -] - - -class Document(TypedDict, total=False): - content: Required[Content] - """The content of the document.""" - - document_id: Required[str] - """The unique identifier for the document.""" - - metadata: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - """Additional metadata for the document.""" - - mime_type: str - """The MIME type of the document.""" diff --git a/src/llama_stack_client/types/shared_params/interleaved_content.py b/src/llama_stack_client/types/shared_params/interleaved_content.py deleted file mode 100644 index 5d045a20..00000000 --- a/src/llama_stack_client/types/shared_params/interleaved_content.py +++ /dev/null @@ -1,50 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Union, Iterable -from typing_extensions import Literal, Required, TypeAlias, TypedDict - -from .interleaved_content_item import InterleavedContentItem - -__all__ = [ - "InterleavedContent", - "ImageContentItem", - "ImageContentItemImage", - "ImageContentItemImageURL", - "TextContentItem", -] - - -class ImageContentItemImageURL(TypedDict, total=False): - uri: Required[str] - - -class ImageContentItemImage(TypedDict, total=False): - data: str - """base64 encoded image data as string""" - - url: ImageContentItemImageURL - """A URL of the image or data URL in the format of data:image/{type};base64,{data}. - - Note that URL could have length limits. - """ - - -class ImageContentItem(TypedDict, total=False): - image: Required[ImageContentItemImage] - """Image as a base64 encoded string or an URL""" - - type: Required[Literal["image"]] - """Discriminator type of the content item. Always "image" """ - - -class TextContentItem(TypedDict, total=False): - text: Required[str] - """Text content""" - - type: Required[Literal["text"]] - """Discriminator type of the content item. 
Always "text" """ - - -InterleavedContent: TypeAlias = Union[str, ImageContentItem, TextContentItem, Iterable[InterleavedContentItem]] diff --git a/src/llama_stack_client/types/shared_params/interleaved_content_item.py b/src/llama_stack_client/types/shared_params/interleaved_content_item.py deleted file mode 100644 index b5c0bcc1..00000000 --- a/src/llama_stack_client/types/shared_params/interleaved_content_item.py +++ /dev/null @@ -1,48 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Union -from typing_extensions import Literal, Required, TypeAlias, TypedDict - -__all__ = [ - "InterleavedContentItem", - "ImageContentItem", - "ImageContentItemImage", - "ImageContentItemImageURL", - "TextContentItem", -] - - -class ImageContentItemImageURL(TypedDict, total=False): - uri: Required[str] - - -class ImageContentItemImage(TypedDict, total=False): - data: str - """base64 encoded image data as string""" - - url: ImageContentItemImageURL - """A URL of the image or data URL in the format of data:image/{type};base64,{data}. - - Note that URL could have length limits. - """ - - -class ImageContentItem(TypedDict, total=False): - image: Required[ImageContentItemImage] - """Image as a base64 encoded string or an URL""" - - type: Required[Literal["image"]] - """Discriminator type of the content item. Always "image" """ - - -class TextContentItem(TypedDict, total=False): - text: Required[str] - """Text content""" - - type: Required[Literal["text"]] - """Discriminator type of the content item. Always "text" """ - - -InterleavedContentItem: TypeAlias = Union[ImageContentItem, TextContentItem] diff --git a/src/llama_stack_client/types/shared_params/message.py b/src/llama_stack_client/types/shared_params/message.py deleted file mode 100644 index 24a49afc..00000000 --- a/src/llama_stack_client/types/shared_params/message.py +++ /dev/null @@ -1,15 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Union -from typing_extensions import TypeAlias - -from .user_message import UserMessage -from .system_message import SystemMessage -from .completion_message import CompletionMessage -from .tool_response_message import ToolResponseMessage - -__all__ = ["Message"] - -Message: TypeAlias = Union[UserMessage, SystemMessage, ToolResponseMessage, CompletionMessage] diff --git a/src/llama_stack_client/types/shared_params/query_config.py b/src/llama_stack_client/types/shared_params/query_config.py deleted file mode 100644 index dd482ecf..00000000 --- a/src/llama_stack_client/types/shared_params/query_config.py +++ /dev/null @@ -1,65 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Union -from typing_extensions import Literal, Required, TypeAlias, TypedDict - -from .query_generator_config import QueryGeneratorConfig - -__all__ = ["QueryConfig", "Ranker", "RankerRrfRanker", "RankerWeightedRanker"] - - -class RankerRrfRanker(TypedDict, total=False): - impact_factor: Required[float] - """The impact factor for RRF scoring. - - Higher values give more weight to higher-ranked results. Must be greater than 0. - Default of 60 is from the original RRF paper (Cormack et al., 2009). 
- """ - - type: Required[Literal["rrf"]] - """The type of ranker, always "rrf" """ - - -class RankerWeightedRanker(TypedDict, total=False): - alpha: Required[float] - """Weight factor between 0 and 1. - - 0 means only use keyword scores, 1 means only use vector scores, values in - between blend both scores. - """ - - type: Required[Literal["weighted"]] - """The type of ranker, always "weighted" """ - - -Ranker: TypeAlias = Union[RankerRrfRanker, RankerWeightedRanker] - - -class QueryConfig(TypedDict, total=False): - chunk_template: Required[str] - """Template for formatting each retrieved chunk in the context. - - Available placeholders: {index} (1-based chunk ordinal), {chunk.content} (chunk - content string), {metadata} (chunk metadata dict). Default: "Result - {index}\nContent: {chunk.content}\nMetadata: {metadata}\n" - """ - - max_chunks: Required[int] - """Maximum number of chunks to retrieve.""" - - max_tokens_in_context: Required[int] - """Maximum number of tokens in the context.""" - - query_generator_config: Required[QueryGeneratorConfig] - """Configuration for the query generator.""" - - mode: str - """Search mode for retrieval—either "vector", "keyword", or "hybrid". - - Default "vector". - """ - - ranker: Ranker - """Configuration for the ranker to use in hybrid search. Defaults to RRF ranker.""" diff --git a/src/llama_stack_client/types/shared_params/query_generator_config.py b/src/llama_stack_client/types/shared_params/query_generator_config.py deleted file mode 100644 index db135e80..00000000 --- a/src/llama_stack_client/types/shared_params/query_generator_config.py +++ /dev/null @@ -1,25 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Union -from typing_extensions import Literal, Required, TypeAlias, TypedDict - -__all__ = ["QueryGeneratorConfig", "DefaultRagQueryGeneratorConfig", "LlmragQueryGeneratorConfig"] - - -class DefaultRagQueryGeneratorConfig(TypedDict, total=False): - separator: Required[str] - - type: Required[Literal["default"]] - - -class LlmragQueryGeneratorConfig(TypedDict, total=False): - model: Required[str] - - template: Required[str] - - type: Required[Literal["llm"]] - - -QueryGeneratorConfig: TypeAlias = Union[DefaultRagQueryGeneratorConfig, LlmragQueryGeneratorConfig] diff --git a/src/llama_stack_client/types/shared_params/response_format.py b/src/llama_stack_client/types/shared_params/response_format.py deleted file mode 100644 index 53411700..00000000 --- a/src/llama_stack_client/types/shared_params/response_format.py +++ /dev/null @@ -1,30 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Dict, Union, Iterable -from typing_extensions import Literal, Required, TypeAlias, TypedDict - -__all__ = ["ResponseFormat", "JsonSchemaResponseFormat", "GrammarResponseFormat"] - - -class JsonSchemaResponseFormat(TypedDict, total=False): - json_schema: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - """The JSON schema the response should conform to. - - In a Python SDK, this is often a `pydantic` model. 
- """ - - type: Required[Literal["json_schema"]] - """Must be "json_schema" to identify this format type""" - - -class GrammarResponseFormat(TypedDict, total=False): - bnf: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - """The BNF grammar specification the response should conform to""" - - type: Required[Literal["grammar"]] - """Must be "grammar" to identify this format type""" - - -ResponseFormat: TypeAlias = Union[JsonSchemaResponseFormat, GrammarResponseFormat] diff --git a/src/llama_stack_client/types/shared_params/return_type.py b/src/llama_stack_client/types/shared_params/return_type.py deleted file mode 100644 index dc039137..00000000 --- a/src/llama_stack_client/types/shared_params/return_type.py +++ /dev/null @@ -1,24 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing_extensions import Literal, Required, TypedDict - -__all__ = ["ReturnType"] - - -class ReturnType(TypedDict, total=False): - type: Required[ - Literal[ - "string", - "number", - "boolean", - "array", - "object", - "json", - "union", - "chat_completion_input", - "completion_input", - "agent_turn_input", - ] - ] diff --git a/src/llama_stack_client/types/shared_params/sampling_params.py b/src/llama_stack_client/types/shared_params/sampling_params.py deleted file mode 100644 index 158db1c5..00000000 --- a/src/llama_stack_client/types/shared_params/sampling_params.py +++ /dev/null @@ -1,60 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import List, Union -from typing_extensions import Literal, Required, TypeAlias, TypedDict - -__all__ = [ - "SamplingParams", - "Strategy", - "StrategyGreedySamplingStrategy", - "StrategyTopPSamplingStrategy", - "StrategyTopKSamplingStrategy", -] - - -class StrategyGreedySamplingStrategy(TypedDict, total=False): - type: Required[Literal["greedy"]] - - -class StrategyTopPSamplingStrategy(TypedDict, total=False): - type: Required[Literal["top_p"]] - - temperature: float - - top_p: float - - -class StrategyTopKSamplingStrategy(TypedDict, total=False): - top_k: Required[int] - - type: Required[Literal["top_k"]] - - -Strategy: TypeAlias = Union[StrategyGreedySamplingStrategy, StrategyTopPSamplingStrategy, StrategyTopKSamplingStrategy] - - -class SamplingParams(TypedDict, total=False): - strategy: Required[Strategy] - """The sampling strategy.""" - - max_tokens: int - """The maximum number of tokens that can be generated in the completion. - - The token count of your prompt plus max_tokens cannot exceed the model's context - length. - """ - - repetition_penalty: float - """Number between -2.0 and 2.0. - - Positive values penalize new tokens based on whether they appear in the text so - far, increasing the model's likelihood to talk about new topics. - """ - - stop: List[str] - """Up to 4 sequences where the API will stop generating further tokens. - - The returned text will not contain the stop sequence. - """ diff --git a/src/llama_stack_client/types/shared_params/system_message.py b/src/llama_stack_client/types/shared_params/system_message.py deleted file mode 100644 index 7cf9535c..00000000 --- a/src/llama_stack_client/types/shared_params/system_message.py +++ /dev/null @@ -1,22 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -from typing_extensions import Literal, Required, TypedDict - -from .interleaved_content import InterleavedContent - -__all__ = ["SystemMessage"] - - -class SystemMessage(TypedDict, total=False): - content: Required[InterleavedContent] - """The content of the "system prompt". - - If multiple system messages are provided, they are concatenated. The underlying - Llama Stack code may also add other system messages (for example, for formatting - tool definitions). - """ - - role: Required[Literal["system"]] - """Must be "system" to identify this as a system message""" diff --git a/src/llama_stack_client/types/shared_params/tool_call.py b/src/llama_stack_client/types/shared_params/tool_call.py deleted file mode 100644 index 801716e9..00000000 --- a/src/llama_stack_client/types/shared_params/tool_call.py +++ /dev/null @@ -1,33 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Dict, List, Union -from typing_extensions import Literal, Required, TypedDict - -__all__ = ["ToolCall"] - - -class ToolCall(TypedDict, total=False): - arguments: Required[ - Union[ - str, - Dict[ - str, - Union[ - str, - float, - bool, - List[Union[str, float, bool, None]], - Dict[str, Union[str, float, bool, None]], - None, - ], - ], - ] - ] - - call_id: Required[str] - - tool_name: Required[Union[Literal["brave_search", "wolfram_alpha", "photogen", "code_interpreter"], str]] - - arguments_json: str diff --git a/src/llama_stack_client/types/shared_params/tool_param_definition.py b/src/llama_stack_client/types/shared_params/tool_param_definition.py deleted file mode 100644 index 2d7805fe..00000000 --- a/src/llama_stack_client/types/shared_params/tool_param_definition.py +++ /dev/null @@ -1,18 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Union, Iterable -from typing_extensions import Required, TypedDict - -__all__ = ["ToolParamDefinition"] - - -class ToolParamDefinition(TypedDict, total=False): - param_type: Required[str] - - default: Union[bool, float, str, Iterable[object], object, None] - - description: str - - required: bool diff --git a/src/llama_stack_client/types/shared_params/tool_response_message.py b/src/llama_stack_client/types/shared_params/tool_response_message.py deleted file mode 100644 index 789bbcde..00000000 --- a/src/llama_stack_client/types/shared_params/tool_response_message.py +++ /dev/null @@ -1,20 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing_extensions import Literal, Required, TypedDict - -from .interleaved_content import InterleavedContent - -__all__ = ["ToolResponseMessage"] - - -class ToolResponseMessage(TypedDict, total=False): - call_id: Required[str] - """Unique identifier for the tool call this response is for""" - - content: Required[InterleavedContent] - """The response content from the tool""" - - role: Required[Literal["tool"]] - """Must be "tool" to identify this as a tool response""" diff --git a/src/llama_stack_client/types/shared_params/user_message.py b/src/llama_stack_client/types/shared_params/user_message.py deleted file mode 100644 index 4b8e3de3..00000000 --- a/src/llama_stack_client/types/shared_params/user_message.py +++ /dev/null @@ -1,23 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. 
See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing_extensions import Literal, Required, TypedDict - -from .interleaved_content import InterleavedContent - -__all__ = ["UserMessage"] - - -class UserMessage(TypedDict, total=False): - content: Required[InterleavedContent] - """The content of the message, which can include text and other media""" - - role: Required[Literal["user"]] - """Must be "user" to identify this as a user message""" - - context: InterleavedContent - """(Optional) This field is used internally by Llama Stack to pass RAG context. - - This field may be removed in the API in the future. - """ diff --git a/src/llama_stack_client/types/shield.py b/src/llama_stack_client/types/shield.py deleted file mode 100644 index ff5f01f1..00000000 --- a/src/llama_stack_client/types/shield.py +++ /dev/null @@ -1,20 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Dict, List, Union, Optional -from typing_extensions import Literal - -from .._models import BaseModel - -__all__ = ["Shield"] - - -class Shield(BaseModel): - identifier: str - - provider_id: str - - type: Literal["shield"] - - params: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None - - provider_resource_id: Optional[str] = None diff --git a/src/llama_stack_client/types/shield_call_step.py b/src/llama_stack_client/types/shield_call_step.py deleted file mode 100644 index e19734c6..00000000 --- a/src/llama_stack_client/types/shield_call_step.py +++ /dev/null @@ -1,30 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Optional -from datetime import datetime -from typing_extensions import Literal - -from .._models import BaseModel -from .shared.safety_violation import SafetyViolation - -__all__ = ["ShieldCallStep"] - - -class ShieldCallStep(BaseModel): - step_id: str - """The ID of the step.""" - - step_type: Literal["shield_call"] - """Type of the step in an agent turn.""" - - turn_id: str - """The ID of the turn.""" - - completed_at: Optional[datetime] = None - """The time the step completed.""" - - started_at: Optional[datetime] = None - """The time the step started.""" - - violation: Optional[SafetyViolation] = None - """The violation from the shield call.""" diff --git a/src/llama_stack_client/types/shield_register_params.py b/src/llama_stack_client/types/shield_register_params.py deleted file mode 100644 index 7ae0b2c1..00000000 --- a/src/llama_stack_client/types/shield_register_params.py +++ /dev/null @@ -1,22 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
-
-from __future__ import annotations
-
-from typing import Dict, Union, Iterable
-from typing_extensions import Required, TypedDict
-
-__all__ = ["ShieldRegisterParams"]
-
-
-class ShieldRegisterParams(TypedDict, total=False):
-    shield_id: Required[str]
-    """The identifier of the shield to register."""
-
-    params: Dict[str, Union[bool, float, str, Iterable[object], object, None]]
-    """The parameters of the shield."""
-
-    provider_id: str
-    """The identifier of the provider."""
-
-    provider_shield_id: str
-    """The identifier of the shield in the provider."""
diff --git a/src/llama_stack_client/types/span_with_status.py b/src/llama_stack_client/types/span_with_status.py
deleted file mode 100644
index f93f4ff5..00000000
--- a/src/llama_stack_client/types/span_with_status.py
+++ /dev/null
@@ -1,27 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Dict, List, Union, Optional
-from datetime import datetime
-from typing_extensions import Literal
-
-from .._models import BaseModel
-
-__all__ = ["SpanWithStatus"]
-
-
-class SpanWithStatus(BaseModel):
-    name: str
-
-    span_id: str
-
-    start_time: datetime
-
-    trace_id: str
-
-    attributes: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None
-
-    end_time: Optional[datetime] = None
-
-    parent_span_id: Optional[str] = None
-
-    status: Optional[Literal["ok", "error"]] = None
diff --git a/src/llama_stack_client/types/synthetic_data_generation_generate_params.py b/src/llama_stack_client/types/synthetic_data_generation_generate_params.py
deleted file mode 100644
index abf51059..00000000
--- a/src/llama_stack_client/types/synthetic_data_generation_generate_params.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Iterable
-from typing_extensions import Literal, Required, TypedDict
-
-from .shared_params.message import Message
-
-__all__ = ["SyntheticDataGenerationGenerateParams"]
-
-
-class SyntheticDataGenerationGenerateParams(TypedDict, total=False):
-    dialogs: Required[Iterable[Message]]
-
-    filtering_function: Required[Literal["none", "random", "top_k", "top_p", "top_k_top_p", "sigmoid"]]
-    """The type of filtering function."""
-
-    model: str
diff --git a/src/llama_stack_client/types/synthetic_data_generation_response.py b/src/llama_stack_client/types/synthetic_data_generation_response.py
deleted file mode 100644
index a2ee11e6..00000000
--- a/src/llama_stack_client/types/synthetic_data_generation_response.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Dict, List, Union, Optional
-
-from .._models import BaseModel
-
-__all__ = ["SyntheticDataGenerationResponse"]
-
-
-class SyntheticDataGenerationResponse(BaseModel):
-    synthetic_data: List[Dict[str, Union[bool, float, str, List[object], object, None]]]
-
-    statistics: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None
diff --git a/src/llama_stack_client/types/telemetry_get_span_response.py b/src/llama_stack_client/types/telemetry_get_span_response.py
deleted file mode 100644
index 9e50ed0d..00000000
--- a/src/llama_stack_client/types/telemetry_get_span_response.py
+++ /dev/null
@@ -1,24 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Dict, List, Union, Optional
-from datetime import datetime
-
-from .._models import BaseModel
-
-__all__ = ["TelemetryGetSpanResponse"]
-
-
-class TelemetryGetSpanResponse(BaseModel):
-    name: str
-
-    span_id: str
-
-    start_time: datetime
-
-    trace_id: str
-
-    attributes: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None
-
-    end_time: Optional[datetime] = None
-
-    parent_span_id: Optional[str] = None
diff --git a/src/llama_stack_client/types/telemetry_get_span_tree_params.py b/src/llama_stack_client/types/telemetry_get_span_tree_params.py
deleted file mode 100644
index 7d309d3e..00000000
--- a/src/llama_stack_client/types/telemetry_get_span_tree_params.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import List
-from typing_extensions import TypedDict
-
-__all__ = ["TelemetryGetSpanTreeParams"]
-
-
-class TelemetryGetSpanTreeParams(TypedDict, total=False):
-    attributes_to_return: List[str]
-    """The attributes to return in the tree."""
-
-    max_depth: int
-    """The maximum depth of the tree."""
diff --git a/src/llama_stack_client/types/telemetry_get_span_tree_response.py b/src/llama_stack_client/types/telemetry_get_span_tree_response.py
deleted file mode 100644
index b72e6158..00000000
--- a/src/llama_stack_client/types/telemetry_get_span_tree_response.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Dict
-from typing_extensions import TypeAlias
-
-from .span_with_status import SpanWithStatus
-
-__all__ = ["TelemetryGetSpanTreeResponse"]
-
-TelemetryGetSpanTreeResponse: TypeAlias = Dict[str, SpanWithStatus]
diff --git a/src/llama_stack_client/types/telemetry_log_event_params.py b/src/llama_stack_client/types/telemetry_log_event_params.py
deleted file mode 100644
index 246b6526..00000000
--- a/src/llama_stack_client/types/telemetry_log_event_params.py
+++ /dev/null
@@ -1,17 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Required, TypedDict
-
-from .event_param import EventParam
-
-__all__ = ["TelemetryLogEventParams"]
-
-
-class TelemetryLogEventParams(TypedDict, total=False):
-    event: Required[EventParam]
-    """The event to log."""
-
-    ttl_seconds: Required[int]
-    """The time to live of the event."""
diff --git a/src/llama_stack_client/types/telemetry_query_spans_params.py b/src/llama_stack_client/types/telemetry_query_spans_params.py
deleted file mode 100644
index 6429c08f..00000000
--- a/src/llama_stack_client/types/telemetry_query_spans_params.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import List, Iterable
-from typing_extensions import Required, TypedDict
-
-from .query_condition_param import QueryConditionParam
-
-__all__ = ["TelemetryQuerySpansParams"]
-
-
-class TelemetryQuerySpansParams(TypedDict, total=False):
-    attribute_filters: Required[Iterable[QueryConditionParam]]
-    """The attribute filters to apply to the spans."""
-
-    attributes_to_return: Required[List[str]]
-    """The attributes to return in the spans."""
-
-    max_depth: int
-    """The maximum depth of the tree."""
diff --git a/src/llama_stack_client/types/telemetry_query_spans_response.py b/src/llama_stack_client/types/telemetry_query_spans_response.py
deleted file mode 100644
index c630efeb..00000000
--- a/src/llama_stack_client/types/telemetry_query_spans_response.py
+++ /dev/null
@@ -1,28 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Dict, List, Union, Optional
-from datetime import datetime
-from typing_extensions import TypeAlias
-
-from .._models import BaseModel
-
-__all__ = ["TelemetryQuerySpansResponse", "TelemetryQuerySpansResponseItem"]
-
-
-class TelemetryQuerySpansResponseItem(BaseModel):
-    name: str
-
-    span_id: str
-
-    start_time: datetime
-
-    trace_id: str
-
-    attributes: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None
-
-    end_time: Optional[datetime] = None
-
-    parent_span_id: Optional[str] = None
-
-
-TelemetryQuerySpansResponse: TypeAlias = List[TelemetryQuerySpansResponseItem]
diff --git a/src/llama_stack_client/types/telemetry_query_traces_params.py b/src/llama_stack_client/types/telemetry_query_traces_params.py
deleted file mode 100644
index 7c82ef14..00000000
--- a/src/llama_stack_client/types/telemetry_query_traces_params.py
+++ /dev/null
@@ -1,24 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import List, Iterable
-from typing_extensions import TypedDict
-
-from .query_condition_param import QueryConditionParam
-
-__all__ = ["TelemetryQueryTracesParams"]
-
-
-class TelemetryQueryTracesParams(TypedDict, total=False):
-    attribute_filters: Iterable[QueryConditionParam]
-    """The attribute filters to apply to the traces."""
-
-    limit: int
-    """The limit of traces to return."""
-
-    offset: int
-    """The offset of the traces to return."""
-
-    order_by: List[str]
-    """The order by of the traces to return."""
diff --git a/src/llama_stack_client/types/telemetry_query_traces_response.py b/src/llama_stack_client/types/telemetry_query_traces_response.py
deleted file mode 100644
index 01a1365d..00000000
--- a/src/llama_stack_client/types/telemetry_query_traces_response.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List
-from typing_extensions import TypeAlias
-
-from .trace import Trace
-
-__all__ = ["TelemetryQueryTracesResponse"]
-
-TelemetryQueryTracesResponse: TypeAlias = List[Trace]
diff --git a/src/llama_stack_client/types/telemetry_save_spans_to_dataset_params.py b/src/llama_stack_client/types/telemetry_save_spans_to_dataset_params.py
deleted file mode 100644
index bb96f8e3..00000000
--- a/src/llama_stack_client/types/telemetry_save_spans_to_dataset_params.py
+++ /dev/null
@@ -1,24 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import List, Iterable
-from typing_extensions import Required, TypedDict
-
-from .query_condition_param import QueryConditionParam
-
-__all__ = ["TelemetrySaveSpansToDatasetParams"]
-
-
-class TelemetrySaveSpansToDatasetParams(TypedDict, total=False):
-    attribute_filters: Required[Iterable[QueryConditionParam]]
-    """The attribute filters to apply to the spans."""
-
-    attributes_to_save: Required[List[str]]
-    """The attributes to save to the dataset."""
-
-    dataset_id: Required[str]
-    """The ID of the dataset to save the spans to."""
-
-    max_depth: int
-    """The maximum depth of the tree."""
diff --git a/src/llama_stack_client/types/token_log_probs.py b/src/llama_stack_client/types/token_log_probs.py
deleted file mode 100644
index b1a0a2b4..00000000
--- a/src/llama_stack_client/types/token_log_probs.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Dict
-
-from .._models import BaseModel
-
-__all__ = ["TokenLogProbs"]
-
-
-class TokenLogProbs(BaseModel):
-    logprobs_by_token: Dict[str, float]
-    """Dictionary mapping tokens to their log probabilities"""
diff --git a/src/llama_stack_client/types/tool.py b/src/llama_stack_client/types/tool.py
deleted file mode 100644
index 6beb8764..00000000
--- a/src/llama_stack_client/types/tool.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Dict, List, Union, Optional
-from typing_extensions import Literal
-
-from .._models import BaseModel
-
-__all__ = ["Tool", "Parameter"]
-
-
-class Parameter(BaseModel):
-    description: str
-
-    name: str
-
-    parameter_type: str
-
-    required: bool
-
-    default: Union[bool, float, str, List[object], object, None] = None
-
-
-class Tool(BaseModel):
-    description: str
-
-    identifier: str
-
-    parameters: List[Parameter]
-
-    provider_id: str
-
-    toolgroup_id: str
-
-    type: Literal["tool"]
-
-    metadata: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None
-
-    provider_resource_id: Optional[str] = None
diff --git a/src/llama_stack_client/types/tool_def.py b/src/llama_stack_client/types/tool_def.py
deleted file mode 100644
index d96c5c5d..00000000
--- a/src/llama_stack_client/types/tool_def.py
+++ /dev/null
@@ -1,29 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Dict, List, Union, Optional
-
-from .._models import BaseModel
-
-__all__ = ["ToolDef", "Parameter"]
-
-
-class Parameter(BaseModel):
-    description: str
-
-    name: str
-
-    parameter_type: str
-
-    required: bool
-
-    default: Union[bool, float, str, List[object], object, None] = None
-
-
-class ToolDef(BaseModel):
-    name: str
-
-    description: Optional[str] = None
-
-    metadata: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None
-
-    parameters: Optional[List[Parameter]] = None
diff --git a/src/llama_stack_client/types/tool_def_param.py b/src/llama_stack_client/types/tool_def_param.py
deleted file mode 100644
index 42d27fbd..00000000
--- a/src/llama_stack_client/types/tool_def_param.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Dict, Union, Iterable
-from typing_extensions import Required, TypedDict
-
-__all__ = ["ToolDefParam", "Parameter"]
-
-
-class Parameter(TypedDict, total=False):
-    description: Required[str]
-
-    name: Required[str]
-
-    parameter_type: Required[str]
-
-    required: Required[bool]
-
-    default: Union[bool, float, str, Iterable[object], object, None]
-
-
-class ToolDefParam(TypedDict, total=False):
-    name: Required[str]
-
-    description: str
-
-    metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]]
-
-    parameters: Iterable[Parameter]
diff --git a/src/llama_stack_client/types/tool_execution_step.py b/src/llama_stack_client/types/tool_execution_step.py
deleted file mode 100644
index f68115fc..00000000
--- a/src/llama_stack_client/types/tool_execution_step.py
+++ /dev/null
@@ -1,34 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List, Optional
-from datetime import datetime
-from typing_extensions import Literal
-
-from .._models import BaseModel
-from .tool_response import ToolResponse
-from .shared.tool_call import ToolCall
-
-__all__ = ["ToolExecutionStep"]
-
-
-class ToolExecutionStep(BaseModel):
-    step_id: str
-    """The ID of the step."""
-
-    step_type: Literal["tool_execution"]
-    """Type of the step in an agent turn."""
-
-    tool_calls: List[ToolCall]
-    """The tool calls to execute."""
-
-    tool_responses: List[ToolResponse]
-    """The tool responses from the tool calls."""
-
-    turn_id: str
-    """The ID of the turn."""
-
-    completed_at: Optional[datetime] = None
-    """The time the step completed."""
-
-    started_at: Optional[datetime] = None
-    """The time the step started."""
diff --git a/src/llama_stack_client/types/tool_group.py b/src/llama_stack_client/types/tool_group.py
deleted file mode 100644
index 3389395a..00000000
--- a/src/llama_stack_client/types/tool_group.py
+++ /dev/null
@@ -1,26 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Dict, List, Union, Optional
-from typing_extensions import Literal
-
-from .._models import BaseModel
-
-__all__ = ["ToolGroup", "McpEndpoint"]
-
-
-class McpEndpoint(BaseModel):
-    uri: str
-
-
-class ToolGroup(BaseModel):
-    identifier: str
-
-    provider_id: str
-
-    type: Literal["tool_group"]
-
-    args: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None
-
-    mcp_endpoint: Optional[McpEndpoint] = None
-
-    provider_resource_id: Optional[str] = None
diff --git a/src/llama_stack_client/types/tool_invocation_result.py b/src/llama_stack_client/types/tool_invocation_result.py
deleted file mode 100644
index 01f7db28..00000000
--- a/src/llama_stack_client/types/tool_invocation_result.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Dict, List, Union, Optional
-
-from .._models import BaseModel
-from .shared.interleaved_content import InterleavedContent
-
-__all__ = ["ToolInvocationResult"]
-
-
-class ToolInvocationResult(BaseModel):
-    content: Optional[InterleavedContent] = None
-    """A image content item"""
-
-    error_code: Optional[int] = None
-
-    error_message: Optional[str] = None
-
-    metadata: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None
diff --git a/src/llama_stack_client/types/tool_response.py b/src/llama_stack_client/types/tool_response.py
deleted file mode 100644
index f984f30a..00000000
--- a/src/llama_stack_client/types/tool_response.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Dict, List, Union, Optional
-from typing_extensions import Literal
-
-from .._models import BaseModel
-from .shared.interleaved_content import InterleavedContent
-
-__all__ = ["ToolResponse"]
-
-
-class ToolResponse(BaseModel):
-    call_id: str
-
-    content: InterleavedContent
-    """A image content item"""
-
-    tool_name: Union[Literal["brave_search", "wolfram_alpha", "photogen", "code_interpreter"], str]
-
-    metadata: Optional[Dict[str, Union[bool, float, str, List[object], object, None]]] = None
diff --git a/src/llama_stack_client/types/tool_response_param.py b/src/llama_stack_client/types/tool_response_param.py
deleted file mode 100644
index 8ac14862..00000000
--- a/src/llama_stack_client/types/tool_response_param.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Dict, Union, Iterable
-from typing_extensions import Literal, Required, TypedDict
-
-from .shared_params.interleaved_content import InterleavedContent
-
-__all__ = ["ToolResponseParam"]
-
-
-class ToolResponseParam(TypedDict, total=False):
-    call_id: Required[str]
-
-    content: Required[InterleavedContent]
-    """A image content item"""
-
-    tool_name: Required[Union[Literal["brave_search", "wolfram_alpha", "photogen", "code_interpreter"], str]]
-
-    metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]]
diff --git a/src/llama_stack_client/types/tool_runtime/__init__.py b/src/llama_stack_client/types/tool_runtime/__init__.py
deleted file mode 100644
index 43dd1925..00000000
--- a/src/llama_stack_client/types/tool_runtime/__init__.py
+++ /dev/null
@@ -1,6 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from .rag_tool_query_params import RagToolQueryParams as RagToolQueryParams
-from .rag_tool_insert_params import RagToolInsertParams as RagToolInsertParams
diff --git a/src/llama_stack_client/types/tool_runtime/rag_tool_insert_params.py b/src/llama_stack_client/types/tool_runtime/rag_tool_insert_params.py
deleted file mode 100644
index bc52c481..00000000
--- a/src/llama_stack_client/types/tool_runtime/rag_tool_insert_params.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
- -from __future__ import annotations - -from typing import Iterable -from typing_extensions import Required, TypedDict - -from ..shared_params.document import Document - -__all__ = ["RagToolInsertParams"] - - -class RagToolInsertParams(TypedDict, total=False): - chunk_size_in_tokens: Required[int] - - documents: Required[Iterable[Document]] - - vector_db_id: Required[str] diff --git a/src/llama_stack_client/types/tool_runtime/rag_tool_query_params.py b/src/llama_stack_client/types/tool_runtime/rag_tool_query_params.py deleted file mode 100644 index 4599c693..00000000 --- a/src/llama_stack_client/types/tool_runtime/rag_tool_query_params.py +++ /dev/null @@ -1,21 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import List -from typing_extensions import Required, TypedDict - -from ..shared_params.query_config import QueryConfig -from ..shared_params.interleaved_content import InterleavedContent - -__all__ = ["RagToolQueryParams"] - - -class RagToolQueryParams(TypedDict, total=False): - content: Required[InterleavedContent] - """A image content item""" - - vector_db_ids: Required[List[str]] - - query_config: QueryConfig - """Configuration for the RAG query generation.""" diff --git a/src/llama_stack_client/types/tool_runtime_invoke_tool_params.py b/src/llama_stack_client/types/tool_runtime_invoke_tool_params.py deleted file mode 100644 index 03df2d40..00000000 --- a/src/llama_stack_client/types/tool_runtime_invoke_tool_params.py +++ /dev/null @@ -1,16 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Dict, Union, Iterable -from typing_extensions import Required, TypedDict - -__all__ = ["ToolRuntimeInvokeToolParams"] - - -class ToolRuntimeInvokeToolParams(TypedDict, total=False): - kwargs: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - """A dictionary of arguments to pass to the tool.""" - - tool_name: Required[str] - """The name of the tool to invoke.""" diff --git a/src/llama_stack_client/types/tool_runtime_list_tools_params.py b/src/llama_stack_client/types/tool_runtime_list_tools_params.py deleted file mode 100644 index 539e176d..00000000 --- a/src/llama_stack_client/types/tool_runtime_list_tools_params.py +++ /dev/null @@ -1,19 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing_extensions import Required, TypedDict - -__all__ = ["ToolRuntimeListToolsParams", "McpEndpoint"] - - -class ToolRuntimeListToolsParams(TypedDict, total=False): - mcp_endpoint: McpEndpoint - """The MCP endpoint to use for the tool group.""" - - tool_group_id: str - """The ID of the tool group to list tools for.""" - - -class McpEndpoint(TypedDict, total=False): - uri: Required[str] diff --git a/src/llama_stack_client/types/tool_runtime_list_tools_response.py b/src/llama_stack_client/types/tool_runtime_list_tools_response.py deleted file mode 100644 index cd65754f..00000000 --- a/src/llama_stack_client/types/tool_runtime_list_tools_response.py +++ /dev/null @@ -1,10 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from typing import List -from typing_extensions import TypeAlias - -from .tool_def import ToolDef - -__all__ = ["ToolRuntimeListToolsResponse"] - -ToolRuntimeListToolsResponse: TypeAlias = List[ToolDef] diff --git a/src/llama_stack_client/types/toolgroup_list_response.py b/src/llama_stack_client/types/toolgroup_list_response.py deleted file mode 100644 index 0f668de3..00000000 --- a/src/llama_stack_client/types/toolgroup_list_response.py +++ /dev/null @@ -1,10 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import List -from typing_extensions import TypeAlias - -from .tool_group import ToolGroup - -__all__ = ["ToolgroupListResponse"] - -ToolgroupListResponse: TypeAlias = List[ToolGroup] diff --git a/src/llama_stack_client/types/toolgroup_register_params.py b/src/llama_stack_client/types/toolgroup_register_params.py deleted file mode 100644 index a50c14c4..00000000 --- a/src/llama_stack_client/types/toolgroup_register_params.py +++ /dev/null @@ -1,26 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Dict, Union, Iterable -from typing_extensions import Required, TypedDict - -__all__ = ["ToolgroupRegisterParams", "McpEndpoint"] - - -class ToolgroupRegisterParams(TypedDict, total=False): - provider_id: Required[str] - """The ID of the provider to use for the tool group.""" - - toolgroup_id: Required[str] - """The ID of the tool group to register.""" - - args: Dict[str, Union[bool, float, str, Iterable[object], object, None]] - """A dictionary of arguments to pass to the tool group.""" - - mcp_endpoint: McpEndpoint - """The MCP endpoint to use for the tool group.""" - - -class McpEndpoint(TypedDict, total=False): - uri: Required[str] diff --git a/src/llama_stack_client/types/trace.py b/src/llama_stack_client/types/trace.py deleted file mode 100644 index 3683551c..00000000 --- a/src/llama_stack_client/types/trace.py +++ /dev/null @@ -1,18 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Optional -from datetime import datetime - -from .._models import BaseModel - -__all__ = ["Trace"] - - -class Trace(BaseModel): - root_span_id: str - - start_time: datetime - - trace_id: str - - end_time: Optional[datetime] = None diff --git a/src/llama_stack_client/types/vector_db_list_response.py b/src/llama_stack_client/types/vector_db_list_response.py deleted file mode 100644 index 39161431..00000000 --- a/src/llama_stack_client/types/vector_db_list_response.py +++ /dev/null @@ -1,25 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import List, Optional -from typing_extensions import Literal, TypeAlias - -from .._models import BaseModel - -__all__ = ["VectorDBListResponse", "VectorDBListResponseItem"] - - -class VectorDBListResponseItem(BaseModel): - embedding_dimension: int - - embedding_model: str - - identifier: str - - provider_id: str - - type: Literal["vector_db"] - - provider_resource_id: Optional[str] = None - - -VectorDBListResponse: TypeAlias = List[VectorDBListResponseItem] diff --git a/src/llama_stack_client/types/vector_db_register_params.py b/src/llama_stack_client/types/vector_db_register_params.py deleted file mode 100644 index 734659a6..00000000 --- a/src/llama_stack_client/types/vector_db_register_params.py +++ /dev/null @@ -1,24 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. 
See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing_extensions import Required, TypedDict - -__all__ = ["VectorDBRegisterParams"] - - -class VectorDBRegisterParams(TypedDict, total=False): - embedding_model: Required[str] - """The embedding model to use.""" - - vector_db_id: Required[str] - """The identifier of the vector database to register.""" - - embedding_dimension: int - """The dimension of the embedding model.""" - - provider_id: str - """The identifier of the provider.""" - - provider_vector_db_id: str - """The identifier of the vector database in the provider.""" diff --git a/src/llama_stack_client/types/vector_db_register_response.py b/src/llama_stack_client/types/vector_db_register_response.py deleted file mode 100644 index 9c7a3166..00000000 --- a/src/llama_stack_client/types/vector_db_register_response.py +++ /dev/null @@ -1,22 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Optional -from typing_extensions import Literal - -from .._models import BaseModel - -__all__ = ["VectorDBRegisterResponse"] - - -class VectorDBRegisterResponse(BaseModel): - embedding_dimension: int - - embedding_model: str - - identifier: str - - provider_id: str - - type: Literal["vector_db"] - - provider_resource_id: Optional[str] = None diff --git a/src/llama_stack_client/types/vector_db_retrieve_response.py b/src/llama_stack_client/types/vector_db_retrieve_response.py deleted file mode 100644 index fb3597a5..00000000 --- a/src/llama_stack_client/types/vector_db_retrieve_response.py +++ /dev/null @@ -1,22 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Optional -from typing_extensions import Literal - -from .._models import BaseModel - -__all__ = ["VectorDBRetrieveResponse"] - - -class VectorDBRetrieveResponse(BaseModel): - embedding_dimension: int - - embedding_model: str - - identifier: str - - provider_id: str - - type: Literal["vector_db"] - - provider_resource_id: Optional[str] = None diff --git a/src/llama_stack_client/types/vector_io_insert_params.py b/src/llama_stack_client/types/vector_io_insert_params.py deleted file mode 100644 index 5b6580fe..00000000 --- a/src/llama_stack_client/types/vector_io_insert_params.py +++ /dev/null @@ -1,94 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Dict, Union, Iterable -from typing_extensions import Required, TypedDict - -from .shared_params.interleaved_content import InterleavedContent - -__all__ = ["VectorIoInsertParams", "Chunk", "ChunkChunkMetadata"] - - -class VectorIoInsertParams(TypedDict, total=False): - chunks: Required[Iterable[Chunk]] - """The chunks to insert. - - Each `Chunk` should contain content which can be interleaved text, images, or - other types. `metadata`: `dict[str, Any]` and `embedding`: `List[float]` are - optional. If `metadata` is provided, you configure how Llama Stack formats the - chunk during generation. If `embedding` is not provided, it will be computed - later. 
- """ - - vector_db_id: Required[str] - """The identifier of the vector database to insert the chunks into.""" - - ttl_seconds: int - """The time to live of the chunks.""" - - -class ChunkChunkMetadata(TypedDict, total=False): - chunk_embedding_dimension: int - """The dimension of the embedding vector for the chunk.""" - - chunk_embedding_model: str - """The embedding model used to create the chunk's embedding.""" - - chunk_id: str - """The ID of the chunk. - - If not set, it will be generated based on the document ID and content. - """ - - chunk_tokenizer: str - """The tokenizer used to create the chunk. Default is Tiktoken.""" - - chunk_window: str - """The window of the chunk, which can be used to group related chunks together.""" - - content_token_count: int - """The number of tokens in the content of the chunk.""" - - created_timestamp: int - """An optional timestamp indicating when the chunk was created.""" - - document_id: str - """The ID of the document this chunk belongs to.""" - - metadata_token_count: int - """The number of tokens in the metadata of the chunk.""" - - source: str - """The source of the content, such as a URL, file path, or other identifier.""" - - updated_timestamp: int - """An optional timestamp indicating when the chunk was last updated.""" - - -class Chunk(TypedDict, total=False): - content: Required[InterleavedContent] - """ - The content of the chunk, which can be interleaved text, images, or other types. - """ - - metadata: Required[Dict[str, Union[bool, float, str, Iterable[object], object, None]]] - """ - Metadata associated with the chunk that will be used in the model context during - inference. - """ - - chunk_metadata: ChunkChunkMetadata - """Metadata for the chunk that will NOT be used in the context during inference. - - The `chunk_metadata` is required backend functionality. - """ - - embedding: Iterable[float] - """Optional embedding for the chunk. If not provided, it will be computed later.""" - - stored_chunk_id: str - """The chunk ID that is stored in the vector database. - - Used for backend functionality. - """ diff --git a/src/llama_stack_client/types/vector_io_query_params.py b/src/llama_stack_client/types/vector_io_query_params.py deleted file mode 100644 index f0569a58..00000000 --- a/src/llama_stack_client/types/vector_io_query_params.py +++ /dev/null @@ -1,21 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Dict, Union, Iterable -from typing_extensions import Required, TypedDict - -from .shared_params.interleaved_content import InterleavedContent - -__all__ = ["VectorIoQueryParams"] - - -class VectorIoQueryParams(TypedDict, total=False): - query: Required[InterleavedContent] - """The query to search for.""" - - vector_db_id: Required[str] - """The identifier of the vector database to query.""" - - params: Dict[str, Union[bool, float, str, Iterable[object], object, None]] - """The parameters of the query.""" diff --git a/src/llama_stack_client/types/vector_store.py b/src/llama_stack_client/types/vector_store.py deleted file mode 100644 index 5dc4ad3a..00000000 --- a/src/llama_stack_client/types/vector_store.py +++ /dev/null @@ -1,44 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -import builtins -from typing import Dict, List, Union, Optional - -from .._models import BaseModel - -__all__ = ["VectorStore", "FileCounts"] - - -class FileCounts(BaseModel): - cancelled: int - - completed: int - - failed: int - - in_progress: int - - total: int - - -class VectorStore(BaseModel): - id: str - - created_at: int - - file_counts: FileCounts - - metadata: Dict[str, Union[bool, float, str, List[object], object, None]] - - object: str - - status: str - - usage_bytes: int - - expires_after: Optional[Dict[str, Union[bool, float, str, List[builtins.object], builtins.object, None]]] = None - - expires_at: Optional[int] = None - - last_active_at: Optional[int] = None - - name: Optional[str] = None diff --git a/src/llama_stack_client/types/vector_store_create_params.py b/src/llama_stack_client/types/vector_store_create_params.py deleted file mode 100644 index 18748f48..00000000 --- a/src/llama_stack_client/types/vector_store_create_params.py +++ /dev/null @@ -1,43 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Dict, List, Union, Iterable -from typing_extensions import Required, TypedDict - -__all__ = ["VectorStoreCreateParams"] - - -class VectorStoreCreateParams(TypedDict, total=False): - name: Required[str] - """A name for the vector store.""" - - chunking_strategy: Dict[str, Union[bool, float, str, Iterable[object], object, None]] - """The chunking strategy used to chunk the file(s). - - If not set, will use the `auto` strategy. - """ - - embedding_dimension: int - """The dimension of the embedding vectors (default: 384).""" - - embedding_model: str - """The embedding model to use for this vector store.""" - - expires_after: Dict[str, Union[bool, float, str, Iterable[object], object, None]] - """The expiration policy for a vector store.""" - - file_ids: List[str] - """A list of File IDs that the vector store should use. - - Useful for tools like `file_search` that can access files. - """ - - metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] - """Set of 16 key-value pairs that can be attached to an object.""" - - provider_id: str - """The ID of the provider to use for this vector store.""" - - provider_vector_db_id: str - """The provider-specific vector database ID.""" diff --git a/src/llama_stack_client/types/vector_store_delete_response.py b/src/llama_stack_client/types/vector_store_delete_response.py deleted file mode 100644 index 945ada10..00000000 --- a/src/llama_stack_client/types/vector_store_delete_response.py +++ /dev/null @@ -1,13 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from .._models import BaseModel - -__all__ = ["VectorStoreDeleteResponse"] - - -class VectorStoreDeleteResponse(BaseModel): - id: str - - deleted: bool - - object: str diff --git a/src/llama_stack_client/types/vector_store_list_params.py b/src/llama_stack_client/types/vector_store_list_params.py deleted file mode 100644 index 176a6279..00000000 --- a/src/llama_stack_client/types/vector_store_list_params.py +++ /dev/null @@ -1,33 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing_extensions import TypedDict - -__all__ = ["VectorStoreListParams"] - - -class VectorStoreListParams(TypedDict, total=False): - after: str - """A cursor for use in pagination. - - `after` is an object ID that defines your place in the list. 
- """ - - before: str - """A cursor for use in pagination. - - `before` is an object ID that defines your place in the list. - """ - - limit: int - """A limit on the number of objects to be returned. - - Limit can range between 1 and 100, and the default is 20. - """ - - order: str - """Sort order by the `created_at` timestamp of the objects. - - `asc` for ascending order and `desc` for descending order. - """ diff --git a/src/llama_stack_client/types/vector_store_search_params.py b/src/llama_stack_client/types/vector_store_search_params.py deleted file mode 100644 index fdb02ff7..00000000 --- a/src/llama_stack_client/types/vector_store_search_params.py +++ /dev/null @@ -1,34 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Dict, List, Union, Iterable -from typing_extensions import Required, TypedDict - -__all__ = ["VectorStoreSearchParams", "RankingOptions"] - - -class VectorStoreSearchParams(TypedDict, total=False): - query: Required[Union[str, List[str]]] - """The query string or array for performing the search.""" - - filters: Dict[str, Union[bool, float, str, Iterable[object], object, None]] - """Filters based on file attributes to narrow the search results.""" - - max_num_results: int - """Maximum number of results to return (1 to 50 inclusive, default 10).""" - - ranking_options: RankingOptions - """Ranking options for fine-tuning the search results.""" - - rewrite_query: bool - """Whether to rewrite the natural language query for vector search (default false)""" - - search_mode: str - """The search mode to use - "keyword", "vector", or "hybrid" (default "vector")""" - - -class RankingOptions(TypedDict, total=False): - ranker: str - - score_threshold: float diff --git a/src/llama_stack_client/types/vector_store_search_response.py b/src/llama_stack_client/types/vector_store_search_response.py deleted file mode 100644 index 7b596e03..00000000 --- a/src/llama_stack_client/types/vector_store_search_response.py +++ /dev/null @@ -1,38 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Dict, List, Union, Optional -from typing_extensions import Literal - -from .._models import BaseModel - -__all__ = ["VectorStoreSearchResponse", "Data", "DataContent"] - - -class DataContent(BaseModel): - text: str - - type: Literal["text"] - - -class Data(BaseModel): - content: List[DataContent] - - file_id: str - - filename: str - - score: float - - attributes: Optional[Dict[str, Union[str, float, bool]]] = None - - -class VectorStoreSearchResponse(BaseModel): - data: List[Data] - - has_more: bool - - object: str - - search_query: str - - next_page: Optional[str] = None diff --git a/src/llama_stack_client/types/vector_store_update_params.py b/src/llama_stack_client/types/vector_store_update_params.py deleted file mode 100644 index d1e069ed..00000000 --- a/src/llama_stack_client/types/vector_store_update_params.py +++ /dev/null @@ -1,19 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -from typing import Dict, Union, Iterable -from typing_extensions import TypedDict - -__all__ = ["VectorStoreUpdateParams"] - - -class VectorStoreUpdateParams(TypedDict, total=False): - expires_after: Dict[str, Union[bool, float, str, Iterable[object], object, None]] - """The expiration policy for a vector store.""" - - metadata: Dict[str, Union[bool, float, str, Iterable[object], object, None]] - """Set of 16 key-value pairs that can be attached to an object.""" - - name: str - """The name of the vector store.""" diff --git a/src/llama_stack_client/types/vector_stores/__init__.py b/src/llama_stack_client/types/vector_stores/__init__.py deleted file mode 100644 index 550270e2..00000000 --- a/src/llama_stack_client/types/vector_stores/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from .vector_store_file import VectorStoreFile as VectorStoreFile -from .file_create_params import FileCreateParams as FileCreateParams diff --git a/src/llama_stack_client/types/vector_stores/file_create_params.py b/src/llama_stack_client/types/vector_stores/file_create_params.py deleted file mode 100644 index 66fbf624..00000000 --- a/src/llama_stack_client/types/vector_stores/file_create_params.py +++ /dev/null @@ -1,46 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Dict, Union, Iterable -from typing_extensions import Literal, Required, TypeAlias, TypedDict - -__all__ = [ - "FileCreateParams", - "ChunkingStrategy", - "ChunkingStrategyVectorStoreChunkingStrategyAuto", - "ChunkingStrategyVectorStoreChunkingStrategyStatic", - "ChunkingStrategyVectorStoreChunkingStrategyStaticStatic", -] - - -class FileCreateParams(TypedDict, total=False): - file_id: Required[str] - """The ID of the file to attach to the vector store.""" - - attributes: Dict[str, Union[bool, float, str, Iterable[object], object, None]] - """The key-value attributes stored with the file, which can be used for filtering.""" - - chunking_strategy: ChunkingStrategy - """The chunking strategy to use for the file.""" - - -class ChunkingStrategyVectorStoreChunkingStrategyAuto(TypedDict, total=False): - type: Required[Literal["auto"]] - - -class ChunkingStrategyVectorStoreChunkingStrategyStaticStatic(TypedDict, total=False): - chunk_overlap_tokens: Required[int] - - max_chunk_size_tokens: Required[int] - - -class ChunkingStrategyVectorStoreChunkingStrategyStatic(TypedDict, total=False): - static: Required[ChunkingStrategyVectorStoreChunkingStrategyStaticStatic] - - type: Required[Literal["static"]] - - -ChunkingStrategy: TypeAlias = Union[ - ChunkingStrategyVectorStoreChunkingStrategyAuto, ChunkingStrategyVectorStoreChunkingStrategyStatic -] diff --git a/src/llama_stack_client/types/vector_stores/vector_store_file.py b/src/llama_stack_client/types/vector_stores/vector_store_file.py deleted file mode 100644 index 45ce03f8..00000000 --- a/src/llama_stack_client/types/vector_stores/vector_store_file.py +++ /dev/null @@ -1,64 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from typing import Dict, List, Union, Optional -from typing_extensions import Literal, Annotated, TypeAlias - -from ..._utils import PropertyInfo -from ..._models import BaseModel - -__all__ = [ - "VectorStoreFile", - "ChunkingStrategy", - "ChunkingStrategyVectorStoreChunkingStrategyAuto", - "ChunkingStrategyVectorStoreChunkingStrategyStatic", - "ChunkingStrategyVectorStoreChunkingStrategyStaticStatic", - "LastError", -] - - -class ChunkingStrategyVectorStoreChunkingStrategyAuto(BaseModel): - type: Literal["auto"] - - -class ChunkingStrategyVectorStoreChunkingStrategyStaticStatic(BaseModel): - chunk_overlap_tokens: int - - max_chunk_size_tokens: int - - -class ChunkingStrategyVectorStoreChunkingStrategyStatic(BaseModel): - static: ChunkingStrategyVectorStoreChunkingStrategyStaticStatic - - type: Literal["static"] - - -ChunkingStrategy: TypeAlias = Annotated[ - Union[ChunkingStrategyVectorStoreChunkingStrategyAuto, ChunkingStrategyVectorStoreChunkingStrategyStatic], - PropertyInfo(discriminator="type"), -] - - -class LastError(BaseModel): - code: Literal["server_error", "rate_limit_exceeded"] - - message: str - - -class VectorStoreFile(BaseModel): - id: str - - attributes: Dict[str, Union[bool, float, str, List[object], object, None]] - - chunking_strategy: ChunkingStrategy - - created_at: int - - object: str - - status: Literal["completed", "in_progress", "cancelled", "failed"] - - usage_bytes: int - - vector_store_id: str - - last_error: Optional[LastError] = None diff --git a/src/llama_stack_client/types/version_info.py b/src/llama_stack_client/types/version_info.py deleted file mode 100644 index 5fc5bbb4..00000000 --- a/src/llama_stack_client/types/version_info.py +++ /dev/null @@ -1,9 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from .._models import BaseModel - -__all__ = ["VersionInfo"] - - -class VersionInfo(BaseModel): - version: str diff --git a/tests/api_resources/agents/test_session.py b/tests/api_resources/agents/test_session.py deleted file mode 100644 index 2c80df58..00000000 --- a/tests/api_resources/agents/test_session.py +++ /dev/null @@ -1,321 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types.agents import ( - Session, - SessionCreateResponse, -) - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestSession: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_create(self, client: LlamaStackClient) -> None: - session = client.agents.session.create( - agent_id="agent_id", - session_name="session_name", - ) - assert_matches_type(SessionCreateResponse, session, path=["response"]) - - @parametrize - def test_raw_response_create(self, client: LlamaStackClient) -> None: - response = client.agents.session.with_raw_response.create( - agent_id="agent_id", - session_name="session_name", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - session = response.parse() - assert_matches_type(SessionCreateResponse, session, path=["response"]) - - @parametrize - def test_streaming_response_create(self, client: LlamaStackClient) -> None: - with client.agents.session.with_streaming_response.create( - agent_id="agent_id", - session_name="session_name", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - session = response.parse() - assert_matches_type(SessionCreateResponse, session, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_create(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"): - client.agents.session.with_raw_response.create( - agent_id="", - session_name="session_name", - ) - - @parametrize - def test_method_retrieve(self, client: LlamaStackClient) -> None: - session = client.agents.session.retrieve( - session_id="session_id", - agent_id="agent_id", - ) - assert_matches_type(Session, session, path=["response"]) - - @parametrize - def test_method_retrieve_with_all_params(self, client: LlamaStackClient) -> None: - session = client.agents.session.retrieve( - session_id="session_id", - agent_id="agent_id", - turn_ids=["string"], - ) - assert_matches_type(Session, session, path=["response"]) - - @parametrize - def test_raw_response_retrieve(self, client: LlamaStackClient) -> None: - response = client.agents.session.with_raw_response.retrieve( - session_id="session_id", - agent_id="agent_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - session = response.parse() - assert_matches_type(Session, session, path=["response"]) - - @parametrize - def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None: - with client.agents.session.with_streaming_response.retrieve( - session_id="session_id", - agent_id="agent_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - session = response.parse() - assert_matches_type(Session, session, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_retrieve(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received 
''"): - client.agents.session.with_raw_response.retrieve( - session_id="session_id", - agent_id="", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"): - client.agents.session.with_raw_response.retrieve( - session_id="", - agent_id="agent_id", - ) - - @parametrize - def test_method_delete(self, client: LlamaStackClient) -> None: - session = client.agents.session.delete( - session_id="session_id", - agent_id="agent_id", - ) - assert session is None - - @parametrize - def test_raw_response_delete(self, client: LlamaStackClient) -> None: - response = client.agents.session.with_raw_response.delete( - session_id="session_id", - agent_id="agent_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - session = response.parse() - assert session is None - - @parametrize - def test_streaming_response_delete(self, client: LlamaStackClient) -> None: - with client.agents.session.with_streaming_response.delete( - session_id="session_id", - agent_id="agent_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - session = response.parse() - assert session is None - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_delete(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"): - client.agents.session.with_raw_response.delete( - session_id="session_id", - agent_id="", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"): - client.agents.session.with_raw_response.delete( - session_id="", - agent_id="agent_id", - ) - - -class TestAsyncSession: - parametrize = pytest.mark.parametrize( - "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] - ) - - @parametrize - async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None: - session = await async_client.agents.session.create( - agent_id="agent_id", - session_name="session_name", - ) - assert_matches_type(SessionCreateResponse, session, path=["response"]) - - @parametrize - async def test_raw_response_create(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.agents.session.with_raw_response.create( - agent_id="agent_id", - session_name="session_name", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - session = await response.parse() - assert_matches_type(SessionCreateResponse, session, path=["response"]) - - @parametrize - async def test_streaming_response_create(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.agents.session.with_streaming_response.create( - agent_id="agent_id", - session_name="session_name", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - session = await response.parse() - assert_matches_type(SessionCreateResponse, session, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_create(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"): - await async_client.agents.session.with_raw_response.create( - agent_id="", - 
session_name="session_name", - ) - - @parametrize - async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - session = await async_client.agents.session.retrieve( - session_id="session_id", - agent_id="agent_id", - ) - assert_matches_type(Session, session, path=["response"]) - - @parametrize - async def test_method_retrieve_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - session = await async_client.agents.session.retrieve( - session_id="session_id", - agent_id="agent_id", - turn_ids=["string"], - ) - assert_matches_type(Session, session, path=["response"]) - - @parametrize - async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.agents.session.with_raw_response.retrieve( - session_id="session_id", - agent_id="agent_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - session = await response.parse() - assert_matches_type(Session, session, path=["response"]) - - @parametrize - async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.agents.session.with_streaming_response.retrieve( - session_id="session_id", - agent_id="agent_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - session = await response.parse() - assert_matches_type(Session, session, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"): - await async_client.agents.session.with_raw_response.retrieve( - session_id="session_id", - agent_id="", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"): - await async_client.agents.session.with_raw_response.retrieve( - session_id="", - agent_id="agent_id", - ) - - @parametrize - async def test_method_delete(self, async_client: AsyncLlamaStackClient) -> None: - session = await async_client.agents.session.delete( - session_id="session_id", - agent_id="agent_id", - ) - assert session is None - - @parametrize - async def test_raw_response_delete(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.agents.session.with_raw_response.delete( - session_id="session_id", - agent_id="agent_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - session = await response.parse() - assert session is None - - @parametrize - async def test_streaming_response_delete(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.agents.session.with_streaming_response.delete( - session_id="session_id", - agent_id="agent_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - session = await response.parse() - assert session is None - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_delete(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"): - await async_client.agents.session.with_raw_response.delete( - session_id="session_id", - agent_id="", - ) - - with 
pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"): - await async_client.agents.session.with_raw_response.delete( - session_id="", - agent_id="agent_id", - ) diff --git a/tests/api_resources/agents/test_steps.py b/tests/api_resources/agents/test_steps.py deleted file mode 100644 index 5555a9a4..00000000 --- a/tests/api_resources/agents/test_steps.py +++ /dev/null @@ -1,172 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types.agents import StepRetrieveResponse - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestSteps: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_retrieve(self, client: LlamaStackClient) -> None: - step = client.agents.steps.retrieve( - step_id="step_id", - agent_id="agent_id", - session_id="session_id", - turn_id="turn_id", - ) - assert_matches_type(StepRetrieveResponse, step, path=["response"]) - - @parametrize - def test_raw_response_retrieve(self, client: LlamaStackClient) -> None: - response = client.agents.steps.with_raw_response.retrieve( - step_id="step_id", - agent_id="agent_id", - session_id="session_id", - turn_id="turn_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - step = response.parse() - assert_matches_type(StepRetrieveResponse, step, path=["response"]) - - @parametrize - def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None: - with client.agents.steps.with_streaming_response.retrieve( - step_id="step_id", - agent_id="agent_id", - session_id="session_id", - turn_id="turn_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - step = response.parse() - assert_matches_type(StepRetrieveResponse, step, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_retrieve(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"): - client.agents.steps.with_raw_response.retrieve( - step_id="step_id", - agent_id="", - session_id="session_id", - turn_id="turn_id", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"): - client.agents.steps.with_raw_response.retrieve( - step_id="step_id", - agent_id="agent_id", - session_id="", - turn_id="turn_id", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"): - client.agents.steps.with_raw_response.retrieve( - step_id="step_id", - agent_id="agent_id", - session_id="session_id", - turn_id="", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `step_id` but received ''"): - client.agents.steps.with_raw_response.retrieve( - step_id="", - agent_id="agent_id", - session_id="session_id", - turn_id="turn_id", - ) - - -class TestAsyncSteps: - parametrize = pytest.mark.parametrize( - "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] - ) - - @parametrize - async def 
test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - step = await async_client.agents.steps.retrieve( - step_id="step_id", - agent_id="agent_id", - session_id="session_id", - turn_id="turn_id", - ) - assert_matches_type(StepRetrieveResponse, step, path=["response"]) - - @parametrize - async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.agents.steps.with_raw_response.retrieve( - step_id="step_id", - agent_id="agent_id", - session_id="session_id", - turn_id="turn_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - step = await response.parse() - assert_matches_type(StepRetrieveResponse, step, path=["response"]) - - @parametrize - async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.agents.steps.with_streaming_response.retrieve( - step_id="step_id", - agent_id="agent_id", - session_id="session_id", - turn_id="turn_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - step = await response.parse() - assert_matches_type(StepRetrieveResponse, step, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"): - await async_client.agents.steps.with_raw_response.retrieve( - step_id="step_id", - agent_id="", - session_id="session_id", - turn_id="turn_id", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"): - await async_client.agents.steps.with_raw_response.retrieve( - step_id="step_id", - agent_id="agent_id", - session_id="", - turn_id="turn_id", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"): - await async_client.agents.steps.with_raw_response.retrieve( - step_id="step_id", - agent_id="agent_id", - session_id="session_id", - turn_id="", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `step_id` but received ''"): - await async_client.agents.steps.with_raw_response.retrieve( - step_id="", - agent_id="agent_id", - session_id="session_id", - turn_id="turn_id", - ) diff --git a/tests/api_resources/agents/test_turn.py b/tests/api_resources/agents/test_turn.py deleted file mode 100644 index 31eb53f9..00000000 --- a/tests/api_resources/agents/test_turn.py +++ /dev/null @@ -1,1030 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types.agents import Turn - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestTurn: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_create_overload_1(self, client: LlamaStackClient) -> None: - turn = client.agents.turn.create( - session_id="session_id", - agent_id="agent_id", - messages=[ - { - "content": "string", - "role": "user", - } - ], - ) - assert_matches_type(Turn, turn, path=["response"]) - - @parametrize - def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient) -> None: - turn = client.agents.turn.create( - session_id="session_id", - agent_id="agent_id", - messages=[ - { - "content": "string", - "role": "user", - "context": "string", - } - ], - documents=[ - { - "content": "string", - "mime_type": "mime_type", - } - ], - stream=False, - tool_config={ - "system_message_behavior": "append", - "tool_choice": "auto", - "tool_prompt_format": "json", - }, - toolgroups=["string"], - ) - assert_matches_type(Turn, turn, path=["response"]) - - @parametrize - def test_raw_response_create_overload_1(self, client: LlamaStackClient) -> None: - response = client.agents.turn.with_raw_response.create( - session_id="session_id", - agent_id="agent_id", - messages=[ - { - "content": "string", - "role": "user", - } - ], - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - turn = response.parse() - assert_matches_type(Turn, turn, path=["response"]) - - @parametrize - def test_streaming_response_create_overload_1(self, client: LlamaStackClient) -> None: - with client.agents.turn.with_streaming_response.create( - session_id="session_id", - agent_id="agent_id", - messages=[ - { - "content": "string", - "role": "user", - } - ], - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - turn = response.parse() - assert_matches_type(Turn, turn, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_create_overload_1(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"): - client.agents.turn.with_raw_response.create( - session_id="session_id", - agent_id="", - messages=[ - { - "content": "string", - "role": "user", - } - ], - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"): - client.agents.turn.with_raw_response.create( - session_id="", - agent_id="agent_id", - messages=[ - { - "content": "string", - "role": "user", - } - ], - ) - - @parametrize - def test_method_create_overload_2(self, client: LlamaStackClient) -> None: - turn_stream = client.agents.turn.create( - session_id="session_id", - agent_id="agent_id", - messages=[ - { - "content": "string", - "role": "user", - } - ], - stream=True, - ) - turn_stream.response.close() - - @parametrize - def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient) -> None: - turn_stream = client.agents.turn.create( - session_id="session_id", - agent_id="agent_id", - messages=[ - { - "content": "string", - "role": "user", - 
"context": "string", - } - ], - stream=True, - documents=[ - { - "content": "string", - "mime_type": "mime_type", - } - ], - tool_config={ - "system_message_behavior": "append", - "tool_choice": "auto", - "tool_prompt_format": "json", - }, - toolgroups=["string"], - ) - turn_stream.response.close() - - @parametrize - def test_raw_response_create_overload_2(self, client: LlamaStackClient) -> None: - response = client.agents.turn.with_raw_response.create( - session_id="session_id", - agent_id="agent_id", - messages=[ - { - "content": "string", - "role": "user", - } - ], - stream=True, - ) - - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - stream = response.parse() - stream.close() - - @parametrize - def test_streaming_response_create_overload_2(self, client: LlamaStackClient) -> None: - with client.agents.turn.with_streaming_response.create( - session_id="session_id", - agent_id="agent_id", - messages=[ - { - "content": "string", - "role": "user", - } - ], - stream=True, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - stream = response.parse() - stream.close() - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_create_overload_2(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"): - client.agents.turn.with_raw_response.create( - session_id="session_id", - agent_id="", - messages=[ - { - "content": "string", - "role": "user", - } - ], - stream=True, - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"): - client.agents.turn.with_raw_response.create( - session_id="", - agent_id="agent_id", - messages=[ - { - "content": "string", - "role": "user", - } - ], - stream=True, - ) - - @parametrize - def test_method_retrieve(self, client: LlamaStackClient) -> None: - turn = client.agents.turn.retrieve( - turn_id="turn_id", - agent_id="agent_id", - session_id="session_id", - ) - assert_matches_type(Turn, turn, path=["response"]) - - @parametrize - def test_raw_response_retrieve(self, client: LlamaStackClient) -> None: - response = client.agents.turn.with_raw_response.retrieve( - turn_id="turn_id", - agent_id="agent_id", - session_id="session_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - turn = response.parse() - assert_matches_type(Turn, turn, path=["response"]) - - @parametrize - def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None: - with client.agents.turn.with_streaming_response.retrieve( - turn_id="turn_id", - agent_id="agent_id", - session_id="session_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - turn = response.parse() - assert_matches_type(Turn, turn, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_retrieve(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"): - client.agents.turn.with_raw_response.retrieve( - turn_id="turn_id", - agent_id="", - session_id="session_id", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"): - client.agents.turn.with_raw_response.retrieve( - turn_id="turn_id", - agent_id="agent_id", - 
session_id="", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"): - client.agents.turn.with_raw_response.retrieve( - turn_id="", - agent_id="agent_id", - session_id="session_id", - ) - - @parametrize - def test_method_resume_overload_1(self, client: LlamaStackClient) -> None: - turn = client.agents.turn.resume( - turn_id="turn_id", - agent_id="agent_id", - session_id="session_id", - tool_responses=[ - { - "call_id": "call_id", - "content": "string", - "tool_name": "brave_search", - } - ], - ) - assert_matches_type(Turn, turn, path=["response"]) - - @parametrize - def test_method_resume_with_all_params_overload_1(self, client: LlamaStackClient) -> None: - turn = client.agents.turn.resume( - turn_id="turn_id", - agent_id="agent_id", - session_id="session_id", - tool_responses=[ - { - "call_id": "call_id", - "content": "string", - "tool_name": "brave_search", - "metadata": {"foo": True}, - } - ], - stream=False, - ) - assert_matches_type(Turn, turn, path=["response"]) - - @parametrize - def test_raw_response_resume_overload_1(self, client: LlamaStackClient) -> None: - response = client.agents.turn.with_raw_response.resume( - turn_id="turn_id", - agent_id="agent_id", - session_id="session_id", - tool_responses=[ - { - "call_id": "call_id", - "content": "string", - "tool_name": "brave_search", - } - ], - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - turn = response.parse() - assert_matches_type(Turn, turn, path=["response"]) - - @parametrize - def test_streaming_response_resume_overload_1(self, client: LlamaStackClient) -> None: - with client.agents.turn.with_streaming_response.resume( - turn_id="turn_id", - agent_id="agent_id", - session_id="session_id", - tool_responses=[ - { - "call_id": "call_id", - "content": "string", - "tool_name": "brave_search", - } - ], - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - turn = response.parse() - assert_matches_type(Turn, turn, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_resume_overload_1(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"): - client.agents.turn.with_raw_response.resume( - turn_id="turn_id", - agent_id="", - session_id="session_id", - tool_responses=[ - { - "call_id": "call_id", - "content": "string", - "tool_name": "brave_search", - } - ], - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"): - client.agents.turn.with_raw_response.resume( - turn_id="turn_id", - agent_id="agent_id", - session_id="", - tool_responses=[ - { - "call_id": "call_id", - "content": "string", - "tool_name": "brave_search", - } - ], - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"): - client.agents.turn.with_raw_response.resume( - turn_id="", - agent_id="agent_id", - session_id="session_id", - tool_responses=[ - { - "call_id": "call_id", - "content": "string", - "tool_name": "brave_search", - } - ], - ) - - @parametrize - def test_method_resume_overload_2(self, client: LlamaStackClient) -> None: - turn_stream = client.agents.turn.resume( - turn_id="turn_id", - agent_id="agent_id", - session_id="session_id", - stream=True, - tool_responses=[ - { - "call_id": "call_id", - "content": "string", - 
"tool_name": "brave_search", - } - ], - ) - turn_stream.response.close() - - @parametrize - def test_raw_response_resume_overload_2(self, client: LlamaStackClient) -> None: - response = client.agents.turn.with_raw_response.resume( - turn_id="turn_id", - agent_id="agent_id", - session_id="session_id", - stream=True, - tool_responses=[ - { - "call_id": "call_id", - "content": "string", - "tool_name": "brave_search", - } - ], - ) - - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - stream = response.parse() - stream.close() - - @parametrize - def test_streaming_response_resume_overload_2(self, client: LlamaStackClient) -> None: - with client.agents.turn.with_streaming_response.resume( - turn_id="turn_id", - agent_id="agent_id", - session_id="session_id", - stream=True, - tool_responses=[ - { - "call_id": "call_id", - "content": "string", - "tool_name": "brave_search", - } - ], - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - stream = response.parse() - stream.close() - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_resume_overload_2(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"): - client.agents.turn.with_raw_response.resume( - turn_id="turn_id", - agent_id="", - session_id="session_id", - stream=True, - tool_responses=[ - { - "call_id": "call_id", - "content": "string", - "tool_name": "brave_search", - } - ], - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"): - client.agents.turn.with_raw_response.resume( - turn_id="turn_id", - agent_id="agent_id", - session_id="", - stream=True, - tool_responses=[ - { - "call_id": "call_id", - "content": "string", - "tool_name": "brave_search", - } - ], - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"): - client.agents.turn.with_raw_response.resume( - turn_id="", - agent_id="agent_id", - session_id="session_id", - stream=True, - tool_responses=[ - { - "call_id": "call_id", - "content": "string", - "tool_name": "brave_search", - } - ], - ) - - -class TestAsyncTurn: - parametrize = pytest.mark.parametrize( - "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] - ) - - @parametrize - async def test_method_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: - turn = await async_client.agents.turn.create( - session_id="session_id", - agent_id="agent_id", - messages=[ - { - "content": "string", - "role": "user", - } - ], - ) - assert_matches_type(Turn, turn, path=["response"]) - - @parametrize - async def test_method_create_with_all_params_overload_1(self, async_client: AsyncLlamaStackClient) -> None: - turn = await async_client.agents.turn.create( - session_id="session_id", - agent_id="agent_id", - messages=[ - { - "content": "string", - "role": "user", - "context": "string", - } - ], - documents=[ - { - "content": "string", - "mime_type": "mime_type", - } - ], - stream=False, - tool_config={ - "system_message_behavior": "append", - "tool_choice": "auto", - "tool_prompt_format": "json", - }, - toolgroups=["string"], - ) - assert_matches_type(Turn, turn, path=["response"]) - - @parametrize - async def test_raw_response_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: - response = await 
async_client.agents.turn.with_raw_response.create( - session_id="session_id", - agent_id="agent_id", - messages=[ - { - "content": "string", - "role": "user", - } - ], - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - turn = await response.parse() - assert_matches_type(Turn, turn, path=["response"]) - - @parametrize - async def test_streaming_response_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.agents.turn.with_streaming_response.create( - session_id="session_id", - agent_id="agent_id", - messages=[ - { - "content": "string", - "role": "user", - } - ], - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - turn = await response.parse() - assert_matches_type(Turn, turn, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"): - await async_client.agents.turn.with_raw_response.create( - session_id="session_id", - agent_id="", - messages=[ - { - "content": "string", - "role": "user", - } - ], - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"): - await async_client.agents.turn.with_raw_response.create( - session_id="", - agent_id="agent_id", - messages=[ - { - "content": "string", - "role": "user", - } - ], - ) - - @parametrize - async def test_method_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None: - turn_stream = await async_client.agents.turn.create( - session_id="session_id", - agent_id="agent_id", - messages=[ - { - "content": "string", - "role": "user", - } - ], - stream=True, - ) - await turn_stream.response.aclose() - - @parametrize - async def test_method_create_with_all_params_overload_2(self, async_client: AsyncLlamaStackClient) -> None: - turn_stream = await async_client.agents.turn.create( - session_id="session_id", - agent_id="agent_id", - messages=[ - { - "content": "string", - "role": "user", - "context": "string", - } - ], - stream=True, - documents=[ - { - "content": "string", - "mime_type": "mime_type", - } - ], - tool_config={ - "system_message_behavior": "append", - "tool_choice": "auto", - "tool_prompt_format": "json", - }, - toolgroups=["string"], - ) - await turn_stream.response.aclose() - - @parametrize - async def test_raw_response_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.agents.turn.with_raw_response.create( - session_id="session_id", - agent_id="agent_id", - messages=[ - { - "content": "string", - "role": "user", - } - ], - stream=True, - ) - - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - stream = await response.parse() - await stream.close() - - @parametrize - async def test_streaming_response_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.agents.turn.with_streaming_response.create( - session_id="session_id", - agent_id="agent_id", - messages=[ - { - "content": "string", - "role": "user", - } - ], - stream=True, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - stream = await response.parse() - await stream.close() - - assert cast(Any, response.is_closed) is 
True - - @parametrize - async def test_path_params_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"): - await async_client.agents.turn.with_raw_response.create( - session_id="session_id", - agent_id="", - messages=[ - { - "content": "string", - "role": "user", - } - ], - stream=True, - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"): - await async_client.agents.turn.with_raw_response.create( - session_id="", - agent_id="agent_id", - messages=[ - { - "content": "string", - "role": "user", - } - ], - stream=True, - ) - - @parametrize - async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - turn = await async_client.agents.turn.retrieve( - turn_id="turn_id", - agent_id="agent_id", - session_id="session_id", - ) - assert_matches_type(Turn, turn, path=["response"]) - - @parametrize - async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.agents.turn.with_raw_response.retrieve( - turn_id="turn_id", - agent_id="agent_id", - session_id="session_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - turn = await response.parse() - assert_matches_type(Turn, turn, path=["response"]) - - @parametrize - async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.agents.turn.with_streaming_response.retrieve( - turn_id="turn_id", - agent_id="agent_id", - session_id="session_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - turn = await response.parse() - assert_matches_type(Turn, turn, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"): - await async_client.agents.turn.with_raw_response.retrieve( - turn_id="turn_id", - agent_id="", - session_id="session_id", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"): - await async_client.agents.turn.with_raw_response.retrieve( - turn_id="turn_id", - agent_id="agent_id", - session_id="", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"): - await async_client.agents.turn.with_raw_response.retrieve( - turn_id="", - agent_id="agent_id", - session_id="session_id", - ) - - @parametrize - async def test_method_resume_overload_1(self, async_client: AsyncLlamaStackClient) -> None: - turn = await async_client.agents.turn.resume( - turn_id="turn_id", - agent_id="agent_id", - session_id="session_id", - tool_responses=[ - { - "call_id": "call_id", - "content": "string", - "tool_name": "brave_search", - } - ], - ) - assert_matches_type(Turn, turn, path=["response"]) - - @parametrize - async def test_method_resume_with_all_params_overload_1(self, async_client: AsyncLlamaStackClient) -> None: - turn = await async_client.agents.turn.resume( - turn_id="turn_id", - agent_id="agent_id", - session_id="session_id", - tool_responses=[ - { - "call_id": "call_id", - "content": "string", - "tool_name": "brave_search", - "metadata": {"foo": True}, - } - ], - stream=False, - ) 
- assert_matches_type(Turn, turn, path=["response"]) - - @parametrize - async def test_raw_response_resume_overload_1(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.agents.turn.with_raw_response.resume( - turn_id="turn_id", - agent_id="agent_id", - session_id="session_id", - tool_responses=[ - { - "call_id": "call_id", - "content": "string", - "tool_name": "brave_search", - } - ], - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - turn = await response.parse() - assert_matches_type(Turn, turn, path=["response"]) - - @parametrize - async def test_streaming_response_resume_overload_1(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.agents.turn.with_streaming_response.resume( - turn_id="turn_id", - agent_id="agent_id", - session_id="session_id", - tool_responses=[ - { - "call_id": "call_id", - "content": "string", - "tool_name": "brave_search", - } - ], - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - turn = await response.parse() - assert_matches_type(Turn, turn, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_resume_overload_1(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"): - await async_client.agents.turn.with_raw_response.resume( - turn_id="turn_id", - agent_id="", - session_id="session_id", - tool_responses=[ - { - "call_id": "call_id", - "content": "string", - "tool_name": "brave_search", - } - ], - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"): - await async_client.agents.turn.with_raw_response.resume( - turn_id="turn_id", - agent_id="agent_id", - session_id="", - tool_responses=[ - { - "call_id": "call_id", - "content": "string", - "tool_name": "brave_search", - } - ], - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"): - await async_client.agents.turn.with_raw_response.resume( - turn_id="", - agent_id="agent_id", - session_id="session_id", - tool_responses=[ - { - "call_id": "call_id", - "content": "string", - "tool_name": "brave_search", - } - ], - ) - - @parametrize - async def test_method_resume_overload_2(self, async_client: AsyncLlamaStackClient) -> None: - turn_stream = await async_client.agents.turn.resume( - turn_id="turn_id", - agent_id="agent_id", - session_id="session_id", - stream=True, - tool_responses=[ - { - "call_id": "call_id", - "content": "string", - "tool_name": "brave_search", - } - ], - ) - await turn_stream.response.aclose() - - @parametrize - async def test_raw_response_resume_overload_2(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.agents.turn.with_raw_response.resume( - turn_id="turn_id", - agent_id="agent_id", - session_id="session_id", - stream=True, - tool_responses=[ - { - "call_id": "call_id", - "content": "string", - "tool_name": "brave_search", - } - ], - ) - - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - stream = await response.parse() - await stream.close() - - @parametrize - async def test_streaming_response_resume_overload_2(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.agents.turn.with_streaming_response.resume( - turn_id="turn_id", - agent_id="agent_id", - 
session_id="session_id", - stream=True, - tool_responses=[ - { - "call_id": "call_id", - "content": "string", - "tool_name": "brave_search", - } - ], - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - stream = await response.parse() - await stream.close() - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_resume_overload_2(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"): - await async_client.agents.turn.with_raw_response.resume( - turn_id="turn_id", - agent_id="", - session_id="session_id", - stream=True, - tool_responses=[ - { - "call_id": "call_id", - "content": "string", - "tool_name": "brave_search", - } - ], - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `session_id` but received ''"): - await async_client.agents.turn.with_raw_response.resume( - turn_id="turn_id", - agent_id="agent_id", - session_id="", - stream=True, - tool_responses=[ - { - "call_id": "call_id", - "content": "string", - "tool_name": "brave_search", - } - ], - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `turn_id` but received ''"): - await async_client.agents.turn.with_raw_response.resume( - turn_id="", - agent_id="agent_id", - session_id="session_id", - stream=True, - tool_responses=[ - { - "call_id": "call_id", - "content": "string", - "tool_name": "brave_search", - } - ], - ) diff --git a/tests/api_resources/chat/__init__.py b/tests/api_resources/chat/__init__.py deleted file mode 100644 index fd8019a9..00000000 --- a/tests/api_resources/chat/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py deleted file mode 100644 index 496ea061..00000000 --- a/tests/api_resources/chat/test_completions.py +++ /dev/null @@ -1,514 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types.chat import ( - CompletionListResponse, - CompletionCreateResponse, - CompletionRetrieveResponse, -) - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestCompletions: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_create_overload_1(self, client: LlamaStackClient) -> None: - completion = client.chat.completions.create( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model="model", - ) - assert_matches_type(CompletionCreateResponse, completion, path=["response"]) - - @parametrize - def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient) -> None: - completion = client.chat.completions.create( - messages=[ - { - "content": "string", - "role": "user", - "name": "name", - } - ], - model="model", - frequency_penalty=0, - function_call="string", - functions=[{"foo": True}], - logit_bias={"foo": 0}, - logprobs=True, - max_completion_tokens=0, - max_tokens=0, - n=0, - parallel_tool_calls=True, - presence_penalty=0, - response_format={"type": "text"}, - seed=0, - stop="string", - stream=False, - stream_options={"foo": True}, - temperature=0, - tool_choice="string", - tools=[{"foo": True}], - top_logprobs=0, - top_p=0, - user="user", - ) - assert_matches_type(CompletionCreateResponse, completion, path=["response"]) - - @parametrize - def test_raw_response_create_overload_1(self, client: LlamaStackClient) -> None: - response = client.chat.completions.with_raw_response.create( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model="model", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - completion = response.parse() - assert_matches_type(CompletionCreateResponse, completion, path=["response"]) - - @parametrize - def test_streaming_response_create_overload_1(self, client: LlamaStackClient) -> None: - with client.chat.completions.with_streaming_response.create( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model="model", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - completion = response.parse() - assert_matches_type(CompletionCreateResponse, completion, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_create_overload_2(self, client: LlamaStackClient) -> None: - completion_stream = client.chat.completions.create( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model="model", - stream=True, - ) - completion_stream.response.close() - - @parametrize - def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient) -> None: - completion_stream = client.chat.completions.create( - messages=[ - { - "content": "string", - "role": "user", - "name": "name", - } - ], - model="model", - stream=True, - frequency_penalty=0, - function_call="string", - functions=[{"foo": True}], - logit_bias={"foo": 0}, - logprobs=True, - max_completion_tokens=0, - max_tokens=0, - n=0, - parallel_tool_calls=True, - presence_penalty=0, - response_format={"type": "text"}, - seed=0, - stop="string", - stream_options={"foo": True}, - 
temperature=0, - tool_choice="string", - tools=[{"foo": True}], - top_logprobs=0, - top_p=0, - user="user", - ) - completion_stream.response.close() - - @parametrize - def test_raw_response_create_overload_2(self, client: LlamaStackClient) -> None: - response = client.chat.completions.with_raw_response.create( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model="model", - stream=True, - ) - - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - stream = response.parse() - stream.close() - - @parametrize - def test_streaming_response_create_overload_2(self, client: LlamaStackClient) -> None: - with client.chat.completions.with_streaming_response.create( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model="model", - stream=True, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - stream = response.parse() - stream.close() - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_retrieve(self, client: LlamaStackClient) -> None: - completion = client.chat.completions.retrieve( - "completion_id", - ) - assert_matches_type(CompletionRetrieveResponse, completion, path=["response"]) - - @parametrize - def test_raw_response_retrieve(self, client: LlamaStackClient) -> None: - response = client.chat.completions.with_raw_response.retrieve( - "completion_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - completion = response.parse() - assert_matches_type(CompletionRetrieveResponse, completion, path=["response"]) - - @parametrize - def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None: - with client.chat.completions.with_streaming_response.retrieve( - "completion_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - completion = response.parse() - assert_matches_type(CompletionRetrieveResponse, completion, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_retrieve(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `completion_id` but received ''"): - client.chat.completions.with_raw_response.retrieve( - "", - ) - - @parametrize - def test_method_list(self, client: LlamaStackClient) -> None: - completion = client.chat.completions.list() - assert_matches_type(CompletionListResponse, completion, path=["response"]) - - @parametrize - def test_method_list_with_all_params(self, client: LlamaStackClient) -> None: - completion = client.chat.completions.list( - after="after", - limit=0, - model="model", - order="asc", - ) - assert_matches_type(CompletionListResponse, completion, path=["response"]) - - @parametrize - def test_raw_response_list(self, client: LlamaStackClient) -> None: - response = client.chat.completions.with_raw_response.list() - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - completion = response.parse() - assert_matches_type(CompletionListResponse, completion, path=["response"]) - - @parametrize - def test_streaming_response_list(self, client: LlamaStackClient) -> None: - with client.chat.completions.with_streaming_response.list() as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - completion = 
response.parse() - assert_matches_type(CompletionListResponse, completion, path=["response"]) - - assert cast(Any, response.is_closed) is True - - -class TestAsyncCompletions: - parametrize = pytest.mark.parametrize( - "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] - ) - - @parametrize - async def test_method_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: - completion = await async_client.chat.completions.create( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model="model", - ) - assert_matches_type(CompletionCreateResponse, completion, path=["response"]) - - @parametrize - async def test_method_create_with_all_params_overload_1(self, async_client: AsyncLlamaStackClient) -> None: - completion = await async_client.chat.completions.create( - messages=[ - { - "content": "string", - "role": "user", - "name": "name", - } - ], - model="model", - frequency_penalty=0, - function_call="string", - functions=[{"foo": True}], - logit_bias={"foo": 0}, - logprobs=True, - max_completion_tokens=0, - max_tokens=0, - n=0, - parallel_tool_calls=True, - presence_penalty=0, - response_format={"type": "text"}, - seed=0, - stop="string", - stream=False, - stream_options={"foo": True}, - temperature=0, - tool_choice="string", - tools=[{"foo": True}], - top_logprobs=0, - top_p=0, - user="user", - ) - assert_matches_type(CompletionCreateResponse, completion, path=["response"]) - - @parametrize - async def test_raw_response_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.chat.completions.with_raw_response.create( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model="model", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - completion = await response.parse() - assert_matches_type(CompletionCreateResponse, completion, path=["response"]) - - @parametrize - async def test_streaming_response_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.chat.completions.with_streaming_response.create( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model="model", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - completion = await response.parse() - assert_matches_type(CompletionCreateResponse, completion, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None: - completion_stream = await async_client.chat.completions.create( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model="model", - stream=True, - ) - await completion_stream.response.aclose() - - @parametrize - async def test_method_create_with_all_params_overload_2(self, async_client: AsyncLlamaStackClient) -> None: - completion_stream = await async_client.chat.completions.create( - messages=[ - { - "content": "string", - "role": "user", - "name": "name", - } - ], - model="model", - stream=True, - frequency_penalty=0, - function_call="string", - functions=[{"foo": True}], - logit_bias={"foo": 0}, - logprobs=True, - max_completion_tokens=0, - max_tokens=0, - n=0, - parallel_tool_calls=True, - presence_penalty=0, - response_format={"type": "text"}, - seed=0, - stop="string", - stream_options={"foo": True}, - temperature=0, - tool_choice="string", 
- tools=[{"foo": True}], - top_logprobs=0, - top_p=0, - user="user", - ) - await completion_stream.response.aclose() - - @parametrize - async def test_raw_response_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.chat.completions.with_raw_response.create( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model="model", - stream=True, - ) - - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - stream = await response.parse() - await stream.close() - - @parametrize - async def test_streaming_response_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.chat.completions.with_streaming_response.create( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model="model", - stream=True, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - stream = await response.parse() - await stream.close() - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - completion = await async_client.chat.completions.retrieve( - "completion_id", - ) - assert_matches_type(CompletionRetrieveResponse, completion, path=["response"]) - - @parametrize - async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.chat.completions.with_raw_response.retrieve( - "completion_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - completion = await response.parse() - assert_matches_type(CompletionRetrieveResponse, completion, path=["response"]) - - @parametrize - async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.chat.completions.with_streaming_response.retrieve( - "completion_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - completion = await response.parse() - assert_matches_type(CompletionRetrieveResponse, completion, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `completion_id` but received ''"): - await async_client.chat.completions.with_raw_response.retrieve( - "", - ) - - @parametrize - async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None: - completion = await async_client.chat.completions.list() - assert_matches_type(CompletionListResponse, completion, path=["response"]) - - @parametrize - async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - completion = await async_client.chat.completions.list( - after="after", - limit=0, - model="model", - order="asc", - ) - assert_matches_type(CompletionListResponse, completion, path=["response"]) - - @parametrize - async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.chat.completions.with_raw_response.list() - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - completion = await response.parse() - assert_matches_type(CompletionListResponse, completion, path=["response"]) - - @parametrize - async 
def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.chat.completions.with_streaming_response.list() as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - completion = await response.parse() - assert_matches_type(CompletionListResponse, completion, path=["response"]) - - assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/eval/__init__.py b/tests/api_resources/eval/__init__.py deleted file mode 100644 index fd8019a9..00000000 --- a/tests/api_resources/eval/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/eval/test_jobs.py b/tests/api_resources/eval/test_jobs.py deleted file mode 100644 index 17b02896..00000000 --- a/tests/api_resources/eval/test_jobs.py +++ /dev/null @@ -1,312 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types import Job, EvaluateResponse - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestJobs: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_retrieve(self, client: LlamaStackClient) -> None: - job = client.eval.jobs.retrieve( - job_id="job_id", - benchmark_id="benchmark_id", - ) - assert_matches_type(EvaluateResponse, job, path=["response"]) - - @parametrize - def test_raw_response_retrieve(self, client: LlamaStackClient) -> None: - response = client.eval.jobs.with_raw_response.retrieve( - job_id="job_id", - benchmark_id="benchmark_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - job = response.parse() - assert_matches_type(EvaluateResponse, job, path=["response"]) - - @parametrize - def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None: - with client.eval.jobs.with_streaming_response.retrieve( - job_id="job_id", - benchmark_id="benchmark_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - job = response.parse() - assert_matches_type(EvaluateResponse, job, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_retrieve(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): - client.eval.jobs.with_raw_response.retrieve( - job_id="job_id", - benchmark_id="", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"): - client.eval.jobs.with_raw_response.retrieve( - job_id="", - benchmark_id="benchmark_id", - ) - - @parametrize - def test_method_cancel(self, client: LlamaStackClient) -> None: - job = client.eval.jobs.cancel( - job_id="job_id", - benchmark_id="benchmark_id", - ) - assert job is None - - @parametrize - def test_raw_response_cancel(self, client: LlamaStackClient) -> None: - response = client.eval.jobs.with_raw_response.cancel( - job_id="job_id", - benchmark_id="benchmark_id", - ) - - assert response.is_closed is True - assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" - job = response.parse() - assert job is None - - @parametrize - def test_streaming_response_cancel(self, client: LlamaStackClient) -> None: - with client.eval.jobs.with_streaming_response.cancel( - job_id="job_id", - benchmark_id="benchmark_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - job = response.parse() - assert job is None - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_cancel(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): - client.eval.jobs.with_raw_response.cancel( - job_id="job_id", - benchmark_id="", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"): - client.eval.jobs.with_raw_response.cancel( - job_id="", - benchmark_id="benchmark_id", - ) - - @parametrize - def test_method_status(self, client: LlamaStackClient) -> None: - job = client.eval.jobs.status( - job_id="job_id", - benchmark_id="benchmark_id", - ) - assert_matches_type(Job, job, path=["response"]) - - @parametrize - def test_raw_response_status(self, client: LlamaStackClient) -> None: - response = client.eval.jobs.with_raw_response.status( - job_id="job_id", - benchmark_id="benchmark_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - job = response.parse() - assert_matches_type(Job, job, path=["response"]) - - @parametrize - def test_streaming_response_status(self, client: LlamaStackClient) -> None: - with client.eval.jobs.with_streaming_response.status( - job_id="job_id", - benchmark_id="benchmark_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - job = response.parse() - assert_matches_type(Job, job, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_status(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): - client.eval.jobs.with_raw_response.status( - job_id="job_id", - benchmark_id="", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"): - client.eval.jobs.with_raw_response.status( - job_id="", - benchmark_id="benchmark_id", - ) - - -class TestAsyncJobs: - parametrize = pytest.mark.parametrize( - "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] - ) - - @parametrize - async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - job = await async_client.eval.jobs.retrieve( - job_id="job_id", - benchmark_id="benchmark_id", - ) - assert_matches_type(EvaluateResponse, job, path=["response"]) - - @parametrize - async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.eval.jobs.with_raw_response.retrieve( - job_id="job_id", - benchmark_id="benchmark_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - job = await response.parse() - assert_matches_type(EvaluateResponse, job, path=["response"]) - - @parametrize - async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - 
async with async_client.eval.jobs.with_streaming_response.retrieve( - job_id="job_id", - benchmark_id="benchmark_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - job = await response.parse() - assert_matches_type(EvaluateResponse, job, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): - await async_client.eval.jobs.with_raw_response.retrieve( - job_id="job_id", - benchmark_id="", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"): - await async_client.eval.jobs.with_raw_response.retrieve( - job_id="", - benchmark_id="benchmark_id", - ) - - @parametrize - async def test_method_cancel(self, async_client: AsyncLlamaStackClient) -> None: - job = await async_client.eval.jobs.cancel( - job_id="job_id", - benchmark_id="benchmark_id", - ) - assert job is None - - @parametrize - async def test_raw_response_cancel(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.eval.jobs.with_raw_response.cancel( - job_id="job_id", - benchmark_id="benchmark_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - job = await response.parse() - assert job is None - - @parametrize - async def test_streaming_response_cancel(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.eval.jobs.with_streaming_response.cancel( - job_id="job_id", - benchmark_id="benchmark_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - job = await response.parse() - assert job is None - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_cancel(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): - await async_client.eval.jobs.with_raw_response.cancel( - job_id="job_id", - benchmark_id="", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"): - await async_client.eval.jobs.with_raw_response.cancel( - job_id="", - benchmark_id="benchmark_id", - ) - - @parametrize - async def test_method_status(self, async_client: AsyncLlamaStackClient) -> None: - job = await async_client.eval.jobs.status( - job_id="job_id", - benchmark_id="benchmark_id", - ) - assert_matches_type(Job, job, path=["response"]) - - @parametrize - async def test_raw_response_status(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.eval.jobs.with_raw_response.status( - job_id="job_id", - benchmark_id="benchmark_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - job = await response.parse() - assert_matches_type(Job, job, path=["response"]) - - @parametrize - async def test_streaming_response_status(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.eval.jobs.with_streaming_response.status( - job_id="job_id", - benchmark_id="benchmark_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - job = await 
response.parse() - assert_matches_type(Job, job, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_status(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): - await async_client.eval.jobs.with_raw_response.status( - job_id="job_id", - benchmark_id="", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `job_id` but received ''"): - await async_client.eval.jobs.with_raw_response.status( - job_id="", - benchmark_id="benchmark_id", - ) diff --git a/tests/api_resources/post_training/__init__.py b/tests/api_resources/post_training/__init__.py deleted file mode 100644 index fd8019a9..00000000 --- a/tests/api_resources/post_training/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/post_training/test_job.py b/tests/api_resources/post_training/test_job.py deleted file mode 100644 index 158eafbc..00000000 --- a/tests/api_resources/post_training/test_job.py +++ /dev/null @@ -1,264 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -import os -from typing import Any, List, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types.post_training import ( - JobStatusResponse, - JobArtifactsResponse, -) -from llama_stack_client.types.list_post_training_jobs_response import Data - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestJob: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_list(self, client: LlamaStackClient) -> None: - job = client.post_training.job.list() - assert_matches_type(List[Data], job, path=["response"]) - - @parametrize - def test_raw_response_list(self, client: LlamaStackClient) -> None: - response = client.post_training.job.with_raw_response.list() - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - job = response.parse() - assert_matches_type(List[Data], job, path=["response"]) - - @parametrize - def test_streaming_response_list(self, client: LlamaStackClient) -> None: - with client.post_training.job.with_streaming_response.list() as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - job = response.parse() - assert_matches_type(List[Data], job, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_artifacts(self, client: LlamaStackClient) -> None: - job = client.post_training.job.artifacts( - job_uuid="job_uuid", - ) - assert_matches_type(JobArtifactsResponse, job, path=["response"]) - - @parametrize - def test_raw_response_artifacts(self, client: LlamaStackClient) -> None: - response = client.post_training.job.with_raw_response.artifacts( - job_uuid="job_uuid", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - job = response.parse() - assert_matches_type(JobArtifactsResponse, job, path=["response"]) - - @parametrize - def test_streaming_response_artifacts(self, client: LlamaStackClient) -> None: - with 
client.post_training.job.with_streaming_response.artifacts( - job_uuid="job_uuid", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - job = response.parse() - assert_matches_type(JobArtifactsResponse, job, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_cancel(self, client: LlamaStackClient) -> None: - job = client.post_training.job.cancel( - job_uuid="job_uuid", - ) - assert job is None - - @parametrize - def test_raw_response_cancel(self, client: LlamaStackClient) -> None: - response = client.post_training.job.with_raw_response.cancel( - job_uuid="job_uuid", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - job = response.parse() - assert job is None - - @parametrize - def test_streaming_response_cancel(self, client: LlamaStackClient) -> None: - with client.post_training.job.with_streaming_response.cancel( - job_uuid="job_uuid", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - job = response.parse() - assert job is None - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_status(self, client: LlamaStackClient) -> None: - job = client.post_training.job.status( - job_uuid="job_uuid", - ) - assert_matches_type(JobStatusResponse, job, path=["response"]) - - @parametrize - def test_raw_response_status(self, client: LlamaStackClient) -> None: - response = client.post_training.job.with_raw_response.status( - job_uuid="job_uuid", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - job = response.parse() - assert_matches_type(JobStatusResponse, job, path=["response"]) - - @parametrize - def test_streaming_response_status(self, client: LlamaStackClient) -> None: - with client.post_training.job.with_streaming_response.status( - job_uuid="job_uuid", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - job = response.parse() - assert_matches_type(JobStatusResponse, job, path=["response"]) - - assert cast(Any, response.is_closed) is True - - -class TestAsyncJob: - parametrize = pytest.mark.parametrize( - "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] - ) - - @parametrize - async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None: - job = await async_client.post_training.job.list() - assert_matches_type(List[Data], job, path=["response"]) - - @parametrize - async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.post_training.job.with_raw_response.list() - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - job = await response.parse() - assert_matches_type(List[Data], job, path=["response"]) - - @parametrize - async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.post_training.job.with_streaming_response.list() as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - job = await response.parse() - assert_matches_type(List[Data], job, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def 
test_method_artifacts(self, async_client: AsyncLlamaStackClient) -> None: - job = await async_client.post_training.job.artifacts( - job_uuid="job_uuid", - ) - assert_matches_type(JobArtifactsResponse, job, path=["response"]) - - @parametrize - async def test_raw_response_artifacts(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.post_training.job.with_raw_response.artifacts( - job_uuid="job_uuid", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - job = await response.parse() - assert_matches_type(JobArtifactsResponse, job, path=["response"]) - - @parametrize - async def test_streaming_response_artifacts(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.post_training.job.with_streaming_response.artifacts( - job_uuid="job_uuid", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - job = await response.parse() - assert_matches_type(JobArtifactsResponse, job, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_cancel(self, async_client: AsyncLlamaStackClient) -> None: - job = await async_client.post_training.job.cancel( - job_uuid="job_uuid", - ) - assert job is None - - @parametrize - async def test_raw_response_cancel(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.post_training.job.with_raw_response.cancel( - job_uuid="job_uuid", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - job = await response.parse() - assert job is None - - @parametrize - async def test_streaming_response_cancel(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.post_training.job.with_streaming_response.cancel( - job_uuid="job_uuid", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - job = await response.parse() - assert job is None - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_status(self, async_client: AsyncLlamaStackClient) -> None: - job = await async_client.post_training.job.status( - job_uuid="job_uuid", - ) - assert_matches_type(JobStatusResponse, job, path=["response"]) - - @parametrize - async def test_raw_response_status(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.post_training.job.with_raw_response.status( - job_uuid="job_uuid", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - job = await response.parse() - assert_matches_type(JobStatusResponse, job, path=["response"]) - - @parametrize - async def test_streaming_response_status(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.post_training.job.with_streaming_response.status( - job_uuid="job_uuid", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - job = await response.parse() - assert_matches_type(JobStatusResponse, job, path=["response"]) - - assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/responses/__init__.py b/tests/api_resources/responses/__init__.py deleted file mode 100644 index fd8019a9..00000000 --- a/tests/api_resources/responses/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# File generated from our 
OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/responses/test_input_items.py b/tests/api_resources/responses/test_input_items.py deleted file mode 100644 index a0160f72..00000000 --- a/tests/api_resources/responses/test_input_items.py +++ /dev/null @@ -1,124 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types.responses import InputItemListResponse - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestInputItems: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_list(self, client: LlamaStackClient) -> None: - input_item = client.responses.input_items.list( - response_id="response_id", - ) - assert_matches_type(InputItemListResponse, input_item, path=["response"]) - - @parametrize - def test_method_list_with_all_params(self, client: LlamaStackClient) -> None: - input_item = client.responses.input_items.list( - response_id="response_id", - after="after", - before="before", - include=["string"], - limit=0, - order="asc", - ) - assert_matches_type(InputItemListResponse, input_item, path=["response"]) - - @parametrize - def test_raw_response_list(self, client: LlamaStackClient) -> None: - response = client.responses.input_items.with_raw_response.list( - response_id="response_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - input_item = response.parse() - assert_matches_type(InputItemListResponse, input_item, path=["response"]) - - @parametrize - def test_streaming_response_list(self, client: LlamaStackClient) -> None: - with client.responses.input_items.with_streaming_response.list( - response_id="response_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - input_item = response.parse() - assert_matches_type(InputItemListResponse, input_item, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_list(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `response_id` but received ''"): - client.responses.input_items.with_raw_response.list( - response_id="", - ) - - -class TestAsyncInputItems: - parametrize = pytest.mark.parametrize( - "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] - ) - - @parametrize - async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None: - input_item = await async_client.responses.input_items.list( - response_id="response_id", - ) - assert_matches_type(InputItemListResponse, input_item, path=["response"]) - - @parametrize - async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - input_item = await async_client.responses.input_items.list( - response_id="response_id", - after="after", - before="before", - include=["string"], - limit=0, - order="asc", - ) - assert_matches_type(InputItemListResponse, input_item, path=["response"]) - - @parametrize - async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None: - 
response = await async_client.responses.input_items.with_raw_response.list( - response_id="response_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - input_item = await response.parse() - assert_matches_type(InputItemListResponse, input_item, path=["response"]) - - @parametrize - async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.responses.input_items.with_streaming_response.list( - response_id="response_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - input_item = await response.parse() - assert_matches_type(InputItemListResponse, input_item, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_list(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `response_id` but received ''"): - await async_client.responses.input_items.with_raw_response.list( - response_id="", - ) diff --git a/tests/api_resources/agents/__init__.py b/tests/api_resources/store/__init__.py similarity index 100% rename from tests/api_resources/agents/__init__.py rename to tests/api_resources/store/__init__.py diff --git a/tests/api_resources/store/test_order.py b/tests/api_resources/store/test_order.py new file mode 100644 index 00000000..6173ebd2 --- /dev/null +++ b/tests/api_resources/store/test_order.py @@ -0,0 +1,243 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from tests.utils import assert_matches_type +from llama_stack_cli import LlamaStackCli, AsyncLlamaStackCli +from llama_stack_cli._utils import parse_datetime +from llama_stack_cli.types.shared import Order + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestOrder: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @pytest.mark.skip() + @parametrize + def test_method_create(self, client: LlamaStackCli) -> None: + order = client.store.order.create() + assert_matches_type(Order, order, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_method_create_with_all_params(self, client: LlamaStackCli) -> None: + order = client.store.order.create( + id=10, + complete=True, + pet_id=198772, + quantity=7, + ship_date=parse_datetime("2019-12-27T18:11:19.117Z"), + status="approved", + ) + assert_matches_type(Order, order, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_raw_response_create(self, client: LlamaStackCli) -> None: + response = client.store.order.with_raw_response.create() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + order = response.parse() + assert_matches_type(Order, order, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_streaming_response_create(self, client: LlamaStackCli) -> None: + with client.store.order.with_streaming_response.create() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + order = response.parse() + assert_matches_type(Order, order, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + def 
test_method_retrieve(self, client: LlamaStackCli) -> None: + order = client.store.order.retrieve( + 0, + ) + assert_matches_type(Order, order, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_raw_response_retrieve(self, client: LlamaStackCli) -> None: + response = client.store.order.with_raw_response.retrieve( + 0, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + order = response.parse() + assert_matches_type(Order, order, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_streaming_response_retrieve(self, client: LlamaStackCli) -> None: + with client.store.order.with_streaming_response.retrieve( + 0, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + order = response.parse() + assert_matches_type(Order, order, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + def test_method_delete(self, client: LlamaStackCli) -> None: + order = client.store.order.delete( + 0, + ) + assert order is None + + @pytest.mark.skip() + @parametrize + def test_raw_response_delete(self, client: LlamaStackCli) -> None: + response = client.store.order.with_raw_response.delete( + 0, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + order = response.parse() + assert order is None + + @pytest.mark.skip() + @parametrize + def test_streaming_response_delete(self, client: LlamaStackCli) -> None: + with client.store.order.with_streaming_response.delete( + 0, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + order = response.parse() + assert order is None + + assert cast(Any, response.is_closed) is True + + +class TestAsyncOrder: + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) + + @pytest.mark.skip() + @parametrize + async def test_method_create(self, async_client: AsyncLlamaStackCli) -> None: + order = await async_client.store.order.create() + assert_matches_type(Order, order, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncLlamaStackCli) -> None: + order = await async_client.store.order.create( + id=10, + complete=True, + pet_id=198772, + quantity=7, + ship_date=parse_datetime("2019-12-27T18:11:19.117Z"), + status="approved", + ) + assert_matches_type(Order, order, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_raw_response_create(self, async_client: AsyncLlamaStackCli) -> None: + response = await async_client.store.order.with_raw_response.create() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + order = await response.parse() + assert_matches_type(Order, order, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_streaming_response_create(self, async_client: AsyncLlamaStackCli) -> None: + async with async_client.store.order.with_streaming_response.create() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + order = await response.parse() + assert_matches_type(Order, order, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + 
@parametrize + async def test_method_retrieve(self, async_client: AsyncLlamaStackCli) -> None: + order = await async_client.store.order.retrieve( + 0, + ) + assert_matches_type(Order, order, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackCli) -> None: + response = await async_client.store.order.with_raw_response.retrieve( + 0, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + order = await response.parse() + assert_matches_type(Order, order, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackCli) -> None: + async with async_client.store.order.with_streaming_response.retrieve( + 0, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + order = await response.parse() + assert_matches_type(Order, order, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + async def test_method_delete(self, async_client: AsyncLlamaStackCli) -> None: + order = await async_client.store.order.delete( + 0, + ) + assert order is None + + @pytest.mark.skip() + @parametrize + async def test_raw_response_delete(self, async_client: AsyncLlamaStackCli) -> None: + response = await async_client.store.order.with_raw_response.delete( + 0, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + order = await response.parse() + assert order is None + + @pytest.mark.skip() + @parametrize + async def test_streaming_response_delete(self, async_client: AsyncLlamaStackCli) -> None: + async with async_client.store.order.with_streaming_response.delete( + 0, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + order = await response.parse() + assert order is None + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_agents.py b/tests/api_resources/test_agents.py deleted file mode 100644 index c4aa5349..00000000 --- a/tests/api_resources/test_agents.py +++ /dev/null @@ -1,278 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types import AgentCreateResponse - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestAgents: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_create(self, client: LlamaStackClient) -> None: - agent = client.agents.create( - agent_config={ - "instructions": "instructions", - "model": "model", - }, - ) - assert_matches_type(AgentCreateResponse, agent, path=["response"]) - - @parametrize - def test_method_create_with_all_params(self, client: LlamaStackClient) -> None: - agent = client.agents.create( - agent_config={ - "instructions": "instructions", - "model": "model", - "client_tools": [ - { - "name": "name", - "description": "description", - "metadata": {"foo": True}, - "parameters": [ - { - "description": "description", - "name": "name", - "parameter_type": "parameter_type", - "required": True, - "default": True, - } - ], - } - ], - "enable_session_persistence": True, - "input_shields": ["string"], - "max_infer_iters": 0, - "name": "name", - "output_shields": ["string"], - "response_format": { - "json_schema": {"foo": True}, - "type": "json_schema", - }, - "sampling_params": { - "strategy": {"type": "greedy"}, - "max_tokens": 0, - "repetition_penalty": 0, - "stop": ["string"], - }, - "tool_choice": "auto", - "tool_config": { - "system_message_behavior": "append", - "tool_choice": "auto", - "tool_prompt_format": "json", - }, - "tool_prompt_format": "json", - "toolgroups": ["string"], - }, - ) - assert_matches_type(AgentCreateResponse, agent, path=["response"]) - - @parametrize - def test_raw_response_create(self, client: LlamaStackClient) -> None: - response = client.agents.with_raw_response.create( - agent_config={ - "instructions": "instructions", - "model": "model", - }, - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - agent = response.parse() - assert_matches_type(AgentCreateResponse, agent, path=["response"]) - - @parametrize - def test_streaming_response_create(self, client: LlamaStackClient) -> None: - with client.agents.with_streaming_response.create( - agent_config={ - "instructions": "instructions", - "model": "model", - }, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - agent = response.parse() - assert_matches_type(AgentCreateResponse, agent, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_delete(self, client: LlamaStackClient) -> None: - agent = client.agents.delete( - "agent_id", - ) - assert agent is None - - @parametrize - def test_raw_response_delete(self, client: LlamaStackClient) -> None: - response = client.agents.with_raw_response.delete( - "agent_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - agent = response.parse() - assert agent is None - - @parametrize - def test_streaming_response_delete(self, client: LlamaStackClient) -> None: - with client.agents.with_streaming_response.delete( - "agent_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - 
agent = response.parse() - assert agent is None - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_delete(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"): - client.agents.with_raw_response.delete( - "", - ) - - -class TestAsyncAgents: - parametrize = pytest.mark.parametrize( - "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] - ) - - @parametrize - async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None: - agent = await async_client.agents.create( - agent_config={ - "instructions": "instructions", - "model": "model", - }, - ) - assert_matches_type(AgentCreateResponse, agent, path=["response"]) - - @parametrize - async def test_method_create_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - agent = await async_client.agents.create( - agent_config={ - "instructions": "instructions", - "model": "model", - "client_tools": [ - { - "name": "name", - "description": "description", - "metadata": {"foo": True}, - "parameters": [ - { - "description": "description", - "name": "name", - "parameter_type": "parameter_type", - "required": True, - "default": True, - } - ], - } - ], - "enable_session_persistence": True, - "input_shields": ["string"], - "max_infer_iters": 0, - "name": "name", - "output_shields": ["string"], - "response_format": { - "json_schema": {"foo": True}, - "type": "json_schema", - }, - "sampling_params": { - "strategy": {"type": "greedy"}, - "max_tokens": 0, - "repetition_penalty": 0, - "stop": ["string"], - }, - "tool_choice": "auto", - "tool_config": { - "system_message_behavior": "append", - "tool_choice": "auto", - "tool_prompt_format": "json", - }, - "tool_prompt_format": "json", - "toolgroups": ["string"], - }, - ) - assert_matches_type(AgentCreateResponse, agent, path=["response"]) - - @parametrize - async def test_raw_response_create(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.agents.with_raw_response.create( - agent_config={ - "instructions": "instructions", - "model": "model", - }, - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - agent = await response.parse() - assert_matches_type(AgentCreateResponse, agent, path=["response"]) - - @parametrize - async def test_streaming_response_create(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.agents.with_streaming_response.create( - agent_config={ - "instructions": "instructions", - "model": "model", - }, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - agent = await response.parse() - assert_matches_type(AgentCreateResponse, agent, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_delete(self, async_client: AsyncLlamaStackClient) -> None: - agent = await async_client.agents.delete( - "agent_id", - ) - assert agent is None - - @parametrize - async def test_raw_response_delete(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.agents.with_raw_response.delete( - "agent_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - agent = await response.parse() - assert agent is None - - @parametrize - async def 
test_streaming_response_delete(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.agents.with_streaming_response.delete( - "agent_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - agent = await response.parse() - assert agent is None - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_delete(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `agent_id` but received ''"): - await async_client.agents.with_raw_response.delete( - "", - ) diff --git a/tests/api_resources/test_benchmarks.py b/tests/api_resources/test_benchmarks.py deleted file mode 100644 index 97d3d5c9..00000000 --- a/tests/api_resources/test_benchmarks.py +++ /dev/null @@ -1,248 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types import Benchmark, BenchmarkListResponse - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestBenchmarks: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_retrieve(self, client: LlamaStackClient) -> None: - benchmark = client.benchmarks.retrieve( - "benchmark_id", - ) - assert_matches_type(Benchmark, benchmark, path=["response"]) - - @parametrize - def test_raw_response_retrieve(self, client: LlamaStackClient) -> None: - response = client.benchmarks.with_raw_response.retrieve( - "benchmark_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - benchmark = response.parse() - assert_matches_type(Benchmark, benchmark, path=["response"]) - - @parametrize - def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None: - with client.benchmarks.with_streaming_response.retrieve( - "benchmark_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - benchmark = response.parse() - assert_matches_type(Benchmark, benchmark, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_retrieve(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): - client.benchmarks.with_raw_response.retrieve( - "", - ) - - @parametrize - def test_method_list(self, client: LlamaStackClient) -> None: - benchmark = client.benchmarks.list() - assert_matches_type(BenchmarkListResponse, benchmark, path=["response"]) - - @parametrize - def test_raw_response_list(self, client: LlamaStackClient) -> None: - response = client.benchmarks.with_raw_response.list() - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - benchmark = response.parse() - assert_matches_type(BenchmarkListResponse, benchmark, path=["response"]) - - @parametrize - def test_streaming_response_list(self, client: LlamaStackClient) -> None: - with client.benchmarks.with_streaming_response.list() as response: - assert not response.is_closed - assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" - - benchmark = response.parse() - assert_matches_type(BenchmarkListResponse, benchmark, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_register(self, client: LlamaStackClient) -> None: - benchmark = client.benchmarks.register( - benchmark_id="benchmark_id", - dataset_id="dataset_id", - scoring_functions=["string"], - ) - assert benchmark is None - - @parametrize - def test_method_register_with_all_params(self, client: LlamaStackClient) -> None: - benchmark = client.benchmarks.register( - benchmark_id="benchmark_id", - dataset_id="dataset_id", - scoring_functions=["string"], - metadata={"foo": True}, - provider_benchmark_id="provider_benchmark_id", - provider_id="provider_id", - ) - assert benchmark is None - - @parametrize - def test_raw_response_register(self, client: LlamaStackClient) -> None: - response = client.benchmarks.with_raw_response.register( - benchmark_id="benchmark_id", - dataset_id="dataset_id", - scoring_functions=["string"], - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - benchmark = response.parse() - assert benchmark is None - - @parametrize - def test_streaming_response_register(self, client: LlamaStackClient) -> None: - with client.benchmarks.with_streaming_response.register( - benchmark_id="benchmark_id", - dataset_id="dataset_id", - scoring_functions=["string"], - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - benchmark = response.parse() - assert benchmark is None - - assert cast(Any, response.is_closed) is True - - -class TestAsyncBenchmarks: - parametrize = pytest.mark.parametrize( - "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] - ) - - @parametrize - async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - benchmark = await async_client.benchmarks.retrieve( - "benchmark_id", - ) - assert_matches_type(Benchmark, benchmark, path=["response"]) - - @parametrize - async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.benchmarks.with_raw_response.retrieve( - "benchmark_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - benchmark = await response.parse() - assert_matches_type(Benchmark, benchmark, path=["response"]) - - @parametrize - async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.benchmarks.with_streaming_response.retrieve( - "benchmark_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - benchmark = await response.parse() - assert_matches_type(Benchmark, benchmark, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): - await async_client.benchmarks.with_raw_response.retrieve( - "", - ) - - @parametrize - async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None: - benchmark = await async_client.benchmarks.list() - assert_matches_type(BenchmarkListResponse, benchmark, 
path=["response"]) - - @parametrize - async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.benchmarks.with_raw_response.list() - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - benchmark = await response.parse() - assert_matches_type(BenchmarkListResponse, benchmark, path=["response"]) - - @parametrize - async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.benchmarks.with_streaming_response.list() as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - benchmark = await response.parse() - assert_matches_type(BenchmarkListResponse, benchmark, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_register(self, async_client: AsyncLlamaStackClient) -> None: - benchmark = await async_client.benchmarks.register( - benchmark_id="benchmark_id", - dataset_id="dataset_id", - scoring_functions=["string"], - ) - assert benchmark is None - - @parametrize - async def test_method_register_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - benchmark = await async_client.benchmarks.register( - benchmark_id="benchmark_id", - dataset_id="dataset_id", - scoring_functions=["string"], - metadata={"foo": True}, - provider_benchmark_id="provider_benchmark_id", - provider_id="provider_id", - ) - assert benchmark is None - - @parametrize - async def test_raw_response_register(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.benchmarks.with_raw_response.register( - benchmark_id="benchmark_id", - dataset_id="dataset_id", - scoring_functions=["string"], - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - benchmark = await response.parse() - assert benchmark is None - - @parametrize - async def test_streaming_response_register(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.benchmarks.with_streaming_response.register( - benchmark_id="benchmark_id", - dataset_id="dataset_id", - scoring_functions=["string"], - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - benchmark = await response.parse() - assert benchmark is None - - assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_completions.py b/tests/api_resources/test_completions.py deleted file mode 100644 index 355384b0..00000000 --- a/tests/api_resources/test_completions.py +++ /dev/null @@ -1,268 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types import CompletionCreateResponse - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestCompletions: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_create_overload_1(self, client: LlamaStackClient) -> None: - completion = client.completions.create( - model="model", - prompt="string", - ) - assert_matches_type(CompletionCreateResponse, completion, path=["response"]) - - @parametrize - def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient) -> None: - completion = client.completions.create( - model="model", - prompt="string", - best_of=0, - echo=True, - frequency_penalty=0, - guided_choice=["string"], - logit_bias={"foo": 0}, - logprobs=True, - max_tokens=0, - n=0, - presence_penalty=0, - prompt_logprobs=0, - seed=0, - stop="string", - stream=False, - stream_options={"foo": True}, - suffix="suffix", - temperature=0, - top_p=0, - user="user", - ) - assert_matches_type(CompletionCreateResponse, completion, path=["response"]) - - @parametrize - def test_raw_response_create_overload_1(self, client: LlamaStackClient) -> None: - response = client.completions.with_raw_response.create( - model="model", - prompt="string", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - completion = response.parse() - assert_matches_type(CompletionCreateResponse, completion, path=["response"]) - - @parametrize - def test_streaming_response_create_overload_1(self, client: LlamaStackClient) -> None: - with client.completions.with_streaming_response.create( - model="model", - prompt="string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - completion = response.parse() - assert_matches_type(CompletionCreateResponse, completion, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_create_overload_2(self, client: LlamaStackClient) -> None: - completion_stream = client.completions.create( - model="model", - prompt="string", - stream=True, - ) - completion_stream.response.close() - - @parametrize - def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient) -> None: - completion_stream = client.completions.create( - model="model", - prompt="string", - stream=True, - best_of=0, - echo=True, - frequency_penalty=0, - guided_choice=["string"], - logit_bias={"foo": 0}, - logprobs=True, - max_tokens=0, - n=0, - presence_penalty=0, - prompt_logprobs=0, - seed=0, - stop="string", - stream_options={"foo": True}, - suffix="suffix", - temperature=0, - top_p=0, - user="user", - ) - completion_stream.response.close() - - @parametrize - def test_raw_response_create_overload_2(self, client: LlamaStackClient) -> None: - response = client.completions.with_raw_response.create( - model="model", - prompt="string", - stream=True, - ) - - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - stream = response.parse() - stream.close() - - @parametrize - def test_streaming_response_create_overload_2(self, client: LlamaStackClient) -> None: - with client.completions.with_streaming_response.create( - model="model", - 
prompt="string", - stream=True, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - stream = response.parse() - stream.close() - - assert cast(Any, response.is_closed) is True - - -class TestAsyncCompletions: - parametrize = pytest.mark.parametrize( - "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] - ) - - @parametrize - async def test_method_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: - completion = await async_client.completions.create( - model="model", - prompt="string", - ) - assert_matches_type(CompletionCreateResponse, completion, path=["response"]) - - @parametrize - async def test_method_create_with_all_params_overload_1(self, async_client: AsyncLlamaStackClient) -> None: - completion = await async_client.completions.create( - model="model", - prompt="string", - best_of=0, - echo=True, - frequency_penalty=0, - guided_choice=["string"], - logit_bias={"foo": 0}, - logprobs=True, - max_tokens=0, - n=0, - presence_penalty=0, - prompt_logprobs=0, - seed=0, - stop="string", - stream=False, - stream_options={"foo": True}, - suffix="suffix", - temperature=0, - top_p=0, - user="user", - ) - assert_matches_type(CompletionCreateResponse, completion, path=["response"]) - - @parametrize - async def test_raw_response_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.completions.with_raw_response.create( - model="model", - prompt="string", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - completion = await response.parse() - assert_matches_type(CompletionCreateResponse, completion, path=["response"]) - - @parametrize - async def test_streaming_response_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.completions.with_streaming_response.create( - model="model", - prompt="string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - completion = await response.parse() - assert_matches_type(CompletionCreateResponse, completion, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None: - completion_stream = await async_client.completions.create( - model="model", - prompt="string", - stream=True, - ) - await completion_stream.response.aclose() - - @parametrize - async def test_method_create_with_all_params_overload_2(self, async_client: AsyncLlamaStackClient) -> None: - completion_stream = await async_client.completions.create( - model="model", - prompt="string", - stream=True, - best_of=0, - echo=True, - frequency_penalty=0, - guided_choice=["string"], - logit_bias={"foo": 0}, - logprobs=True, - max_tokens=0, - n=0, - presence_penalty=0, - prompt_logprobs=0, - seed=0, - stop="string", - stream_options={"foo": True}, - suffix="suffix", - temperature=0, - top_p=0, - user="user", - ) - await completion_stream.response.aclose() - - @parametrize - async def test_raw_response_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.completions.with_raw_response.create( - model="model", - prompt="string", - stream=True, - ) - - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - stream = await response.parse() - await 
stream.close() - - @parametrize - async def test_streaming_response_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.completions.with_streaming_response.create( - model="model", - prompt="string", - stream=True, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - stream = await response.parse() - await stream.close() - - assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_datasets.py b/tests/api_resources/test_datasets.py deleted file mode 100644 index 9cd17f45..00000000 --- a/tests/api_resources/test_datasets.py +++ /dev/null @@ -1,437 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types import ( - DatasetListResponse, - DatasetIterrowsResponse, - DatasetRegisterResponse, - DatasetRetrieveResponse, -) - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestDatasets: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_retrieve(self, client: LlamaStackClient) -> None: - dataset = client.datasets.retrieve( - "dataset_id", - ) - assert_matches_type(DatasetRetrieveResponse, dataset, path=["response"]) - - @parametrize - def test_raw_response_retrieve(self, client: LlamaStackClient) -> None: - response = client.datasets.with_raw_response.retrieve( - "dataset_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - dataset = response.parse() - assert_matches_type(DatasetRetrieveResponse, dataset, path=["response"]) - - @parametrize - def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None: - with client.datasets.with_streaming_response.retrieve( - "dataset_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - dataset = response.parse() - assert_matches_type(DatasetRetrieveResponse, dataset, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_retrieve(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `dataset_id` but received ''"): - client.datasets.with_raw_response.retrieve( - "", - ) - - @parametrize - def test_method_list(self, client: LlamaStackClient) -> None: - dataset = client.datasets.list() - assert_matches_type(DatasetListResponse, dataset, path=["response"]) - - @parametrize - def test_raw_response_list(self, client: LlamaStackClient) -> None: - response = client.datasets.with_raw_response.list() - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - dataset = response.parse() - assert_matches_type(DatasetListResponse, dataset, path=["response"]) - - @parametrize - def test_streaming_response_list(self, client: LlamaStackClient) -> None: - with client.datasets.with_streaming_response.list() as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - dataset = response.parse() - assert_matches_type(DatasetListResponse, dataset, 
path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_iterrows(self, client: LlamaStackClient) -> None: - dataset = client.datasets.iterrows( - dataset_id="dataset_id", - ) - assert_matches_type(DatasetIterrowsResponse, dataset, path=["response"]) - - @parametrize - def test_method_iterrows_with_all_params(self, client: LlamaStackClient) -> None: - dataset = client.datasets.iterrows( - dataset_id="dataset_id", - limit=0, - start_index=0, - ) - assert_matches_type(DatasetIterrowsResponse, dataset, path=["response"]) - - @parametrize - def test_raw_response_iterrows(self, client: LlamaStackClient) -> None: - response = client.datasets.with_raw_response.iterrows( - dataset_id="dataset_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - dataset = response.parse() - assert_matches_type(DatasetIterrowsResponse, dataset, path=["response"]) - - @parametrize - def test_streaming_response_iterrows(self, client: LlamaStackClient) -> None: - with client.datasets.with_streaming_response.iterrows( - dataset_id="dataset_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - dataset = response.parse() - assert_matches_type(DatasetIterrowsResponse, dataset, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_iterrows(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `dataset_id` but received ''"): - client.datasets.with_raw_response.iterrows( - dataset_id="", - ) - - @parametrize - def test_method_register(self, client: LlamaStackClient) -> None: - dataset = client.datasets.register( - purpose="post-training/messages", - source={ - "type": "uri", - "uri": "uri", - }, - ) - assert_matches_type(DatasetRegisterResponse, dataset, path=["response"]) - - @parametrize - def test_method_register_with_all_params(self, client: LlamaStackClient) -> None: - dataset = client.datasets.register( - purpose="post-training/messages", - source={ - "type": "uri", - "uri": "uri", - }, - dataset_id="dataset_id", - metadata={"foo": True}, - ) - assert_matches_type(DatasetRegisterResponse, dataset, path=["response"]) - - @parametrize - def test_raw_response_register(self, client: LlamaStackClient) -> None: - response = client.datasets.with_raw_response.register( - purpose="post-training/messages", - source={ - "type": "uri", - "uri": "uri", - }, - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - dataset = response.parse() - assert_matches_type(DatasetRegisterResponse, dataset, path=["response"]) - - @parametrize - def test_streaming_response_register(self, client: LlamaStackClient) -> None: - with client.datasets.with_streaming_response.register( - purpose="post-training/messages", - source={ - "type": "uri", - "uri": "uri", - }, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - dataset = response.parse() - assert_matches_type(DatasetRegisterResponse, dataset, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_unregister(self, client: LlamaStackClient) -> None: - dataset = client.datasets.unregister( - "dataset_id", - ) - assert dataset is None - - @parametrize - def test_raw_response_unregister(self, client: 
LlamaStackClient) -> None: - response = client.datasets.with_raw_response.unregister( - "dataset_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - dataset = response.parse() - assert dataset is None - - @parametrize - def test_streaming_response_unregister(self, client: LlamaStackClient) -> None: - with client.datasets.with_streaming_response.unregister( - "dataset_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - dataset = response.parse() - assert dataset is None - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_unregister(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `dataset_id` but received ''"): - client.datasets.with_raw_response.unregister( - "", - ) - - -class TestAsyncDatasets: - parametrize = pytest.mark.parametrize( - "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] - ) - - @parametrize - async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - dataset = await async_client.datasets.retrieve( - "dataset_id", - ) - assert_matches_type(DatasetRetrieveResponse, dataset, path=["response"]) - - @parametrize - async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.datasets.with_raw_response.retrieve( - "dataset_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - dataset = await response.parse() - assert_matches_type(DatasetRetrieveResponse, dataset, path=["response"]) - - @parametrize - async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.datasets.with_streaming_response.retrieve( - "dataset_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - dataset = await response.parse() - assert_matches_type(DatasetRetrieveResponse, dataset, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `dataset_id` but received ''"): - await async_client.datasets.with_raw_response.retrieve( - "", - ) - - @parametrize - async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None: - dataset = await async_client.datasets.list() - assert_matches_type(DatasetListResponse, dataset, path=["response"]) - - @parametrize - async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.datasets.with_raw_response.list() - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - dataset = await response.parse() - assert_matches_type(DatasetListResponse, dataset, path=["response"]) - - @parametrize - async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.datasets.with_streaming_response.list() as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - dataset = await response.parse() - assert_matches_type(DatasetListResponse, dataset, path=["response"]) 
- - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_iterrows(self, async_client: AsyncLlamaStackClient) -> None: - dataset = await async_client.datasets.iterrows( - dataset_id="dataset_id", - ) - assert_matches_type(DatasetIterrowsResponse, dataset, path=["response"]) - - @parametrize - async def test_method_iterrows_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - dataset = await async_client.datasets.iterrows( - dataset_id="dataset_id", - limit=0, - start_index=0, - ) - assert_matches_type(DatasetIterrowsResponse, dataset, path=["response"]) - - @parametrize - async def test_raw_response_iterrows(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.datasets.with_raw_response.iterrows( - dataset_id="dataset_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - dataset = await response.parse() - assert_matches_type(DatasetIterrowsResponse, dataset, path=["response"]) - - @parametrize - async def test_streaming_response_iterrows(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.datasets.with_streaming_response.iterrows( - dataset_id="dataset_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - dataset = await response.parse() - assert_matches_type(DatasetIterrowsResponse, dataset, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_iterrows(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `dataset_id` but received ''"): - await async_client.datasets.with_raw_response.iterrows( - dataset_id="", - ) - - @parametrize - async def test_method_register(self, async_client: AsyncLlamaStackClient) -> None: - dataset = await async_client.datasets.register( - purpose="post-training/messages", - source={ - "type": "uri", - "uri": "uri", - }, - ) - assert_matches_type(DatasetRegisterResponse, dataset, path=["response"]) - - @parametrize - async def test_method_register_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - dataset = await async_client.datasets.register( - purpose="post-training/messages", - source={ - "type": "uri", - "uri": "uri", - }, - dataset_id="dataset_id", - metadata={"foo": True}, - ) - assert_matches_type(DatasetRegisterResponse, dataset, path=["response"]) - - @parametrize - async def test_raw_response_register(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.datasets.with_raw_response.register( - purpose="post-training/messages", - source={ - "type": "uri", - "uri": "uri", - }, - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - dataset = await response.parse() - assert_matches_type(DatasetRegisterResponse, dataset, path=["response"]) - - @parametrize - async def test_streaming_response_register(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.datasets.with_streaming_response.register( - purpose="post-training/messages", - source={ - "type": "uri", - "uri": "uri", - }, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - dataset = await response.parse() - assert_matches_type(DatasetRegisterResponse, dataset, path=["response"]) - - assert cast(Any, 
response.is_closed) is True - - @parametrize - async def test_method_unregister(self, async_client: AsyncLlamaStackClient) -> None: - dataset = await async_client.datasets.unregister( - "dataset_id", - ) - assert dataset is None - - @parametrize - async def test_raw_response_unregister(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.datasets.with_raw_response.unregister( - "dataset_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - dataset = await response.parse() - assert dataset is None - - @parametrize - async def test_streaming_response_unregister(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.datasets.with_streaming_response.unregister( - "dataset_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - dataset = await response.parse() - assert dataset is None - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_unregister(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `dataset_id` but received ''"): - await async_client.datasets.with_raw_response.unregister( - "", - ) diff --git a/tests/api_resources/test_embeddings.py b/tests/api_resources/test_embeddings.py deleted file mode 100644 index 5296e9c0..00000000 --- a/tests/api_resources/test_embeddings.py +++ /dev/null @@ -1,114 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types import CreateEmbeddingsResponse - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestEmbeddings: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_create(self, client: LlamaStackClient) -> None: - embedding = client.embeddings.create( - input="string", - model="model", - ) - assert_matches_type(CreateEmbeddingsResponse, embedding, path=["response"]) - - @parametrize - def test_method_create_with_all_params(self, client: LlamaStackClient) -> None: - embedding = client.embeddings.create( - input="string", - model="model", - dimensions=0, - encoding_format="encoding_format", - user="user", - ) - assert_matches_type(CreateEmbeddingsResponse, embedding, path=["response"]) - - @parametrize - def test_raw_response_create(self, client: LlamaStackClient) -> None: - response = client.embeddings.with_raw_response.create( - input="string", - model="model", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - embedding = response.parse() - assert_matches_type(CreateEmbeddingsResponse, embedding, path=["response"]) - - @parametrize - def test_streaming_response_create(self, client: LlamaStackClient) -> None: - with client.embeddings.with_streaming_response.create( - input="string", - model="model", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - embedding = response.parse() - assert_matches_type(CreateEmbeddingsResponse, embedding, path=["response"]) - - assert cast(Any, response.is_closed) is 
True - - -class TestAsyncEmbeddings: - parametrize = pytest.mark.parametrize( - "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] - ) - - @parametrize - async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None: - embedding = await async_client.embeddings.create( - input="string", - model="model", - ) - assert_matches_type(CreateEmbeddingsResponse, embedding, path=["response"]) - - @parametrize - async def test_method_create_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - embedding = await async_client.embeddings.create( - input="string", - model="model", - dimensions=0, - encoding_format="encoding_format", - user="user", - ) - assert_matches_type(CreateEmbeddingsResponse, embedding, path=["response"]) - - @parametrize - async def test_raw_response_create(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.embeddings.with_raw_response.create( - input="string", - model="model", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - embedding = await response.parse() - assert_matches_type(CreateEmbeddingsResponse, embedding, path=["response"]) - - @parametrize - async def test_streaming_response_create(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.embeddings.with_streaming_response.create( - input="string", - model="model", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - embedding = await response.parse() - assert_matches_type(CreateEmbeddingsResponse, embedding, path=["response"]) - - assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_eval.py b/tests/api_resources/test_eval.py deleted file mode 100644 index 878b3d28..00000000 --- a/tests/api_resources/test_eval.py +++ /dev/null @@ -1,1115 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types import ( - Job, - EvaluateResponse, -) - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestEval: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_evaluate_rows(self, client: LlamaStackClient) -> None: - eval = client.eval.evaluate_rows( - benchmark_id="benchmark_id", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": {"strategy": {"type": "greedy"}}, - "type": "model", - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - }, - input_rows=[{"foo": True}], - scoring_functions=["string"], - ) - assert_matches_type(EvaluateResponse, eval, path=["response"]) - - @parametrize - def test_method_evaluate_rows_with_all_params(self, client: LlamaStackClient) -> None: - eval = client.eval.evaluate_rows( - benchmark_id="benchmark_id", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": { - "strategy": {"type": "greedy"}, - "max_tokens": 0, - "repetition_penalty": 0, - "stop": ["string"], - }, - "type": "model", - "system_message": { - "content": "string", - "role": "system", - }, - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - "prompt_template": "prompt_template", - } - }, - "num_examples": 0, - }, - input_rows=[{"foo": True}], - scoring_functions=["string"], - ) - assert_matches_type(EvaluateResponse, eval, path=["response"]) - - @parametrize - def test_raw_response_evaluate_rows(self, client: LlamaStackClient) -> None: - response = client.eval.with_raw_response.evaluate_rows( - benchmark_id="benchmark_id", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": {"strategy": {"type": "greedy"}}, - "type": "model", - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - }, - input_rows=[{"foo": True}], - scoring_functions=["string"], - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - eval = response.parse() - assert_matches_type(EvaluateResponse, eval, path=["response"]) - - @parametrize - def test_streaming_response_evaluate_rows(self, client: LlamaStackClient) -> None: - with client.eval.with_streaming_response.evaluate_rows( - benchmark_id="benchmark_id", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": {"strategy": {"type": "greedy"}}, - "type": "model", - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - }, - input_rows=[{"foo": True}], - scoring_functions=["string"], - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - eval = response.parse() - assert_matches_type(EvaluateResponse, eval, path=["response"]) - - assert cast(Any, response.is_closed) is 
True - - @parametrize - def test_path_params_evaluate_rows(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): - client.eval.with_raw_response.evaluate_rows( - benchmark_id="", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": {"strategy": {"type": "greedy"}}, - "type": "model", - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - }, - input_rows=[{"foo": True}], - scoring_functions=["string"], - ) - - @parametrize - def test_method_evaluate_rows_alpha(self, client: LlamaStackClient) -> None: - eval = client.eval.evaluate_rows_alpha( - benchmark_id="benchmark_id", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": {"strategy": {"type": "greedy"}}, - "type": "model", - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - }, - input_rows=[{"foo": True}], - scoring_functions=["string"], - ) - assert_matches_type(EvaluateResponse, eval, path=["response"]) - - @parametrize - def test_method_evaluate_rows_alpha_with_all_params(self, client: LlamaStackClient) -> None: - eval = client.eval.evaluate_rows_alpha( - benchmark_id="benchmark_id", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": { - "strategy": {"type": "greedy"}, - "max_tokens": 0, - "repetition_penalty": 0, - "stop": ["string"], - }, - "type": "model", - "system_message": { - "content": "string", - "role": "system", - }, - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - "prompt_template": "prompt_template", - } - }, - "num_examples": 0, - }, - input_rows=[{"foo": True}], - scoring_functions=["string"], - ) - assert_matches_type(EvaluateResponse, eval, path=["response"]) - - @parametrize - def test_raw_response_evaluate_rows_alpha(self, client: LlamaStackClient) -> None: - response = client.eval.with_raw_response.evaluate_rows_alpha( - benchmark_id="benchmark_id", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": {"strategy": {"type": "greedy"}}, - "type": "model", - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - }, - input_rows=[{"foo": True}], - scoring_functions=["string"], - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - eval = response.parse() - assert_matches_type(EvaluateResponse, eval, path=["response"]) - - @parametrize - def test_streaming_response_evaluate_rows_alpha(self, client: LlamaStackClient) -> None: - with client.eval.with_streaming_response.evaluate_rows_alpha( - benchmark_id="benchmark_id", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": {"strategy": {"type": "greedy"}}, - "type": "model", - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - }, - input_rows=[{"foo": True}], - scoring_functions=["string"], - ) as response: - assert not 
response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - eval = response.parse() - assert_matches_type(EvaluateResponse, eval, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_evaluate_rows_alpha(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): - client.eval.with_raw_response.evaluate_rows_alpha( - benchmark_id="", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": {"strategy": {"type": "greedy"}}, - "type": "model", - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - }, - input_rows=[{"foo": True}], - scoring_functions=["string"], - ) - - @parametrize - def test_method_run_eval(self, client: LlamaStackClient) -> None: - eval = client.eval.run_eval( - benchmark_id="benchmark_id", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": {"strategy": {"type": "greedy"}}, - "type": "model", - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - }, - ) - assert_matches_type(Job, eval, path=["response"]) - - @parametrize - def test_method_run_eval_with_all_params(self, client: LlamaStackClient) -> None: - eval = client.eval.run_eval( - benchmark_id="benchmark_id", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": { - "strategy": {"type": "greedy"}, - "max_tokens": 0, - "repetition_penalty": 0, - "stop": ["string"], - }, - "type": "model", - "system_message": { - "content": "string", - "role": "system", - }, - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - "prompt_template": "prompt_template", - } - }, - "num_examples": 0, - }, - ) - assert_matches_type(Job, eval, path=["response"]) - - @parametrize - def test_raw_response_run_eval(self, client: LlamaStackClient) -> None: - response = client.eval.with_raw_response.run_eval( - benchmark_id="benchmark_id", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": {"strategy": {"type": "greedy"}}, - "type": "model", - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - }, - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - eval = response.parse() - assert_matches_type(Job, eval, path=["response"]) - - @parametrize - def test_streaming_response_run_eval(self, client: LlamaStackClient) -> None: - with client.eval.with_streaming_response.run_eval( - benchmark_id="benchmark_id", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": {"strategy": {"type": "greedy"}}, - "type": "model", - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - }, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - eval = response.parse() - 
assert_matches_type(Job, eval, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_run_eval(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): - client.eval.with_raw_response.run_eval( - benchmark_id="", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": {"strategy": {"type": "greedy"}}, - "type": "model", - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - }, - ) - - @parametrize - def test_method_run_eval_alpha(self, client: LlamaStackClient) -> None: - eval = client.eval.run_eval_alpha( - benchmark_id="benchmark_id", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": {"strategy": {"type": "greedy"}}, - "type": "model", - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - }, - ) - assert_matches_type(Job, eval, path=["response"]) - - @parametrize - def test_method_run_eval_alpha_with_all_params(self, client: LlamaStackClient) -> None: - eval = client.eval.run_eval_alpha( - benchmark_id="benchmark_id", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": { - "strategy": {"type": "greedy"}, - "max_tokens": 0, - "repetition_penalty": 0, - "stop": ["string"], - }, - "type": "model", - "system_message": { - "content": "string", - "role": "system", - }, - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - "prompt_template": "prompt_template", - } - }, - "num_examples": 0, - }, - ) - assert_matches_type(Job, eval, path=["response"]) - - @parametrize - def test_raw_response_run_eval_alpha(self, client: LlamaStackClient) -> None: - response = client.eval.with_raw_response.run_eval_alpha( - benchmark_id="benchmark_id", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": {"strategy": {"type": "greedy"}}, - "type": "model", - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - }, - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - eval = response.parse() - assert_matches_type(Job, eval, path=["response"]) - - @parametrize - def test_streaming_response_run_eval_alpha(self, client: LlamaStackClient) -> None: - with client.eval.with_streaming_response.run_eval_alpha( - benchmark_id="benchmark_id", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": {"strategy": {"type": "greedy"}}, - "type": "model", - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - }, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - eval = response.parse() - assert_matches_type(Job, eval, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_run_eval_alpha(self, client: LlamaStackClient) -> 
None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): - client.eval.with_raw_response.run_eval_alpha( - benchmark_id="", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": {"strategy": {"type": "greedy"}}, - "type": "model", - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - }, - ) - - -class TestAsyncEval: - parametrize = pytest.mark.parametrize( - "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] - ) - - @parametrize - async def test_method_evaluate_rows(self, async_client: AsyncLlamaStackClient) -> None: - eval = await async_client.eval.evaluate_rows( - benchmark_id="benchmark_id", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": {"strategy": {"type": "greedy"}}, - "type": "model", - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - }, - input_rows=[{"foo": True}], - scoring_functions=["string"], - ) - assert_matches_type(EvaluateResponse, eval, path=["response"]) - - @parametrize - async def test_method_evaluate_rows_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - eval = await async_client.eval.evaluate_rows( - benchmark_id="benchmark_id", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": { - "strategy": {"type": "greedy"}, - "max_tokens": 0, - "repetition_penalty": 0, - "stop": ["string"], - }, - "type": "model", - "system_message": { - "content": "string", - "role": "system", - }, - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - "prompt_template": "prompt_template", - } - }, - "num_examples": 0, - }, - input_rows=[{"foo": True}], - scoring_functions=["string"], - ) - assert_matches_type(EvaluateResponse, eval, path=["response"]) - - @parametrize - async def test_raw_response_evaluate_rows(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.eval.with_raw_response.evaluate_rows( - benchmark_id="benchmark_id", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": {"strategy": {"type": "greedy"}}, - "type": "model", - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - }, - input_rows=[{"foo": True}], - scoring_functions=["string"], - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - eval = await response.parse() - assert_matches_type(EvaluateResponse, eval, path=["response"]) - - @parametrize - async def test_streaming_response_evaluate_rows(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.eval.with_streaming_response.evaluate_rows( - benchmark_id="benchmark_id", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": {"strategy": {"type": "greedy"}}, - "type": "model", - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - }, - 
input_rows=[{"foo": True}], - scoring_functions=["string"], - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - eval = await response.parse() - assert_matches_type(EvaluateResponse, eval, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_evaluate_rows(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): - await async_client.eval.with_raw_response.evaluate_rows( - benchmark_id="", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": {"strategy": {"type": "greedy"}}, - "type": "model", - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - }, - input_rows=[{"foo": True}], - scoring_functions=["string"], - ) - - @parametrize - async def test_method_evaluate_rows_alpha(self, async_client: AsyncLlamaStackClient) -> None: - eval = await async_client.eval.evaluate_rows_alpha( - benchmark_id="benchmark_id", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": {"strategy": {"type": "greedy"}}, - "type": "model", - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - }, - input_rows=[{"foo": True}], - scoring_functions=["string"], - ) - assert_matches_type(EvaluateResponse, eval, path=["response"]) - - @parametrize - async def test_method_evaluate_rows_alpha_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - eval = await async_client.eval.evaluate_rows_alpha( - benchmark_id="benchmark_id", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": { - "strategy": {"type": "greedy"}, - "max_tokens": 0, - "repetition_penalty": 0, - "stop": ["string"], - }, - "type": "model", - "system_message": { - "content": "string", - "role": "system", - }, - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - "prompt_template": "prompt_template", - } - }, - "num_examples": 0, - }, - input_rows=[{"foo": True}], - scoring_functions=["string"], - ) - assert_matches_type(EvaluateResponse, eval, path=["response"]) - - @parametrize - async def test_raw_response_evaluate_rows_alpha(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.eval.with_raw_response.evaluate_rows_alpha( - benchmark_id="benchmark_id", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": {"strategy": {"type": "greedy"}}, - "type": "model", - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - }, - input_rows=[{"foo": True}], - scoring_functions=["string"], - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - eval = await response.parse() - assert_matches_type(EvaluateResponse, eval, path=["response"]) - - @parametrize - async def test_streaming_response_evaluate_rows_alpha(self, async_client: AsyncLlamaStackClient) -> None: - async with 
async_client.eval.with_streaming_response.evaluate_rows_alpha( - benchmark_id="benchmark_id", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": {"strategy": {"type": "greedy"}}, - "type": "model", - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - }, - input_rows=[{"foo": True}], - scoring_functions=["string"], - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - eval = await response.parse() - assert_matches_type(EvaluateResponse, eval, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_evaluate_rows_alpha(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): - await async_client.eval.with_raw_response.evaluate_rows_alpha( - benchmark_id="", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": {"strategy": {"type": "greedy"}}, - "type": "model", - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - }, - input_rows=[{"foo": True}], - scoring_functions=["string"], - ) - - @parametrize - async def test_method_run_eval(self, async_client: AsyncLlamaStackClient) -> None: - eval = await async_client.eval.run_eval( - benchmark_id="benchmark_id", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": {"strategy": {"type": "greedy"}}, - "type": "model", - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - }, - ) - assert_matches_type(Job, eval, path=["response"]) - - @parametrize - async def test_method_run_eval_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - eval = await async_client.eval.run_eval( - benchmark_id="benchmark_id", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": { - "strategy": {"type": "greedy"}, - "max_tokens": 0, - "repetition_penalty": 0, - "stop": ["string"], - }, - "type": "model", - "system_message": { - "content": "string", - "role": "system", - }, - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - "prompt_template": "prompt_template", - } - }, - "num_examples": 0, - }, - ) - assert_matches_type(Job, eval, path=["response"]) - - @parametrize - async def test_raw_response_run_eval(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.eval.with_raw_response.run_eval( - benchmark_id="benchmark_id", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": {"strategy": {"type": "greedy"}}, - "type": "model", - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - }, - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - eval = await response.parse() - assert_matches_type(Job, eval, path=["response"]) - - @parametrize - async def 
test_streaming_response_run_eval(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.eval.with_streaming_response.run_eval( - benchmark_id="benchmark_id", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": {"strategy": {"type": "greedy"}}, - "type": "model", - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - }, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - eval = await response.parse() - assert_matches_type(Job, eval, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_run_eval(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): - await async_client.eval.with_raw_response.run_eval( - benchmark_id="", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": {"strategy": {"type": "greedy"}}, - "type": "model", - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - }, - ) - - @parametrize - async def test_method_run_eval_alpha(self, async_client: AsyncLlamaStackClient) -> None: - eval = await async_client.eval.run_eval_alpha( - benchmark_id="benchmark_id", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": {"strategy": {"type": "greedy"}}, - "type": "model", - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - }, - ) - assert_matches_type(Job, eval, path=["response"]) - - @parametrize - async def test_method_run_eval_alpha_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - eval = await async_client.eval.run_eval_alpha( - benchmark_id="benchmark_id", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": { - "strategy": {"type": "greedy"}, - "max_tokens": 0, - "repetition_penalty": 0, - "stop": ["string"], - }, - "type": "model", - "system_message": { - "content": "string", - "role": "system", - }, - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - "prompt_template": "prompt_template", - } - }, - "num_examples": 0, - }, - ) - assert_matches_type(Job, eval, path=["response"]) - - @parametrize - async def test_raw_response_run_eval_alpha(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.eval.with_raw_response.run_eval_alpha( - benchmark_id="benchmark_id", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": {"strategy": {"type": "greedy"}}, - "type": "model", - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - }, - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - eval = await response.parse() - assert_matches_type(Job, eval, path=["response"]) - - @parametrize - async def test_streaming_response_run_eval_alpha(self, 
async_client: AsyncLlamaStackClient) -> None: - async with async_client.eval.with_streaming_response.run_eval_alpha( - benchmark_id="benchmark_id", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": {"strategy": {"type": "greedy"}}, - "type": "model", - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - }, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - eval = await response.parse() - assert_matches_type(Job, eval, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_run_eval_alpha(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): - await async_client.eval.with_raw_response.run_eval_alpha( - benchmark_id="", - benchmark_config={ - "eval_candidate": { - "model": "model", - "sampling_params": {"strategy": {"type": "greedy"}}, - "type": "model", - }, - "scoring_params": { - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - }, - ) diff --git a/tests/api_resources/test_files.py b/tests/api_resources/test_files.py deleted file mode 100644 index 7fc5e107..00000000 --- a/tests/api_resources/test_files.py +++ /dev/null @@ -1,390 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types import File, ListFilesResponse, DeleteFileResponse - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestFiles: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_create(self, client: LlamaStackClient) -> None: - file = client.files.create( - file=b"raw file contents", - purpose="assistants", - ) - assert_matches_type(File, file, path=["response"]) - - @parametrize - def test_raw_response_create(self, client: LlamaStackClient) -> None: - response = client.files.with_raw_response.create( - file=b"raw file contents", - purpose="assistants", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = response.parse() - assert_matches_type(File, file, path=["response"]) - - @parametrize - def test_streaming_response_create(self, client: LlamaStackClient) -> None: - with client.files.with_streaming_response.create( - file=b"raw file contents", - purpose="assistants", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - file = response.parse() - assert_matches_type(File, file, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_retrieve(self, client: LlamaStackClient) -> None: - file = client.files.retrieve( - "file_id", - ) - assert_matches_type(File, file, path=["response"]) - - @parametrize - def test_raw_response_retrieve(self, client: LlamaStackClient) -> None: - response = client.files.with_raw_response.retrieve( - 
"file_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = response.parse() - assert_matches_type(File, file, path=["response"]) - - @parametrize - def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None: - with client.files.with_streaming_response.retrieve( - "file_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - file = response.parse() - assert_matches_type(File, file, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_retrieve(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): - client.files.with_raw_response.retrieve( - "", - ) - - @parametrize - def test_method_list(self, client: LlamaStackClient) -> None: - file = client.files.list() - assert_matches_type(ListFilesResponse, file, path=["response"]) - - @parametrize - def test_method_list_with_all_params(self, client: LlamaStackClient) -> None: - file = client.files.list( - after="after", - limit=0, - order="asc", - purpose="assistants", - ) - assert_matches_type(ListFilesResponse, file, path=["response"]) - - @parametrize - def test_raw_response_list(self, client: LlamaStackClient) -> None: - response = client.files.with_raw_response.list() - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = response.parse() - assert_matches_type(ListFilesResponse, file, path=["response"]) - - @parametrize - def test_streaming_response_list(self, client: LlamaStackClient) -> None: - with client.files.with_streaming_response.list() as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - file = response.parse() - assert_matches_type(ListFilesResponse, file, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_delete(self, client: LlamaStackClient) -> None: - file = client.files.delete( - "file_id", - ) - assert_matches_type(DeleteFileResponse, file, path=["response"]) - - @parametrize - def test_raw_response_delete(self, client: LlamaStackClient) -> None: - response = client.files.with_raw_response.delete( - "file_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = response.parse() - assert_matches_type(DeleteFileResponse, file, path=["response"]) - - @parametrize - def test_streaming_response_delete(self, client: LlamaStackClient) -> None: - with client.files.with_streaming_response.delete( - "file_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - file = response.parse() - assert_matches_type(DeleteFileResponse, file, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_delete(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): - client.files.with_raw_response.delete( - "", - ) - - @parametrize - def test_method_content(self, client: LlamaStackClient) -> None: - file = client.files.content( - "file_id", - ) - assert_matches_type(object, file, path=["response"]) - - @parametrize - def test_raw_response_content(self, client: LlamaStackClient) 
-> None: - response = client.files.with_raw_response.content( - "file_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = response.parse() - assert_matches_type(object, file, path=["response"]) - - @parametrize - def test_streaming_response_content(self, client: LlamaStackClient) -> None: - with client.files.with_streaming_response.content( - "file_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - file = response.parse() - assert_matches_type(object, file, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_content(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): - client.files.with_raw_response.content( - "", - ) - - -class TestAsyncFiles: - parametrize = pytest.mark.parametrize( - "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] - ) - - @parametrize - async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None: - file = await async_client.files.create( - file=b"raw file contents", - purpose="assistants", - ) - assert_matches_type(File, file, path=["response"]) - - @parametrize - async def test_raw_response_create(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.files.with_raw_response.create( - file=b"raw file contents", - purpose="assistants", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = await response.parse() - assert_matches_type(File, file, path=["response"]) - - @parametrize - async def test_streaming_response_create(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.files.with_streaming_response.create( - file=b"raw file contents", - purpose="assistants", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - file = await response.parse() - assert_matches_type(File, file, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - file = await async_client.files.retrieve( - "file_id", - ) - assert_matches_type(File, file, path=["response"]) - - @parametrize - async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.files.with_raw_response.retrieve( - "file_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = await response.parse() - assert_matches_type(File, file, path=["response"]) - - @parametrize - async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.files.with_streaming_response.retrieve( - "file_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - file = await response.parse() - assert_matches_type(File, file, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): - 
await async_client.files.with_raw_response.retrieve( - "", - ) - - @parametrize - async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None: - file = await async_client.files.list() - assert_matches_type(ListFilesResponse, file, path=["response"]) - - @parametrize - async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - file = await async_client.files.list( - after="after", - limit=0, - order="asc", - purpose="assistants", - ) - assert_matches_type(ListFilesResponse, file, path=["response"]) - - @parametrize - async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.files.with_raw_response.list() - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = await response.parse() - assert_matches_type(ListFilesResponse, file, path=["response"]) - - @parametrize - async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.files.with_streaming_response.list() as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - file = await response.parse() - assert_matches_type(ListFilesResponse, file, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_delete(self, async_client: AsyncLlamaStackClient) -> None: - file = await async_client.files.delete( - "file_id", - ) - assert_matches_type(DeleteFileResponse, file, path=["response"]) - - @parametrize - async def test_raw_response_delete(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.files.with_raw_response.delete( - "file_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = await response.parse() - assert_matches_type(DeleteFileResponse, file, path=["response"]) - - @parametrize - async def test_streaming_response_delete(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.files.with_streaming_response.delete( - "file_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - file = await response.parse() - assert_matches_type(DeleteFileResponse, file, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_delete(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): - await async_client.files.with_raw_response.delete( - "", - ) - - @parametrize - async def test_method_content(self, async_client: AsyncLlamaStackClient) -> None: - file = await async_client.files.content( - "file_id", - ) - assert_matches_type(object, file, path=["response"]) - - @parametrize - async def test_raw_response_content(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.files.with_raw_response.content( - "file_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = await response.parse() - assert_matches_type(object, file, path=["response"]) - - @parametrize - async def test_streaming_response_content(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.files.with_streaming_response.content( - "file_id", - ) as response: - 
assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - file = await response.parse() - assert_matches_type(object, file, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_content(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): - await async_client.files.with_raw_response.content( - "", - ) diff --git a/tests/api_resources/test_inference.py b/tests/api_resources/test_inference.py deleted file mode 100644 index 21967c9a..00000000 --- a/tests/api_resources/test_inference.py +++ /dev/null @@ -1,1035 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types import ( - CompletionResponse, - EmbeddingsResponse, - InferenceBatchChatCompletionResponse, -) -from llama_stack_client.types.shared import BatchCompletion, ChatCompletionResponse - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestInference: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_batch_chat_completion(self, client: LlamaStackClient) -> None: - inference = client.inference.batch_chat_completion( - messages_batch=[ - [ - { - "content": "string", - "role": "user", - } - ] - ], - model_id="model_id", - ) - assert_matches_type(InferenceBatchChatCompletionResponse, inference, path=["response"]) - - @parametrize - def test_method_batch_chat_completion_with_all_params(self, client: LlamaStackClient) -> None: - inference = client.inference.batch_chat_completion( - messages_batch=[ - [ - { - "content": "string", - "role": "user", - "context": "string", - } - ] - ], - model_id="model_id", - logprobs={"top_k": 0}, - response_format={ - "json_schema": {"foo": True}, - "type": "json_schema", - }, - sampling_params={ - "strategy": {"type": "greedy"}, - "max_tokens": 0, - "repetition_penalty": 0, - "stop": ["string"], - }, - tool_config={ - "system_message_behavior": "append", - "tool_choice": "auto", - "tool_prompt_format": "json", - }, - tools=[ - { - "tool_name": "brave_search", - "description": "description", - "parameters": { - "foo": { - "param_type": "param_type", - "default": True, - "description": "description", - "required": True, - } - }, - } - ], - ) - assert_matches_type(InferenceBatchChatCompletionResponse, inference, path=["response"]) - - @parametrize - def test_raw_response_batch_chat_completion(self, client: LlamaStackClient) -> None: - response = client.inference.with_raw_response.batch_chat_completion( - messages_batch=[ - [ - { - "content": "string", - "role": "user", - } - ] - ], - model_id="model_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - inference = response.parse() - assert_matches_type(InferenceBatchChatCompletionResponse, inference, path=["response"]) - - @parametrize - def test_streaming_response_batch_chat_completion(self, client: LlamaStackClient) -> None: - with client.inference.with_streaming_response.batch_chat_completion( - messages_batch=[ - [ - { - "content": "string", - "role": "user", - } - ] - ], - 
model_id="model_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - inference = response.parse() - assert_matches_type(InferenceBatchChatCompletionResponse, inference, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_batch_completion(self, client: LlamaStackClient) -> None: - inference = client.inference.batch_completion( - content_batch=["string"], - model_id="model_id", - ) - assert_matches_type(BatchCompletion, inference, path=["response"]) - - @parametrize - def test_method_batch_completion_with_all_params(self, client: LlamaStackClient) -> None: - inference = client.inference.batch_completion( - content_batch=["string"], - model_id="model_id", - logprobs={"top_k": 0}, - response_format={ - "json_schema": {"foo": True}, - "type": "json_schema", - }, - sampling_params={ - "strategy": {"type": "greedy"}, - "max_tokens": 0, - "repetition_penalty": 0, - "stop": ["string"], - }, - ) - assert_matches_type(BatchCompletion, inference, path=["response"]) - - @parametrize - def test_raw_response_batch_completion(self, client: LlamaStackClient) -> None: - response = client.inference.with_raw_response.batch_completion( - content_batch=["string"], - model_id="model_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - inference = response.parse() - assert_matches_type(BatchCompletion, inference, path=["response"]) - - @parametrize - def test_streaming_response_batch_completion(self, client: LlamaStackClient) -> None: - with client.inference.with_streaming_response.batch_completion( - content_batch=["string"], - model_id="model_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - inference = response.parse() - assert_matches_type(BatchCompletion, inference, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_chat_completion_overload_1(self, client: LlamaStackClient) -> None: - inference = client.inference.chat_completion( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model_id="model_id", - ) - assert_matches_type(ChatCompletionResponse, inference, path=["response"]) - - @parametrize - def test_method_chat_completion_with_all_params_overload_1(self, client: LlamaStackClient) -> None: - inference = client.inference.chat_completion( - messages=[ - { - "content": "string", - "role": "user", - "context": "string", - } - ], - model_id="model_id", - logprobs={"top_k": 0}, - response_format={ - "json_schema": {"foo": True}, - "type": "json_schema", - }, - sampling_params={ - "strategy": {"type": "greedy"}, - "max_tokens": 0, - "repetition_penalty": 0, - "stop": ["string"], - }, - stream=False, - tool_choice="auto", - tool_config={ - "system_message_behavior": "append", - "tool_choice": "auto", - "tool_prompt_format": "json", - }, - tool_prompt_format="json", - tools=[ - { - "tool_name": "brave_search", - "description": "description", - "parameters": { - "foo": { - "param_type": "param_type", - "default": True, - "description": "description", - "required": True, - } - }, - } - ], - ) - assert_matches_type(ChatCompletionResponse, inference, path=["response"]) - - @parametrize - def test_raw_response_chat_completion_overload_1(self, client: LlamaStackClient) -> None: - response = client.inference.with_raw_response.chat_completion( - messages=[ - { - 
"content": "string", - "role": "user", - } - ], - model_id="model_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - inference = response.parse() - assert_matches_type(ChatCompletionResponse, inference, path=["response"]) - - @parametrize - def test_streaming_response_chat_completion_overload_1(self, client: LlamaStackClient) -> None: - with client.inference.with_streaming_response.chat_completion( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model_id="model_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - inference = response.parse() - assert_matches_type(ChatCompletionResponse, inference, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_chat_completion_overload_2(self, client: LlamaStackClient) -> None: - inference_stream = client.inference.chat_completion( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model_id="model_id", - stream=True, - ) - inference_stream.response.close() - - @parametrize - def test_method_chat_completion_with_all_params_overload_2(self, client: LlamaStackClient) -> None: - inference_stream = client.inference.chat_completion( - messages=[ - { - "content": "string", - "role": "user", - "context": "string", - } - ], - model_id="model_id", - stream=True, - logprobs={"top_k": 0}, - response_format={ - "json_schema": {"foo": True}, - "type": "json_schema", - }, - sampling_params={ - "strategy": {"type": "greedy"}, - "max_tokens": 0, - "repetition_penalty": 0, - "stop": ["string"], - }, - tool_choice="auto", - tool_config={ - "system_message_behavior": "append", - "tool_choice": "auto", - "tool_prompt_format": "json", - }, - tool_prompt_format="json", - tools=[ - { - "tool_name": "brave_search", - "description": "description", - "parameters": { - "foo": { - "param_type": "param_type", - "default": True, - "description": "description", - "required": True, - } - }, - } - ], - ) - inference_stream.response.close() - - @parametrize - def test_raw_response_chat_completion_overload_2(self, client: LlamaStackClient) -> None: - response = client.inference.with_raw_response.chat_completion( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model_id="model_id", - stream=True, - ) - - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - stream = response.parse() - stream.close() - - @parametrize - def test_streaming_response_chat_completion_overload_2(self, client: LlamaStackClient) -> None: - with client.inference.with_streaming_response.chat_completion( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model_id="model_id", - stream=True, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - stream = response.parse() - stream.close() - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_completion_overload_1(self, client: LlamaStackClient) -> None: - inference = client.inference.completion( - content="string", - model_id="model_id", - ) - assert_matches_type(CompletionResponse, inference, path=["response"]) - - @parametrize - def test_method_completion_with_all_params_overload_1(self, client: LlamaStackClient) -> None: - inference = client.inference.completion( - content="string", - model_id="model_id", - logprobs={"top_k": 0}, - response_format={ - "json_schema": 
{"foo": True}, - "type": "json_schema", - }, - sampling_params={ - "strategy": {"type": "greedy"}, - "max_tokens": 0, - "repetition_penalty": 0, - "stop": ["string"], - }, - stream=False, - ) - assert_matches_type(CompletionResponse, inference, path=["response"]) - - @parametrize - def test_raw_response_completion_overload_1(self, client: LlamaStackClient) -> None: - response = client.inference.with_raw_response.completion( - content="string", - model_id="model_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - inference = response.parse() - assert_matches_type(CompletionResponse, inference, path=["response"]) - - @parametrize - def test_streaming_response_completion_overload_1(self, client: LlamaStackClient) -> None: - with client.inference.with_streaming_response.completion( - content="string", - model_id="model_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - inference = response.parse() - assert_matches_type(CompletionResponse, inference, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_completion_overload_2(self, client: LlamaStackClient) -> None: - inference_stream = client.inference.completion( - content="string", - model_id="model_id", - stream=True, - ) - inference_stream.response.close() - - @parametrize - def test_method_completion_with_all_params_overload_2(self, client: LlamaStackClient) -> None: - inference_stream = client.inference.completion( - content="string", - model_id="model_id", - stream=True, - logprobs={"top_k": 0}, - response_format={ - "json_schema": {"foo": True}, - "type": "json_schema", - }, - sampling_params={ - "strategy": {"type": "greedy"}, - "max_tokens": 0, - "repetition_penalty": 0, - "stop": ["string"], - }, - ) - inference_stream.response.close() - - @parametrize - def test_raw_response_completion_overload_2(self, client: LlamaStackClient) -> None: - response = client.inference.with_raw_response.completion( - content="string", - model_id="model_id", - stream=True, - ) - - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - stream = response.parse() - stream.close() - - @parametrize - def test_streaming_response_completion_overload_2(self, client: LlamaStackClient) -> None: - with client.inference.with_streaming_response.completion( - content="string", - model_id="model_id", - stream=True, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - stream = response.parse() - stream.close() - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_embeddings(self, client: LlamaStackClient) -> None: - inference = client.inference.embeddings( - contents=["string"], - model_id="model_id", - ) - assert_matches_type(EmbeddingsResponse, inference, path=["response"]) - - @parametrize - def test_method_embeddings_with_all_params(self, client: LlamaStackClient) -> None: - inference = client.inference.embeddings( - contents=["string"], - model_id="model_id", - output_dimension=0, - task_type="query", - text_truncation="none", - ) - assert_matches_type(EmbeddingsResponse, inference, path=["response"]) - - @parametrize - def test_raw_response_embeddings(self, client: LlamaStackClient) -> None: - response = client.inference.with_raw_response.embeddings( - contents=["string"], - model_id="model_id", - ) - - assert response.is_closed is True - 
assert response.http_request.headers.get("X-Stainless-Lang") == "python" - inference = response.parse() - assert_matches_type(EmbeddingsResponse, inference, path=["response"]) - - @parametrize - def test_streaming_response_embeddings(self, client: LlamaStackClient) -> None: - with client.inference.with_streaming_response.embeddings( - contents=["string"], - model_id="model_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - inference = response.parse() - assert_matches_type(EmbeddingsResponse, inference, path=["response"]) - - assert cast(Any, response.is_closed) is True - - -class TestAsyncInference: - parametrize = pytest.mark.parametrize( - "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] - ) - - @parametrize - async def test_method_batch_chat_completion(self, async_client: AsyncLlamaStackClient) -> None: - inference = await async_client.inference.batch_chat_completion( - messages_batch=[ - [ - { - "content": "string", - "role": "user", - } - ] - ], - model_id="model_id", - ) - assert_matches_type(InferenceBatchChatCompletionResponse, inference, path=["response"]) - - @parametrize - async def test_method_batch_chat_completion_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - inference = await async_client.inference.batch_chat_completion( - messages_batch=[ - [ - { - "content": "string", - "role": "user", - "context": "string", - } - ] - ], - model_id="model_id", - logprobs={"top_k": 0}, - response_format={ - "json_schema": {"foo": True}, - "type": "json_schema", - }, - sampling_params={ - "strategy": {"type": "greedy"}, - "max_tokens": 0, - "repetition_penalty": 0, - "stop": ["string"], - }, - tool_config={ - "system_message_behavior": "append", - "tool_choice": "auto", - "tool_prompt_format": "json", - }, - tools=[ - { - "tool_name": "brave_search", - "description": "description", - "parameters": { - "foo": { - "param_type": "param_type", - "default": True, - "description": "description", - "required": True, - } - }, - } - ], - ) - assert_matches_type(InferenceBatchChatCompletionResponse, inference, path=["response"]) - - @parametrize - async def test_raw_response_batch_chat_completion(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.inference.with_raw_response.batch_chat_completion( - messages_batch=[ - [ - { - "content": "string", - "role": "user", - } - ] - ], - model_id="model_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - inference = await response.parse() - assert_matches_type(InferenceBatchChatCompletionResponse, inference, path=["response"]) - - @parametrize - async def test_streaming_response_batch_chat_completion(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.inference.with_streaming_response.batch_chat_completion( - messages_batch=[ - [ - { - "content": "string", - "role": "user", - } - ] - ], - model_id="model_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - inference = await response.parse() - assert_matches_type(InferenceBatchChatCompletionResponse, inference, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_batch_completion(self, async_client: AsyncLlamaStackClient) -> None: - inference = await 
async_client.inference.batch_completion( - content_batch=["string"], - model_id="model_id", - ) - assert_matches_type(BatchCompletion, inference, path=["response"]) - - @parametrize - async def test_method_batch_completion_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - inference = await async_client.inference.batch_completion( - content_batch=["string"], - model_id="model_id", - logprobs={"top_k": 0}, - response_format={ - "json_schema": {"foo": True}, - "type": "json_schema", - }, - sampling_params={ - "strategy": {"type": "greedy"}, - "max_tokens": 0, - "repetition_penalty": 0, - "stop": ["string"], - }, - ) - assert_matches_type(BatchCompletion, inference, path=["response"]) - - @parametrize - async def test_raw_response_batch_completion(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.inference.with_raw_response.batch_completion( - content_batch=["string"], - model_id="model_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - inference = await response.parse() - assert_matches_type(BatchCompletion, inference, path=["response"]) - - @parametrize - async def test_streaming_response_batch_completion(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.inference.with_streaming_response.batch_completion( - content_batch=["string"], - model_id="model_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - inference = await response.parse() - assert_matches_type(BatchCompletion, inference, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_chat_completion_overload_1(self, async_client: AsyncLlamaStackClient) -> None: - inference = await async_client.inference.chat_completion( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model_id="model_id", - ) - assert_matches_type(ChatCompletionResponse, inference, path=["response"]) - - @parametrize - async def test_method_chat_completion_with_all_params_overload_1(self, async_client: AsyncLlamaStackClient) -> None: - inference = await async_client.inference.chat_completion( - messages=[ - { - "content": "string", - "role": "user", - "context": "string", - } - ], - model_id="model_id", - logprobs={"top_k": 0}, - response_format={ - "json_schema": {"foo": True}, - "type": "json_schema", - }, - sampling_params={ - "strategy": {"type": "greedy"}, - "max_tokens": 0, - "repetition_penalty": 0, - "stop": ["string"], - }, - stream=False, - tool_choice="auto", - tool_config={ - "system_message_behavior": "append", - "tool_choice": "auto", - "tool_prompt_format": "json", - }, - tool_prompt_format="json", - tools=[ - { - "tool_name": "brave_search", - "description": "description", - "parameters": { - "foo": { - "param_type": "param_type", - "default": True, - "description": "description", - "required": True, - } - }, - } - ], - ) - assert_matches_type(ChatCompletionResponse, inference, path=["response"]) - - @parametrize - async def test_raw_response_chat_completion_overload_1(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.inference.with_raw_response.chat_completion( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model_id="model_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - inference = await response.parse() - 
assert_matches_type(ChatCompletionResponse, inference, path=["response"]) - - @parametrize - async def test_streaming_response_chat_completion_overload_1(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.inference.with_streaming_response.chat_completion( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model_id="model_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - inference = await response.parse() - assert_matches_type(ChatCompletionResponse, inference, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_chat_completion_overload_2(self, async_client: AsyncLlamaStackClient) -> None: - inference_stream = await async_client.inference.chat_completion( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model_id="model_id", - stream=True, - ) - await inference_stream.response.aclose() - - @parametrize - async def test_method_chat_completion_with_all_params_overload_2(self, async_client: AsyncLlamaStackClient) -> None: - inference_stream = await async_client.inference.chat_completion( - messages=[ - { - "content": "string", - "role": "user", - "context": "string", - } - ], - model_id="model_id", - stream=True, - logprobs={"top_k": 0}, - response_format={ - "json_schema": {"foo": True}, - "type": "json_schema", - }, - sampling_params={ - "strategy": {"type": "greedy"}, - "max_tokens": 0, - "repetition_penalty": 0, - "stop": ["string"], - }, - tool_choice="auto", - tool_config={ - "system_message_behavior": "append", - "tool_choice": "auto", - "tool_prompt_format": "json", - }, - tool_prompt_format="json", - tools=[ - { - "tool_name": "brave_search", - "description": "description", - "parameters": { - "foo": { - "param_type": "param_type", - "default": True, - "description": "description", - "required": True, - } - }, - } - ], - ) - await inference_stream.response.aclose() - - @parametrize - async def test_raw_response_chat_completion_overload_2(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.inference.with_raw_response.chat_completion( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model_id="model_id", - stream=True, - ) - - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - stream = await response.parse() - await stream.close() - - @parametrize - async def test_streaming_response_chat_completion_overload_2(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.inference.with_streaming_response.chat_completion( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model_id="model_id", - stream=True, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - stream = await response.parse() - await stream.close() - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_completion_overload_1(self, async_client: AsyncLlamaStackClient) -> None: - inference = await async_client.inference.completion( - content="string", - model_id="model_id", - ) - assert_matches_type(CompletionResponse, inference, path=["response"]) - - @parametrize - async def test_method_completion_with_all_params_overload_1(self, async_client: AsyncLlamaStackClient) -> None: - inference = await async_client.inference.completion( - content="string", - model_id="model_id", - logprobs={"top_k": 0}, - 
response_format={ - "json_schema": {"foo": True}, - "type": "json_schema", - }, - sampling_params={ - "strategy": {"type": "greedy"}, - "max_tokens": 0, - "repetition_penalty": 0, - "stop": ["string"], - }, - stream=False, - ) - assert_matches_type(CompletionResponse, inference, path=["response"]) - - @parametrize - async def test_raw_response_completion_overload_1(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.inference.with_raw_response.completion( - content="string", - model_id="model_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - inference = await response.parse() - assert_matches_type(CompletionResponse, inference, path=["response"]) - - @parametrize - async def test_streaming_response_completion_overload_1(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.inference.with_streaming_response.completion( - content="string", - model_id="model_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - inference = await response.parse() - assert_matches_type(CompletionResponse, inference, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_completion_overload_2(self, async_client: AsyncLlamaStackClient) -> None: - inference_stream = await async_client.inference.completion( - content="string", - model_id="model_id", - stream=True, - ) - await inference_stream.response.aclose() - - @parametrize - async def test_method_completion_with_all_params_overload_2(self, async_client: AsyncLlamaStackClient) -> None: - inference_stream = await async_client.inference.completion( - content="string", - model_id="model_id", - stream=True, - logprobs={"top_k": 0}, - response_format={ - "json_schema": {"foo": True}, - "type": "json_schema", - }, - sampling_params={ - "strategy": {"type": "greedy"}, - "max_tokens": 0, - "repetition_penalty": 0, - "stop": ["string"], - }, - ) - await inference_stream.response.aclose() - - @parametrize - async def test_raw_response_completion_overload_2(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.inference.with_raw_response.completion( - content="string", - model_id="model_id", - stream=True, - ) - - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - stream = await response.parse() - await stream.close() - - @parametrize - async def test_streaming_response_completion_overload_2(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.inference.with_streaming_response.completion( - content="string", - model_id="model_id", - stream=True, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - stream = await response.parse() - await stream.close() - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_embeddings(self, async_client: AsyncLlamaStackClient) -> None: - inference = await async_client.inference.embeddings( - contents=["string"], - model_id="model_id", - ) - assert_matches_type(EmbeddingsResponse, inference, path=["response"]) - - @parametrize - async def test_method_embeddings_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - inference = await async_client.inference.embeddings( - contents=["string"], - model_id="model_id", - output_dimension=0, - task_type="query", - text_truncation="none", - ) 
-        assert_matches_type(EmbeddingsResponse, inference, path=["response"])
-
-    @parametrize
-    async def test_raw_response_embeddings(self, async_client: AsyncLlamaStackClient) -> None:
-        response = await async_client.inference.with_raw_response.embeddings(
-            contents=["string"],
-            model_id="model_id",
-        )
-
-        assert response.is_closed is True
-        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-        inference = await response.parse()
-        assert_matches_type(EmbeddingsResponse, inference, path=["response"])
-
-    @parametrize
-    async def test_streaming_response_embeddings(self, async_client: AsyncLlamaStackClient) -> None:
-        async with async_client.inference.with_streaming_response.embeddings(
-            contents=["string"],
-            model_id="model_id",
-        ) as response:
-            assert not response.is_closed
-            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
-            inference = await response.parse()
-            assert_matches_type(EmbeddingsResponse, inference, path=["response"])
-
-        assert cast(Any, response.is_closed) is True
diff --git a/tests/api_resources/test_inspect.py b/tests/api_resources/test_inspect.py
deleted file mode 100644
index a43abe6c..00000000
--- a/tests/api_resources/test_inspect.py
+++ /dev/null
@@ -1,124 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-import os
-from typing import Any, cast
-
-import pytest
-
-from tests.utils import assert_matches_type
-from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types import HealthInfo, VersionInfo
-
-base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
-
-
-class TestInspect:
-    parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
-
-    @parametrize
-    def test_method_health(self, client: LlamaStackClient) -> None:
-        inspect = client.inspect.health()
-        assert_matches_type(HealthInfo, inspect, path=["response"])
-
-    @parametrize
-    def test_raw_response_health(self, client: LlamaStackClient) -> None:
-        response = client.inspect.with_raw_response.health()
-
-        assert response.is_closed is True
-        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-        inspect = response.parse()
-        assert_matches_type(HealthInfo, inspect, path=["response"])
-
-    @parametrize
-    def test_streaming_response_health(self, client: LlamaStackClient) -> None:
-        with client.inspect.with_streaming_response.health() as response:
-            assert not response.is_closed
-            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
-            inspect = response.parse()
-            assert_matches_type(HealthInfo, inspect, path=["response"])
-
-        assert cast(Any, response.is_closed) is True
-
-    @parametrize
-    def test_method_version(self, client: LlamaStackClient) -> None:
-        inspect = client.inspect.version()
-        assert_matches_type(VersionInfo, inspect, path=["response"])
-
-    @parametrize
-    def test_raw_response_version(self, client: LlamaStackClient) -> None:
-        response = client.inspect.with_raw_response.version()
-
-        assert response.is_closed is True
-        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-        inspect = response.parse()
-        assert_matches_type(VersionInfo, inspect, path=["response"])
-
-    @parametrize
-    def test_streaming_response_version(self, client: LlamaStackClient) -> None:
-        with client.inspect.with_streaming_response.version() as response:
-            assert not response.is_closed
-            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
-            inspect = response.parse()
-            assert_matches_type(VersionInfo, inspect, path=["response"])
-
-        assert cast(Any, response.is_closed) is True
-
-
-class TestAsyncInspect:
-    parametrize = pytest.mark.parametrize(
-        "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
-    )
-
-    @parametrize
-    async def test_method_health(self, async_client: AsyncLlamaStackClient) -> None:
-        inspect = await async_client.inspect.health()
-        assert_matches_type(HealthInfo, inspect, path=["response"])
-
-    @parametrize
-    async def test_raw_response_health(self, async_client: AsyncLlamaStackClient) -> None:
-        response = await async_client.inspect.with_raw_response.health()
-
-        assert response.is_closed is True
-        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-        inspect = await response.parse()
-        assert_matches_type(HealthInfo, inspect, path=["response"])
-
-    @parametrize
-    async def test_streaming_response_health(self, async_client: AsyncLlamaStackClient) -> None:
-        async with async_client.inspect.with_streaming_response.health() as response:
-            assert not response.is_closed
-            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
-            inspect = await response.parse()
-            assert_matches_type(HealthInfo, inspect, path=["response"])
-
-        assert cast(Any, response.is_closed) is True
-
-    @parametrize
-    async def test_method_version(self, async_client: AsyncLlamaStackClient) -> None:
-        inspect = await async_client.inspect.version()
-        assert_matches_type(VersionInfo, inspect, path=["response"])
-
-    @parametrize
-    async def test_raw_response_version(self, async_client: AsyncLlamaStackClient) -> None:
-        response = await async_client.inspect.with_raw_response.version()
-
-        assert response.is_closed is True
-        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-        inspect = await response.parse()
-        assert_matches_type(VersionInfo, inspect, path=["response"])
-
-    @parametrize
-    async def test_streaming_response_version(self, async_client: AsyncLlamaStackClient) -> None:
-        async with async_client.inspect.with_streaming_response.version() as response:
-            assert not response.is_closed
-            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
-            inspect = await response.parse()
-            assert_matches_type(VersionInfo, inspect, path=["response"])
-
-        assert cast(Any, response.is_closed) is True
diff --git a/tests/api_resources/test_models.py b/tests/api_resources/test_models.py
deleted file mode 100644
index 2e3f15be..00000000
--- a/tests/api_resources/test_models.py
+++ /dev/null
@@ -1,310 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
- -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types import Model, ModelListResponse - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestModels: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_retrieve(self, client: LlamaStackClient) -> None: - model = client.models.retrieve( - "model_id", - ) - assert_matches_type(Model, model, path=["response"]) - - @parametrize - def test_raw_response_retrieve(self, client: LlamaStackClient) -> None: - response = client.models.with_raw_response.retrieve( - "model_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - model = response.parse() - assert_matches_type(Model, model, path=["response"]) - - @parametrize - def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None: - with client.models.with_streaming_response.retrieve( - "model_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - model = response.parse() - assert_matches_type(Model, model, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_retrieve(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `model_id` but received ''"): - client.models.with_raw_response.retrieve( - "", - ) - - @parametrize - def test_method_list(self, client: LlamaStackClient) -> None: - model = client.models.list() - assert_matches_type(ModelListResponse, model, path=["response"]) - - @parametrize - def test_raw_response_list(self, client: LlamaStackClient) -> None: - response = client.models.with_raw_response.list() - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - model = response.parse() - assert_matches_type(ModelListResponse, model, path=["response"]) - - @parametrize - def test_streaming_response_list(self, client: LlamaStackClient) -> None: - with client.models.with_streaming_response.list() as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - model = response.parse() - assert_matches_type(ModelListResponse, model, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_register(self, client: LlamaStackClient) -> None: - model = client.models.register( - model_id="model_id", - ) - assert_matches_type(Model, model, path=["response"]) - - @parametrize - def test_method_register_with_all_params(self, client: LlamaStackClient) -> None: - model = client.models.register( - model_id="model_id", - metadata={"foo": True}, - model_type="llm", - provider_id="provider_id", - provider_model_id="provider_model_id", - ) - assert_matches_type(Model, model, path=["response"]) - - @parametrize - def test_raw_response_register(self, client: LlamaStackClient) -> None: - response = client.models.with_raw_response.register( - model_id="model_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - model = response.parse() - assert_matches_type(Model, model, path=["response"]) - - 
@parametrize - def test_streaming_response_register(self, client: LlamaStackClient) -> None: - with client.models.with_streaming_response.register( - model_id="model_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - model = response.parse() - assert_matches_type(Model, model, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_unregister(self, client: LlamaStackClient) -> None: - model = client.models.unregister( - "model_id", - ) - assert model is None - - @parametrize - def test_raw_response_unregister(self, client: LlamaStackClient) -> None: - response = client.models.with_raw_response.unregister( - "model_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - model = response.parse() - assert model is None - - @parametrize - def test_streaming_response_unregister(self, client: LlamaStackClient) -> None: - with client.models.with_streaming_response.unregister( - "model_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - model = response.parse() - assert model is None - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_unregister(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `model_id` but received ''"): - client.models.with_raw_response.unregister( - "", - ) - - -class TestAsyncModels: - parametrize = pytest.mark.parametrize( - "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] - ) - - @parametrize - async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - model = await async_client.models.retrieve( - "model_id", - ) - assert_matches_type(Model, model, path=["response"]) - - @parametrize - async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.models.with_raw_response.retrieve( - "model_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - model = await response.parse() - assert_matches_type(Model, model, path=["response"]) - - @parametrize - async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.models.with_streaming_response.retrieve( - "model_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - model = await response.parse() - assert_matches_type(Model, model, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `model_id` but received ''"): - await async_client.models.with_raw_response.retrieve( - "", - ) - - @parametrize - async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None: - model = await async_client.models.list() - assert_matches_type(ModelListResponse, model, path=["response"]) - - @parametrize - async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.models.with_raw_response.list() - - assert response.is_closed is True - assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" - model = await response.parse() - assert_matches_type(ModelListResponse, model, path=["response"]) - - @parametrize - async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.models.with_streaming_response.list() as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - model = await response.parse() - assert_matches_type(ModelListResponse, model, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_register(self, async_client: AsyncLlamaStackClient) -> None: - model = await async_client.models.register( - model_id="model_id", - ) - assert_matches_type(Model, model, path=["response"]) - - @parametrize - async def test_method_register_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - model = await async_client.models.register( - model_id="model_id", - metadata={"foo": True}, - model_type="llm", - provider_id="provider_id", - provider_model_id="provider_model_id", - ) - assert_matches_type(Model, model, path=["response"]) - - @parametrize - async def test_raw_response_register(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.models.with_raw_response.register( - model_id="model_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - model = await response.parse() - assert_matches_type(Model, model, path=["response"]) - - @parametrize - async def test_streaming_response_register(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.models.with_streaming_response.register( - model_id="model_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - model = await response.parse() - assert_matches_type(Model, model, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_unregister(self, async_client: AsyncLlamaStackClient) -> None: - model = await async_client.models.unregister( - "model_id", - ) - assert model is None - - @parametrize - async def test_raw_response_unregister(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.models.with_raw_response.unregister( - "model_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - model = await response.parse() - assert model is None - - @parametrize - async def test_streaming_response_unregister(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.models.with_streaming_response.unregister( - "model_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - model = await response.parse() - assert model is None - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_unregister(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `model_id` but received ''"): - await async_client.models.with_raw_response.unregister( - "", - ) diff --git a/tests/api_resources/test_pet.py b/tests/api_resources/test_pet.py new file mode 100644 index 00000000..d6867549 --- /dev/null +++ b/tests/api_resources/test_pet.py @@ -0,0 +1,717 @@ +# File generated 
from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from tests.utils import assert_matches_type +from llama_stack_cli import LlamaStackCli, AsyncLlamaStackCli +from llama_stack_cli.types import ( + Pet, + PetFindByTagsResponse, + PetUploadImageResponse, + PetFindByStatusResponse, +) + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestPet: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @pytest.mark.skip() + @parametrize + def test_method_create(self, client: LlamaStackCli) -> None: + pet = client.pet.create( + name="doggie", + photo_urls=["string"], + ) + assert_matches_type(Pet, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_method_create_with_all_params(self, client: LlamaStackCli) -> None: + pet = client.pet.create( + name="doggie", + photo_urls=["string"], + id=10, + category={ + "id": 1, + "name": "Dogs", + }, + status="available", + tags=[ + { + "id": 0, + "name": "name", + } + ], + ) + assert_matches_type(Pet, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_raw_response_create(self, client: LlamaStackCli) -> None: + response = client.pet.with_raw_response.create( + name="doggie", + photo_urls=["string"], + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + pet = response.parse() + assert_matches_type(Pet, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_streaming_response_create(self, client: LlamaStackCli) -> None: + with client.pet.with_streaming_response.create( + name="doggie", + photo_urls=["string"], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + pet = response.parse() + assert_matches_type(Pet, pet, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + def test_method_retrieve(self, client: LlamaStackCli) -> None: + pet = client.pet.retrieve( + 0, + ) + assert_matches_type(Pet, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_raw_response_retrieve(self, client: LlamaStackCli) -> None: + response = client.pet.with_raw_response.retrieve( + 0, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + pet = response.parse() + assert_matches_type(Pet, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_streaming_response_retrieve(self, client: LlamaStackCli) -> None: + with client.pet.with_streaming_response.retrieve( + 0, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + pet = response.parse() + assert_matches_type(Pet, pet, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + def test_method_update(self, client: LlamaStackCli) -> None: + pet = client.pet.update( + name="doggie", + photo_urls=["string"], + ) + assert_matches_type(Pet, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_method_update_with_all_params(self, client: LlamaStackCli) -> None: + pet = client.pet.update( + name="doggie", + photo_urls=["string"], + id=10, + category={ + "id": 1, + "name": "Dogs", + }, + status="available", + tags=[ + { + "id": 0, + "name": 
"name", + } + ], + ) + assert_matches_type(Pet, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_raw_response_update(self, client: LlamaStackCli) -> None: + response = client.pet.with_raw_response.update( + name="doggie", + photo_urls=["string"], + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + pet = response.parse() + assert_matches_type(Pet, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_streaming_response_update(self, client: LlamaStackCli) -> None: + with client.pet.with_streaming_response.update( + name="doggie", + photo_urls=["string"], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + pet = response.parse() + assert_matches_type(Pet, pet, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + def test_method_delete(self, client: LlamaStackCli) -> None: + pet = client.pet.delete( + 0, + ) + assert pet is None + + @pytest.mark.skip() + @parametrize + def test_raw_response_delete(self, client: LlamaStackCli) -> None: + response = client.pet.with_raw_response.delete( + 0, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + pet = response.parse() + assert pet is None + + @pytest.mark.skip() + @parametrize + def test_streaming_response_delete(self, client: LlamaStackCli) -> None: + with client.pet.with_streaming_response.delete( + 0, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + pet = response.parse() + assert pet is None + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + def test_method_find_by_status(self, client: LlamaStackCli) -> None: + pet = client.pet.find_by_status() + assert_matches_type(PetFindByStatusResponse, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_method_find_by_status_with_all_params(self, client: LlamaStackCli) -> None: + pet = client.pet.find_by_status( + status="available", + ) + assert_matches_type(PetFindByStatusResponse, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_raw_response_find_by_status(self, client: LlamaStackCli) -> None: + response = client.pet.with_raw_response.find_by_status() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + pet = response.parse() + assert_matches_type(PetFindByStatusResponse, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_streaming_response_find_by_status(self, client: LlamaStackCli) -> None: + with client.pet.with_streaming_response.find_by_status() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + pet = response.parse() + assert_matches_type(PetFindByStatusResponse, pet, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + def test_method_find_by_tags(self, client: LlamaStackCli) -> None: + pet = client.pet.find_by_tags() + assert_matches_type(PetFindByTagsResponse, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_method_find_by_tags_with_all_params(self, client: LlamaStackCli) -> None: + pet = client.pet.find_by_tags( + tags=["string"], + ) + assert_matches_type(PetFindByTagsResponse, pet, 
path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_raw_response_find_by_tags(self, client: LlamaStackCli) -> None: + response = client.pet.with_raw_response.find_by_tags() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + pet = response.parse() + assert_matches_type(PetFindByTagsResponse, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_streaming_response_find_by_tags(self, client: LlamaStackCli) -> None: + with client.pet.with_streaming_response.find_by_tags() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + pet = response.parse() + assert_matches_type(PetFindByTagsResponse, pet, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + def test_method_update_by_id(self, client: LlamaStackCli) -> None: + pet = client.pet.update_by_id( + pet_id=0, + ) + assert pet is None + + @pytest.mark.skip() + @parametrize + def test_method_update_by_id_with_all_params(self, client: LlamaStackCli) -> None: + pet = client.pet.update_by_id( + pet_id=0, + name="name", + status="status", + ) + assert pet is None + + @pytest.mark.skip() + @parametrize + def test_raw_response_update_by_id(self, client: LlamaStackCli) -> None: + response = client.pet.with_raw_response.update_by_id( + pet_id=0, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + pet = response.parse() + assert pet is None + + @pytest.mark.skip() + @parametrize + def test_streaming_response_update_by_id(self, client: LlamaStackCli) -> None: + with client.pet.with_streaming_response.update_by_id( + pet_id=0, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + pet = response.parse() + assert pet is None + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + def test_method_upload_image(self, client: LlamaStackCli) -> None: + pet = client.pet.upload_image( + pet_id=0, + ) + assert_matches_type(PetUploadImageResponse, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_method_upload_image_with_all_params(self, client: LlamaStackCli) -> None: + pet = client.pet.upload_image( + pet_id=0, + additional_metadata="additionalMetadata", + image=b"raw file contents", + ) + assert_matches_type(PetUploadImageResponse, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_raw_response_upload_image(self, client: LlamaStackCli) -> None: + response = client.pet.with_raw_response.upload_image( + pet_id=0, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + pet = response.parse() + assert_matches_type(PetUploadImageResponse, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_streaming_response_upload_image(self, client: LlamaStackCli) -> None: + with client.pet.with_streaming_response.upload_image( + pet_id=0, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + pet = response.parse() + assert_matches_type(PetUploadImageResponse, pet, path=["response"]) + + assert cast(Any, response.is_closed) is True + + +class TestAsyncPet: + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", 
"strict", "aiohttp"] + ) + + @pytest.mark.skip() + @parametrize + async def test_method_create(self, async_client: AsyncLlamaStackCli) -> None: + pet = await async_client.pet.create( + name="doggie", + photo_urls=["string"], + ) + assert_matches_type(Pet, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncLlamaStackCli) -> None: + pet = await async_client.pet.create( + name="doggie", + photo_urls=["string"], + id=10, + category={ + "id": 1, + "name": "Dogs", + }, + status="available", + tags=[ + { + "id": 0, + "name": "name", + } + ], + ) + assert_matches_type(Pet, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_raw_response_create(self, async_client: AsyncLlamaStackCli) -> None: + response = await async_client.pet.with_raw_response.create( + name="doggie", + photo_urls=["string"], + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + pet = await response.parse() + assert_matches_type(Pet, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_streaming_response_create(self, async_client: AsyncLlamaStackCli) -> None: + async with async_client.pet.with_streaming_response.create( + name="doggie", + photo_urls=["string"], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + pet = await response.parse() + assert_matches_type(Pet, pet, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + async def test_method_retrieve(self, async_client: AsyncLlamaStackCli) -> None: + pet = await async_client.pet.retrieve( + 0, + ) + assert_matches_type(Pet, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackCli) -> None: + response = await async_client.pet.with_raw_response.retrieve( + 0, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + pet = await response.parse() + assert_matches_type(Pet, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackCli) -> None: + async with async_client.pet.with_streaming_response.retrieve( + 0, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + pet = await response.parse() + assert_matches_type(Pet, pet, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + async def test_method_update(self, async_client: AsyncLlamaStackCli) -> None: + pet = await async_client.pet.update( + name="doggie", + photo_urls=["string"], + ) + assert_matches_type(Pet, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_method_update_with_all_params(self, async_client: AsyncLlamaStackCli) -> None: + pet = await async_client.pet.update( + name="doggie", + photo_urls=["string"], + id=10, + category={ + "id": 1, + "name": "Dogs", + }, + status="available", + tags=[ + { + "id": 0, + "name": "name", + } + ], + ) + assert_matches_type(Pet, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_raw_response_update(self, async_client: AsyncLlamaStackCli) -> None: + response = await async_client.pet.with_raw_response.update( + name="doggie", + 
photo_urls=["string"], + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + pet = await response.parse() + assert_matches_type(Pet, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_streaming_response_update(self, async_client: AsyncLlamaStackCli) -> None: + async with async_client.pet.with_streaming_response.update( + name="doggie", + photo_urls=["string"], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + pet = await response.parse() + assert_matches_type(Pet, pet, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + async def test_method_delete(self, async_client: AsyncLlamaStackCli) -> None: + pet = await async_client.pet.delete( + 0, + ) + assert pet is None + + @pytest.mark.skip() + @parametrize + async def test_raw_response_delete(self, async_client: AsyncLlamaStackCli) -> None: + response = await async_client.pet.with_raw_response.delete( + 0, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + pet = await response.parse() + assert pet is None + + @pytest.mark.skip() + @parametrize + async def test_streaming_response_delete(self, async_client: AsyncLlamaStackCli) -> None: + async with async_client.pet.with_streaming_response.delete( + 0, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + pet = await response.parse() + assert pet is None + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + async def test_method_find_by_status(self, async_client: AsyncLlamaStackCli) -> None: + pet = await async_client.pet.find_by_status() + assert_matches_type(PetFindByStatusResponse, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_method_find_by_status_with_all_params(self, async_client: AsyncLlamaStackCli) -> None: + pet = await async_client.pet.find_by_status( + status="available", + ) + assert_matches_type(PetFindByStatusResponse, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_raw_response_find_by_status(self, async_client: AsyncLlamaStackCli) -> None: + response = await async_client.pet.with_raw_response.find_by_status() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + pet = await response.parse() + assert_matches_type(PetFindByStatusResponse, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_streaming_response_find_by_status(self, async_client: AsyncLlamaStackCli) -> None: + async with async_client.pet.with_streaming_response.find_by_status() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + pet = await response.parse() + assert_matches_type(PetFindByStatusResponse, pet, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + async def test_method_find_by_tags(self, async_client: AsyncLlamaStackCli) -> None: + pet = await async_client.pet.find_by_tags() + assert_matches_type(PetFindByTagsResponse, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_method_find_by_tags_with_all_params(self, async_client: AsyncLlamaStackCli) -> None: + pet = await async_client.pet.find_by_tags( + 
tags=["string"], + ) + assert_matches_type(PetFindByTagsResponse, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_raw_response_find_by_tags(self, async_client: AsyncLlamaStackCli) -> None: + response = await async_client.pet.with_raw_response.find_by_tags() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + pet = await response.parse() + assert_matches_type(PetFindByTagsResponse, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_streaming_response_find_by_tags(self, async_client: AsyncLlamaStackCli) -> None: + async with async_client.pet.with_streaming_response.find_by_tags() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + pet = await response.parse() + assert_matches_type(PetFindByTagsResponse, pet, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + async def test_method_update_by_id(self, async_client: AsyncLlamaStackCli) -> None: + pet = await async_client.pet.update_by_id( + pet_id=0, + ) + assert pet is None + + @pytest.mark.skip() + @parametrize + async def test_method_update_by_id_with_all_params(self, async_client: AsyncLlamaStackCli) -> None: + pet = await async_client.pet.update_by_id( + pet_id=0, + name="name", + status="status", + ) + assert pet is None + + @pytest.mark.skip() + @parametrize + async def test_raw_response_update_by_id(self, async_client: AsyncLlamaStackCli) -> None: + response = await async_client.pet.with_raw_response.update_by_id( + pet_id=0, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + pet = await response.parse() + assert pet is None + + @pytest.mark.skip() + @parametrize + async def test_streaming_response_update_by_id(self, async_client: AsyncLlamaStackCli) -> None: + async with async_client.pet.with_streaming_response.update_by_id( + pet_id=0, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + pet = await response.parse() + assert pet is None + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + async def test_method_upload_image(self, async_client: AsyncLlamaStackCli) -> None: + pet = await async_client.pet.upload_image( + pet_id=0, + ) + assert_matches_type(PetUploadImageResponse, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_method_upload_image_with_all_params(self, async_client: AsyncLlamaStackCli) -> None: + pet = await async_client.pet.upload_image( + pet_id=0, + additional_metadata="additionalMetadata", + image=b"raw file contents", + ) + assert_matches_type(PetUploadImageResponse, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_raw_response_upload_image(self, async_client: AsyncLlamaStackCli) -> None: + response = await async_client.pet.with_raw_response.upload_image( + pet_id=0, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + pet = await response.parse() + assert_matches_type(PetUploadImageResponse, pet, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_streaming_response_upload_image(self, async_client: AsyncLlamaStackCli) -> None: + async with async_client.pet.with_streaming_response.upload_image( + pet_id=0, + ) as response: + assert not 
response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + pet = await response.parse() + assert_matches_type(PetUploadImageResponse, pet, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_post_training.py b/tests/api_resources/test_post_training.py deleted file mode 100644 index 5e7430fb..00000000 --- a/tests/api_resources/test_post_training.py +++ /dev/null @@ -1,462 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types import ( - PostTrainingJob, -) - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestPostTraining: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_preference_optimize(self, client: LlamaStackClient) -> None: - post_training = client.post_training.preference_optimize( - algorithm_config={ - "epsilon": 0, - "gamma": 0, - "reward_clip": 0, - "reward_scale": 0, - }, - finetuned_model="finetuned_model", - hyperparam_search_config={"foo": True}, - job_uuid="job_uuid", - logger_config={"foo": True}, - training_config={ - "gradient_accumulation_steps": 0, - "max_steps_per_epoch": 0, - "n_epochs": 0, - }, - ) - assert_matches_type(PostTrainingJob, post_training, path=["response"]) - - @parametrize - def test_method_preference_optimize_with_all_params(self, client: LlamaStackClient) -> None: - post_training = client.post_training.preference_optimize( - algorithm_config={ - "epsilon": 0, - "gamma": 0, - "reward_clip": 0, - "reward_scale": 0, - }, - finetuned_model="finetuned_model", - hyperparam_search_config={"foo": True}, - job_uuid="job_uuid", - logger_config={"foo": True}, - training_config={ - "gradient_accumulation_steps": 0, - "max_steps_per_epoch": 0, - "n_epochs": 0, - "data_config": { - "batch_size": 0, - "data_format": "instruct", - "dataset_id": "dataset_id", - "shuffle": True, - "packed": True, - "train_on_input": True, - "validation_dataset_id": "validation_dataset_id", - }, - "dtype": "dtype", - "efficiency_config": { - "enable_activation_checkpointing": True, - "enable_activation_offloading": True, - "fsdp_cpu_offload": True, - "memory_efficient_fsdp_wrap": True, - }, - "max_validation_steps": 0, - "optimizer_config": { - "lr": 0, - "num_warmup_steps": 0, - "optimizer_type": "adam", - "weight_decay": 0, - }, - }, - ) - assert_matches_type(PostTrainingJob, post_training, path=["response"]) - - @parametrize - def test_raw_response_preference_optimize(self, client: LlamaStackClient) -> None: - response = client.post_training.with_raw_response.preference_optimize( - algorithm_config={ - "epsilon": 0, - "gamma": 0, - "reward_clip": 0, - "reward_scale": 0, - }, - finetuned_model="finetuned_model", - hyperparam_search_config={"foo": True}, - job_uuid="job_uuid", - logger_config={"foo": True}, - training_config={ - "gradient_accumulation_steps": 0, - "max_steps_per_epoch": 0, - "n_epochs": 0, - }, - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - post_training = response.parse() - assert_matches_type(PostTrainingJob, post_training, path=["response"]) - - @parametrize - def 
test_streaming_response_preference_optimize(self, client: LlamaStackClient) -> None: - with client.post_training.with_streaming_response.preference_optimize( - algorithm_config={ - "epsilon": 0, - "gamma": 0, - "reward_clip": 0, - "reward_scale": 0, - }, - finetuned_model="finetuned_model", - hyperparam_search_config={"foo": True}, - job_uuid="job_uuid", - logger_config={"foo": True}, - training_config={ - "gradient_accumulation_steps": 0, - "max_steps_per_epoch": 0, - "n_epochs": 0, - }, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - post_training = response.parse() - assert_matches_type(PostTrainingJob, post_training, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_supervised_fine_tune(self, client: LlamaStackClient) -> None: - post_training = client.post_training.supervised_fine_tune( - hyperparam_search_config={"foo": True}, - job_uuid="job_uuid", - logger_config={"foo": True}, - training_config={ - "gradient_accumulation_steps": 0, - "max_steps_per_epoch": 0, - "n_epochs": 0, - }, - ) - assert_matches_type(PostTrainingJob, post_training, path=["response"]) - - @parametrize - def test_method_supervised_fine_tune_with_all_params(self, client: LlamaStackClient) -> None: - post_training = client.post_training.supervised_fine_tune( - hyperparam_search_config={"foo": True}, - job_uuid="job_uuid", - logger_config={"foo": True}, - training_config={ - "gradient_accumulation_steps": 0, - "max_steps_per_epoch": 0, - "n_epochs": 0, - "data_config": { - "batch_size": 0, - "data_format": "instruct", - "dataset_id": "dataset_id", - "shuffle": True, - "packed": True, - "train_on_input": True, - "validation_dataset_id": "validation_dataset_id", - }, - "dtype": "dtype", - "efficiency_config": { - "enable_activation_checkpointing": True, - "enable_activation_offloading": True, - "fsdp_cpu_offload": True, - "memory_efficient_fsdp_wrap": True, - }, - "max_validation_steps": 0, - "optimizer_config": { - "lr": 0, - "num_warmup_steps": 0, - "optimizer_type": "adam", - "weight_decay": 0, - }, - }, - algorithm_config={ - "alpha": 0, - "apply_lora_to_mlp": True, - "apply_lora_to_output": True, - "lora_attn_modules": ["string"], - "rank": 0, - "type": "LoRA", - "quantize_base": True, - "use_dora": True, - }, - checkpoint_dir="checkpoint_dir", - model="model", - ) - assert_matches_type(PostTrainingJob, post_training, path=["response"]) - - @parametrize - def test_raw_response_supervised_fine_tune(self, client: LlamaStackClient) -> None: - response = client.post_training.with_raw_response.supervised_fine_tune( - hyperparam_search_config={"foo": True}, - job_uuid="job_uuid", - logger_config={"foo": True}, - training_config={ - "gradient_accumulation_steps": 0, - "max_steps_per_epoch": 0, - "n_epochs": 0, - }, - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - post_training = response.parse() - assert_matches_type(PostTrainingJob, post_training, path=["response"]) - - @parametrize - def test_streaming_response_supervised_fine_tune(self, client: LlamaStackClient) -> None: - with client.post_training.with_streaming_response.supervised_fine_tune( - hyperparam_search_config={"foo": True}, - job_uuid="job_uuid", - logger_config={"foo": True}, - training_config={ - "gradient_accumulation_steps": 0, - "max_steps_per_epoch": 0, - "n_epochs": 0, - }, - ) as response: - assert not response.is_closed - assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" - - post_training = response.parse() - assert_matches_type(PostTrainingJob, post_training, path=["response"]) - - assert cast(Any, response.is_closed) is True - - -class TestAsyncPostTraining: - parametrize = pytest.mark.parametrize( - "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] - ) - - @parametrize - async def test_method_preference_optimize(self, async_client: AsyncLlamaStackClient) -> None: - post_training = await async_client.post_training.preference_optimize( - algorithm_config={ - "epsilon": 0, - "gamma": 0, - "reward_clip": 0, - "reward_scale": 0, - }, - finetuned_model="finetuned_model", - hyperparam_search_config={"foo": True}, - job_uuid="job_uuid", - logger_config={"foo": True}, - training_config={ - "gradient_accumulation_steps": 0, - "max_steps_per_epoch": 0, - "n_epochs": 0, - }, - ) - assert_matches_type(PostTrainingJob, post_training, path=["response"]) - - @parametrize - async def test_method_preference_optimize_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - post_training = await async_client.post_training.preference_optimize( - algorithm_config={ - "epsilon": 0, - "gamma": 0, - "reward_clip": 0, - "reward_scale": 0, - }, - finetuned_model="finetuned_model", - hyperparam_search_config={"foo": True}, - job_uuid="job_uuid", - logger_config={"foo": True}, - training_config={ - "gradient_accumulation_steps": 0, - "max_steps_per_epoch": 0, - "n_epochs": 0, - "data_config": { - "batch_size": 0, - "data_format": "instruct", - "dataset_id": "dataset_id", - "shuffle": True, - "packed": True, - "train_on_input": True, - "validation_dataset_id": "validation_dataset_id", - }, - "dtype": "dtype", - "efficiency_config": { - "enable_activation_checkpointing": True, - "enable_activation_offloading": True, - "fsdp_cpu_offload": True, - "memory_efficient_fsdp_wrap": True, - }, - "max_validation_steps": 0, - "optimizer_config": { - "lr": 0, - "num_warmup_steps": 0, - "optimizer_type": "adam", - "weight_decay": 0, - }, - }, - ) - assert_matches_type(PostTrainingJob, post_training, path=["response"]) - - @parametrize - async def test_raw_response_preference_optimize(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.post_training.with_raw_response.preference_optimize( - algorithm_config={ - "epsilon": 0, - "gamma": 0, - "reward_clip": 0, - "reward_scale": 0, - }, - finetuned_model="finetuned_model", - hyperparam_search_config={"foo": True}, - job_uuid="job_uuid", - logger_config={"foo": True}, - training_config={ - "gradient_accumulation_steps": 0, - "max_steps_per_epoch": 0, - "n_epochs": 0, - }, - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - post_training = await response.parse() - assert_matches_type(PostTrainingJob, post_training, path=["response"]) - - @parametrize - async def test_streaming_response_preference_optimize(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.post_training.with_streaming_response.preference_optimize( - algorithm_config={ - "epsilon": 0, - "gamma": 0, - "reward_clip": 0, - "reward_scale": 0, - }, - finetuned_model="finetuned_model", - hyperparam_search_config={"foo": True}, - job_uuid="job_uuid", - logger_config={"foo": True}, - training_config={ - "gradient_accumulation_steps": 0, - "max_steps_per_epoch": 0, - "n_epochs": 0, - }, - ) as response: - assert not response.is_closed - 
assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - post_training = await response.parse() - assert_matches_type(PostTrainingJob, post_training, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_supervised_fine_tune(self, async_client: AsyncLlamaStackClient) -> None: - post_training = await async_client.post_training.supervised_fine_tune( - hyperparam_search_config={"foo": True}, - job_uuid="job_uuid", - logger_config={"foo": True}, - training_config={ - "gradient_accumulation_steps": 0, - "max_steps_per_epoch": 0, - "n_epochs": 0, - }, - ) - assert_matches_type(PostTrainingJob, post_training, path=["response"]) - - @parametrize - async def test_method_supervised_fine_tune_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - post_training = await async_client.post_training.supervised_fine_tune( - hyperparam_search_config={"foo": True}, - job_uuid="job_uuid", - logger_config={"foo": True}, - training_config={ - "gradient_accumulation_steps": 0, - "max_steps_per_epoch": 0, - "n_epochs": 0, - "data_config": { - "batch_size": 0, - "data_format": "instruct", - "dataset_id": "dataset_id", - "shuffle": True, - "packed": True, - "train_on_input": True, - "validation_dataset_id": "validation_dataset_id", - }, - "dtype": "dtype", - "efficiency_config": { - "enable_activation_checkpointing": True, - "enable_activation_offloading": True, - "fsdp_cpu_offload": True, - "memory_efficient_fsdp_wrap": True, - }, - "max_validation_steps": 0, - "optimizer_config": { - "lr": 0, - "num_warmup_steps": 0, - "optimizer_type": "adam", - "weight_decay": 0, - }, - }, - algorithm_config={ - "alpha": 0, - "apply_lora_to_mlp": True, - "apply_lora_to_output": True, - "lora_attn_modules": ["string"], - "rank": 0, - "type": "LoRA", - "quantize_base": True, - "use_dora": True, - }, - checkpoint_dir="checkpoint_dir", - model="model", - ) - assert_matches_type(PostTrainingJob, post_training, path=["response"]) - - @parametrize - async def test_raw_response_supervised_fine_tune(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.post_training.with_raw_response.supervised_fine_tune( - hyperparam_search_config={"foo": True}, - job_uuid="job_uuid", - logger_config={"foo": True}, - training_config={ - "gradient_accumulation_steps": 0, - "max_steps_per_epoch": 0, - "n_epochs": 0, - }, - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - post_training = await response.parse() - assert_matches_type(PostTrainingJob, post_training, path=["response"]) - - @parametrize - async def test_streaming_response_supervised_fine_tune(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.post_training.with_streaming_response.supervised_fine_tune( - hyperparam_search_config={"foo": True}, - job_uuid="job_uuid", - logger_config={"foo": True}, - training_config={ - "gradient_accumulation_steps": 0, - "max_steps_per_epoch": 0, - "n_epochs": 0, - }, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - post_training = await response.parse() - assert_matches_type(PostTrainingJob, post_training, path=["response"]) - - assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_providers.py b/tests/api_resources/test_providers.py deleted file mode 100644 index b23a84bd..00000000 --- a/tests/api_resources/test_providers.py +++ /dev/null @@ 
-1,150 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types import ProviderInfo, ProviderListResponse - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestProviders: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_retrieve(self, client: LlamaStackClient) -> None: - provider = client.providers.retrieve( - "provider_id", - ) - assert_matches_type(ProviderInfo, provider, path=["response"]) - - @parametrize - def test_raw_response_retrieve(self, client: LlamaStackClient) -> None: - response = client.providers.with_raw_response.retrieve( - "provider_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - provider = response.parse() - assert_matches_type(ProviderInfo, provider, path=["response"]) - - @parametrize - def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None: - with client.providers.with_streaming_response.retrieve( - "provider_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - provider = response.parse() - assert_matches_type(ProviderInfo, provider, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_retrieve(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `provider_id` but received ''"): - client.providers.with_raw_response.retrieve( - "", - ) - - @parametrize - def test_method_list(self, client: LlamaStackClient) -> None: - provider = client.providers.list() - assert_matches_type(ProviderListResponse, provider, path=["response"]) - - @parametrize - def test_raw_response_list(self, client: LlamaStackClient) -> None: - response = client.providers.with_raw_response.list() - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - provider = response.parse() - assert_matches_type(ProviderListResponse, provider, path=["response"]) - - @parametrize - def test_streaming_response_list(self, client: LlamaStackClient) -> None: - with client.providers.with_streaming_response.list() as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - provider = response.parse() - assert_matches_type(ProviderListResponse, provider, path=["response"]) - - assert cast(Any, response.is_closed) is True - - -class TestAsyncProviders: - parametrize = pytest.mark.parametrize( - "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] - ) - - @parametrize - async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - provider = await async_client.providers.retrieve( - "provider_id", - ) - assert_matches_type(ProviderInfo, provider, path=["response"]) - - @parametrize - async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.providers.with_raw_response.retrieve( - "provider_id", - ) - - assert response.is_closed is True - assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" - provider = await response.parse() - assert_matches_type(ProviderInfo, provider, path=["response"]) - - @parametrize - async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.providers.with_streaming_response.retrieve( - "provider_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - provider = await response.parse() - assert_matches_type(ProviderInfo, provider, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `provider_id` but received ''"): - await async_client.providers.with_raw_response.retrieve( - "", - ) - - @parametrize - async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None: - provider = await async_client.providers.list() - assert_matches_type(ProviderListResponse, provider, path=["response"]) - - @parametrize - async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.providers.with_raw_response.list() - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - provider = await response.parse() - assert_matches_type(ProviderListResponse, provider, path=["response"]) - - @parametrize - async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.providers.with_streaming_response.list() as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - provider = await response.parse() - assert_matches_type(ProviderListResponse, provider, path=["response"]) - - assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_responses.py b/tests/api_resources/test_responses.py deleted file mode 100644 index a3fa9fd1..00000000 --- a/tests/api_resources/test_responses.py +++ /dev/null @@ -1,426 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types import ResponseObject, ResponseListResponse - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestResponses: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_create_overload_1(self, client: LlamaStackClient) -> None: - response = client.responses.create( - input="string", - model="model", - ) - assert_matches_type(ResponseObject, response, path=["response"]) - - @parametrize - def test_method_create_with_all_params_overload_1(self, client: LlamaStackClient) -> None: - response = client.responses.create( - input="string", - model="model", - instructions="instructions", - max_infer_iters=0, - previous_response_id="previous_response_id", - store=True, - stream=False, - temperature=0, - text={ - "format": { - "type": "text", - "description": "description", - "name": "name", - "schema": {"foo": True}, - "strict": True, - } - }, - tools=[ - { - "type": "web_search", - "search_context_size": "search_context_size", - } - ], - ) - assert_matches_type(ResponseObject, response, path=["response"]) - - @parametrize - def test_raw_response_create_overload_1(self, client: LlamaStackClient) -> None: - http_response = client.responses.with_raw_response.create( - input="string", - model="model", - ) - - assert http_response.is_closed is True - assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" - response = http_response.parse() - assert_matches_type(ResponseObject, response, path=["response"]) - - @parametrize - def test_streaming_response_create_overload_1(self, client: LlamaStackClient) -> None: - with client.responses.with_streaming_response.create( - input="string", - model="model", - ) as http_response: - assert not http_response.is_closed - assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" - - response = http_response.parse() - assert_matches_type(ResponseObject, response, path=["response"]) - - assert cast(Any, http_response.is_closed) is True - - @parametrize - def test_method_create_overload_2(self, client: LlamaStackClient) -> None: - response_stream = client.responses.create( - input="string", - model="model", - stream=True, - ) - response_stream.response.close() - - @parametrize - def test_method_create_with_all_params_overload_2(self, client: LlamaStackClient) -> None: - response_stream = client.responses.create( - input="string", - model="model", - stream=True, - instructions="instructions", - max_infer_iters=0, - previous_response_id="previous_response_id", - store=True, - temperature=0, - text={ - "format": { - "type": "text", - "description": "description", - "name": "name", - "schema": {"foo": True}, - "strict": True, - } - }, - tools=[ - { - "type": "web_search", - "search_context_size": "search_context_size", - } - ], - ) - response_stream.response.close() - - @parametrize - def test_raw_response_create_overload_2(self, client: LlamaStackClient) -> None: - response = client.responses.with_raw_response.create( - input="string", - model="model", - stream=True, - ) - - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - stream = response.parse() - stream.close() - - @parametrize - def test_streaming_response_create_overload_2(self, client: 
LlamaStackClient) -> None: - with client.responses.with_streaming_response.create( - input="string", - model="model", - stream=True, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - stream = response.parse() - stream.close() - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_retrieve(self, client: LlamaStackClient) -> None: - response = client.responses.retrieve( - "response_id", - ) - assert_matches_type(ResponseObject, response, path=["response"]) - - @parametrize - def test_raw_response_retrieve(self, client: LlamaStackClient) -> None: - http_response = client.responses.with_raw_response.retrieve( - "response_id", - ) - - assert http_response.is_closed is True - assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" - response = http_response.parse() - assert_matches_type(ResponseObject, response, path=["response"]) - - @parametrize - def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None: - with client.responses.with_streaming_response.retrieve( - "response_id", - ) as http_response: - assert not http_response.is_closed - assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" - - response = http_response.parse() - assert_matches_type(ResponseObject, response, path=["response"]) - - assert cast(Any, http_response.is_closed) is True - - @parametrize - def test_path_params_retrieve(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `response_id` but received ''"): - client.responses.with_raw_response.retrieve( - "", - ) - - @parametrize - def test_method_list(self, client: LlamaStackClient) -> None: - response = client.responses.list() - assert_matches_type(ResponseListResponse, response, path=["response"]) - - @parametrize - def test_method_list_with_all_params(self, client: LlamaStackClient) -> None: - response = client.responses.list( - after="after", - limit=0, - model="model", - order="asc", - ) - assert_matches_type(ResponseListResponse, response, path=["response"]) - - @parametrize - def test_raw_response_list(self, client: LlamaStackClient) -> None: - http_response = client.responses.with_raw_response.list() - - assert http_response.is_closed is True - assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" - response = http_response.parse() - assert_matches_type(ResponseListResponse, response, path=["response"]) - - @parametrize - def test_streaming_response_list(self, client: LlamaStackClient) -> None: - with client.responses.with_streaming_response.list() as http_response: - assert not http_response.is_closed - assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" - - response = http_response.parse() - assert_matches_type(ResponseListResponse, response, path=["response"]) - - assert cast(Any, http_response.is_closed) is True - - -class TestAsyncResponses: - parametrize = pytest.mark.parametrize( - "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] - ) - - @parametrize - async def test_method_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.responses.create( - input="string", - model="model", - ) - assert_matches_type(ResponseObject, response, path=["response"]) - - @parametrize - async def test_method_create_with_all_params_overload_1(self, async_client: AsyncLlamaStackClient) -> None: 
- response = await async_client.responses.create( - input="string", - model="model", - instructions="instructions", - max_infer_iters=0, - previous_response_id="previous_response_id", - store=True, - stream=False, - temperature=0, - text={ - "format": { - "type": "text", - "description": "description", - "name": "name", - "schema": {"foo": True}, - "strict": True, - } - }, - tools=[ - { - "type": "web_search", - "search_context_size": "search_context_size", - } - ], - ) - assert_matches_type(ResponseObject, response, path=["response"]) - - @parametrize - async def test_raw_response_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: - http_response = await async_client.responses.with_raw_response.create( - input="string", - model="model", - ) - - assert http_response.is_closed is True - assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" - response = await http_response.parse() - assert_matches_type(ResponseObject, response, path=["response"]) - - @parametrize - async def test_streaming_response_create_overload_1(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.responses.with_streaming_response.create( - input="string", - model="model", - ) as http_response: - assert not http_response.is_closed - assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" - - response = await http_response.parse() - assert_matches_type(ResponseObject, response, path=["response"]) - - assert cast(Any, http_response.is_closed) is True - - @parametrize - async def test_method_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None: - response_stream = await async_client.responses.create( - input="string", - model="model", - stream=True, - ) - await response_stream.response.aclose() - - @parametrize - async def test_method_create_with_all_params_overload_2(self, async_client: AsyncLlamaStackClient) -> None: - response_stream = await async_client.responses.create( - input="string", - model="model", - stream=True, - instructions="instructions", - max_infer_iters=0, - previous_response_id="previous_response_id", - store=True, - temperature=0, - text={ - "format": { - "type": "text", - "description": "description", - "name": "name", - "schema": {"foo": True}, - "strict": True, - } - }, - tools=[ - { - "type": "web_search", - "search_context_size": "search_context_size", - } - ], - ) - await response_stream.response.aclose() - - @parametrize - async def test_raw_response_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.responses.with_raw_response.create( - input="string", - model="model", - stream=True, - ) - - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - stream = await response.parse() - await stream.close() - - @parametrize - async def test_streaming_response_create_overload_2(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.responses.with_streaming_response.create( - input="string", - model="model", - stream=True, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - stream = await response.parse() - await stream.close() - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.responses.retrieve( - "response_id", - ) - assert_matches_type(ResponseObject, response, path=["response"]) - - @parametrize - 
async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - http_response = await async_client.responses.with_raw_response.retrieve( - "response_id", - ) - - assert http_response.is_closed is True - assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" - response = await http_response.parse() - assert_matches_type(ResponseObject, response, path=["response"]) - - @parametrize - async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.responses.with_streaming_response.retrieve( - "response_id", - ) as http_response: - assert not http_response.is_closed - assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" - - response = await http_response.parse() - assert_matches_type(ResponseObject, response, path=["response"]) - - assert cast(Any, http_response.is_closed) is True - - @parametrize - async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `response_id` but received ''"): - await async_client.responses.with_raw_response.retrieve( - "", - ) - - @parametrize - async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.responses.list() - assert_matches_type(ResponseListResponse, response, path=["response"]) - - @parametrize - async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.responses.list( - after="after", - limit=0, - model="model", - order="asc", - ) - assert_matches_type(ResponseListResponse, response, path=["response"]) - - @parametrize - async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None: - http_response = await async_client.responses.with_raw_response.list() - - assert http_response.is_closed is True - assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" - response = await http_response.parse() - assert_matches_type(ResponseListResponse, response, path=["response"]) - - @parametrize - async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.responses.with_streaming_response.list() as http_response: - assert not http_response.is_closed - assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" - - response = await http_response.parse() - assert_matches_type(ResponseListResponse, response, path=["response"]) - - assert cast(Any, http_response.is_closed) is True diff --git a/tests/api_resources/test_routes.py b/tests/api_resources/test_routes.py deleted file mode 100644 index 12b51f28..00000000 --- a/tests/api_resources/test_routes.py +++ /dev/null @@ -1,74 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
-
-from __future__ import annotations
-
-import os
-from typing import Any, cast
-
-import pytest
-
-from tests.utils import assert_matches_type
-from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types import RouteListResponse
-
-base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
-
-
-class TestRoutes:
-    parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
-
-    @parametrize
-    def test_method_list(self, client: LlamaStackClient) -> None:
-        route = client.routes.list()
-        assert_matches_type(RouteListResponse, route, path=["response"])
-
-    @parametrize
-    def test_raw_response_list(self, client: LlamaStackClient) -> None:
-        response = client.routes.with_raw_response.list()
-
-        assert response.is_closed is True
-        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-        route = response.parse()
-        assert_matches_type(RouteListResponse, route, path=["response"])
-
-    @parametrize
-    def test_streaming_response_list(self, client: LlamaStackClient) -> None:
-        with client.routes.with_streaming_response.list() as response:
-            assert not response.is_closed
-            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
-            route = response.parse()
-            assert_matches_type(RouteListResponse, route, path=["response"])
-
-        assert cast(Any, response.is_closed) is True
-
-
-class TestAsyncRoutes:
-    parametrize = pytest.mark.parametrize(
-        "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
-    )
-
-    @parametrize
-    async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None:
-        route = await async_client.routes.list()
-        assert_matches_type(RouteListResponse, route, path=["response"])
-
-    @parametrize
-    async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None:
-        response = await async_client.routes.with_raw_response.list()
-
-        assert response.is_closed is True
-        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-        route = await response.parse()
-        assert_matches_type(RouteListResponse, route, path=["response"])
-
-    @parametrize
-    async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None:
-        async with async_client.routes.with_streaming_response.list() as response:
-            assert not response.is_closed
-            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
-            route = await response.parse()
-            assert_matches_type(RouteListResponse, route, path=["response"])
-
-        assert cast(Any, response.is_closed) is True
diff --git a/tests/api_resources/test_safety.py b/tests/api_resources/test_safety.py
deleted file mode 100644
index 257dfd76..00000000
--- a/tests/api_resources/test_safety.py
+++ /dev/null
@@ -1,128 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-import os
-from typing import Any, cast
-
-import pytest
-
-from tests.utils import assert_matches_type
-from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
-from llama_stack_client.types import RunShieldResponse
-
-base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
-
-
-class TestSafety:
-    parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
-
-    @parametrize
-    def test_method_run_shield(self, client: LlamaStackClient) -> None:
-        safety = client.safety.run_shield(
-            messages=[
-                {
-                    "content": "string",
-                    "role": "user",
-                }
-            ],
-            params={"foo": True},
-            shield_id="shield_id",
-        )
-        assert_matches_type(RunShieldResponse, safety, path=["response"])
-
-    @parametrize
-    def test_raw_response_run_shield(self, client: LlamaStackClient) -> None:
-        response = client.safety.with_raw_response.run_shield(
-            messages=[
-                {
-                    "content": "string",
-                    "role": "user",
-                }
-            ],
-            params={"foo": True},
-            shield_id="shield_id",
-        )
-
-        assert response.is_closed is True
-        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-        safety = response.parse()
-        assert_matches_type(RunShieldResponse, safety, path=["response"])
-
-    @parametrize
-    def test_streaming_response_run_shield(self, client: LlamaStackClient) -> None:
-        with client.safety.with_streaming_response.run_shield(
-            messages=[
-                {
-                    "content": "string",
-                    "role": "user",
-                }
-            ],
-            params={"foo": True},
-            shield_id="shield_id",
-        ) as response:
-            assert not response.is_closed
-            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
-            safety = response.parse()
-            assert_matches_type(RunShieldResponse, safety, path=["response"])
-
-        assert cast(Any, response.is_closed) is True
-
-
-class TestAsyncSafety:
-    parametrize = pytest.mark.parametrize(
-        "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"]
-    )
-
-    @parametrize
-    async def test_method_run_shield(self, async_client: AsyncLlamaStackClient) -> None:
-        safety = await async_client.safety.run_shield(
-            messages=[
-                {
-                    "content": "string",
-                    "role": "user",
-                }
-            ],
-            params={"foo": True},
-            shield_id="shield_id",
-        )
-        assert_matches_type(RunShieldResponse, safety, path=["response"])
-
-    @parametrize
-    async def test_raw_response_run_shield(self, async_client: AsyncLlamaStackClient) -> None:
-        response = await async_client.safety.with_raw_response.run_shield(
-            messages=[
-                {
-                    "content": "string",
-                    "role": "user",
-                }
-            ],
-            params={"foo": True},
-            shield_id="shield_id",
-        )
-
-        assert response.is_closed is True
-        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-        safety = await response.parse()
-        assert_matches_type(RunShieldResponse, safety, path=["response"])
-
-    @parametrize
-    async def test_streaming_response_run_shield(self, async_client: AsyncLlamaStackClient) -> None:
-        async with async_client.safety.with_streaming_response.run_shield(
-            messages=[
-                {
-                    "content": "string",
-                    "role": "user",
-                }
-            ],
-            params={"foo": True},
-            shield_id="shield_id",
-        ) as response:
-            assert not response.is_closed
-            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
-
-            safety = await response.parse()
-            assert_matches_type(RunShieldResponse, safety, path=["response"])
-
-        assert cast(Any, response.is_closed) is True
diff --git a/tests/api_resources/test_scoring.py b/tests/api_resources/test_scoring.py
deleted file mode 100644
index
ed46bd07..00000000 --- a/tests/api_resources/test_scoring.py +++ /dev/null @@ -1,253 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types import ( - ScoringScoreResponse, - ScoringScoreBatchResponse, -) - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestScoring: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_score(self, client: LlamaStackClient) -> None: - scoring = client.scoring.score( - input_rows=[{"foo": True}], - scoring_functions={ - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - ) - assert_matches_type(ScoringScoreResponse, scoring, path=["response"]) - - @parametrize - def test_raw_response_score(self, client: LlamaStackClient) -> None: - response = client.scoring.with_raw_response.score( - input_rows=[{"foo": True}], - scoring_functions={ - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - scoring = response.parse() - assert_matches_type(ScoringScoreResponse, scoring, path=["response"]) - - @parametrize - def test_streaming_response_score(self, client: LlamaStackClient) -> None: - with client.scoring.with_streaming_response.score( - input_rows=[{"foo": True}], - scoring_functions={ - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - scoring = response.parse() - assert_matches_type(ScoringScoreResponse, scoring, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_score_batch(self, client: LlamaStackClient) -> None: - scoring = client.scoring.score_batch( - dataset_id="dataset_id", - save_results_dataset=True, - scoring_functions={ - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - ) - assert_matches_type(ScoringScoreBatchResponse, scoring, path=["response"]) - - @parametrize - def test_raw_response_score_batch(self, client: LlamaStackClient) -> None: - response = client.scoring.with_raw_response.score_batch( - dataset_id="dataset_id", - save_results_dataset=True, - scoring_functions={ - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - scoring = response.parse() - assert_matches_type(ScoringScoreBatchResponse, scoring, path=["response"]) - - @parametrize - def test_streaming_response_score_batch(self, client: LlamaStackClient) -> None: - with client.scoring.with_streaming_response.score_batch( - dataset_id="dataset_id", - 
save_results_dataset=True, - scoring_functions={ - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - scoring = response.parse() - assert_matches_type(ScoringScoreBatchResponse, scoring, path=["response"]) - - assert cast(Any, response.is_closed) is True - - -class TestAsyncScoring: - parametrize = pytest.mark.parametrize( - "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] - ) - - @parametrize - async def test_method_score(self, async_client: AsyncLlamaStackClient) -> None: - scoring = await async_client.scoring.score( - input_rows=[{"foo": True}], - scoring_functions={ - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - ) - assert_matches_type(ScoringScoreResponse, scoring, path=["response"]) - - @parametrize - async def test_raw_response_score(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.scoring.with_raw_response.score( - input_rows=[{"foo": True}], - scoring_functions={ - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - scoring = await response.parse() - assert_matches_type(ScoringScoreResponse, scoring, path=["response"]) - - @parametrize - async def test_streaming_response_score(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.scoring.with_streaming_response.score( - input_rows=[{"foo": True}], - scoring_functions={ - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - scoring = await response.parse() - assert_matches_type(ScoringScoreResponse, scoring, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_score_batch(self, async_client: AsyncLlamaStackClient) -> None: - scoring = await async_client.scoring.score_batch( - dataset_id="dataset_id", - save_results_dataset=True, - scoring_functions={ - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - ) - assert_matches_type(ScoringScoreBatchResponse, scoring, path=["response"]) - - @parametrize - async def test_raw_response_score_batch(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.scoring.with_raw_response.score_batch( - dataset_id="dataset_id", - save_results_dataset=True, - scoring_functions={ - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - scoring = await response.parse() - assert_matches_type(ScoringScoreBatchResponse, scoring, path=["response"]) - - @parametrize - async def test_streaming_response_score_batch(self, 
async_client: AsyncLlamaStackClient) -> None: - async with async_client.scoring.with_streaming_response.score_batch( - dataset_id="dataset_id", - save_results_dataset=True, - scoring_functions={ - "foo": { - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - } - }, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - scoring = await response.parse() - assert_matches_type(ScoringScoreBatchResponse, scoring, path=["response"]) - - assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_scoring_functions.py b/tests/api_resources/test_scoring_functions.py deleted file mode 100644 index 44556317..00000000 --- a/tests/api_resources/test_scoring_functions.py +++ /dev/null @@ -1,263 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types import ( - ScoringFn, - ScoringFunctionListResponse, -) - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestScoringFunctions: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_retrieve(self, client: LlamaStackClient) -> None: - scoring_function = client.scoring_functions.retrieve( - "scoring_fn_id", - ) - assert_matches_type(ScoringFn, scoring_function, path=["response"]) - - @parametrize - def test_raw_response_retrieve(self, client: LlamaStackClient) -> None: - response = client.scoring_functions.with_raw_response.retrieve( - "scoring_fn_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - scoring_function = response.parse() - assert_matches_type(ScoringFn, scoring_function, path=["response"]) - - @parametrize - def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None: - with client.scoring_functions.with_streaming_response.retrieve( - "scoring_fn_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - scoring_function = response.parse() - assert_matches_type(ScoringFn, scoring_function, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_retrieve(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `scoring_fn_id` but received ''"): - client.scoring_functions.with_raw_response.retrieve( - "", - ) - - @parametrize - def test_method_list(self, client: LlamaStackClient) -> None: - scoring_function = client.scoring_functions.list() - assert_matches_type(ScoringFunctionListResponse, scoring_function, path=["response"]) - - @parametrize - def test_raw_response_list(self, client: LlamaStackClient) -> None: - response = client.scoring_functions.with_raw_response.list() - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - scoring_function = response.parse() - assert_matches_type(ScoringFunctionListResponse, scoring_function, path=["response"]) - - @parametrize - def test_streaming_response_list(self, client: LlamaStackClient) -> None: - with 
client.scoring_functions.with_streaming_response.list() as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - scoring_function = response.parse() - assert_matches_type(ScoringFunctionListResponse, scoring_function, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_register(self, client: LlamaStackClient) -> None: - scoring_function = client.scoring_functions.register( - description="description", - return_type={"type": "string"}, - scoring_fn_id="scoring_fn_id", - ) - assert scoring_function is None - - @parametrize - def test_method_register_with_all_params(self, client: LlamaStackClient) -> None: - scoring_function = client.scoring_functions.register( - description="description", - return_type={"type": "string"}, - scoring_fn_id="scoring_fn_id", - params={ - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - "prompt_template": "prompt_template", - }, - provider_id="provider_id", - provider_scoring_fn_id="provider_scoring_fn_id", - ) - assert scoring_function is None - - @parametrize - def test_raw_response_register(self, client: LlamaStackClient) -> None: - response = client.scoring_functions.with_raw_response.register( - description="description", - return_type={"type": "string"}, - scoring_fn_id="scoring_fn_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - scoring_function = response.parse() - assert scoring_function is None - - @parametrize - def test_streaming_response_register(self, client: LlamaStackClient) -> None: - with client.scoring_functions.with_streaming_response.register( - description="description", - return_type={"type": "string"}, - scoring_fn_id="scoring_fn_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - scoring_function = response.parse() - assert scoring_function is None - - assert cast(Any, response.is_closed) is True - - -class TestAsyncScoringFunctions: - parametrize = pytest.mark.parametrize( - "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] - ) - - @parametrize - async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - scoring_function = await async_client.scoring_functions.retrieve( - "scoring_fn_id", - ) - assert_matches_type(ScoringFn, scoring_function, path=["response"]) - - @parametrize - async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.scoring_functions.with_raw_response.retrieve( - "scoring_fn_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - scoring_function = await response.parse() - assert_matches_type(ScoringFn, scoring_function, path=["response"]) - - @parametrize - async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.scoring_functions.with_streaming_response.retrieve( - "scoring_fn_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - scoring_function = await response.parse() - assert_matches_type(ScoringFn, scoring_function, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - 
async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `scoring_fn_id` but received ''"): - await async_client.scoring_functions.with_raw_response.retrieve( - "", - ) - - @parametrize - async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None: - scoring_function = await async_client.scoring_functions.list() - assert_matches_type(ScoringFunctionListResponse, scoring_function, path=["response"]) - - @parametrize - async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.scoring_functions.with_raw_response.list() - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - scoring_function = await response.parse() - assert_matches_type(ScoringFunctionListResponse, scoring_function, path=["response"]) - - @parametrize - async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.scoring_functions.with_streaming_response.list() as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - scoring_function = await response.parse() - assert_matches_type(ScoringFunctionListResponse, scoring_function, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_register(self, async_client: AsyncLlamaStackClient) -> None: - scoring_function = await async_client.scoring_functions.register( - description="description", - return_type={"type": "string"}, - scoring_fn_id="scoring_fn_id", - ) - assert scoring_function is None - - @parametrize - async def test_method_register_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - scoring_function = await async_client.scoring_functions.register( - description="description", - return_type={"type": "string"}, - scoring_fn_id="scoring_fn_id", - params={ - "aggregation_functions": ["average"], - "judge_model": "judge_model", - "judge_score_regexes": ["string"], - "type": "llm_as_judge", - "prompt_template": "prompt_template", - }, - provider_id="provider_id", - provider_scoring_fn_id="provider_scoring_fn_id", - ) - assert scoring_function is None - - @parametrize - async def test_raw_response_register(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.scoring_functions.with_raw_response.register( - description="description", - return_type={"type": "string"}, - scoring_fn_id="scoring_fn_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - scoring_function = await response.parse() - assert scoring_function is None - - @parametrize - async def test_streaming_response_register(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.scoring_functions.with_streaming_response.register( - description="description", - return_type={"type": "string"}, - scoring_fn_id="scoring_fn_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - scoring_function = await response.parse() - assert scoring_function is None - - assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_shields.py b/tests/api_resources/test_shields.py deleted file mode 100644 index 037a66d3..00000000 --- a/tests/api_resources/test_shields.py +++ /dev/null @@ -1,232 
+0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types import Shield, ShieldListResponse - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestShields: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_retrieve(self, client: LlamaStackClient) -> None: - shield = client.shields.retrieve( - "identifier", - ) - assert_matches_type(Shield, shield, path=["response"]) - - @parametrize - def test_raw_response_retrieve(self, client: LlamaStackClient) -> None: - response = client.shields.with_raw_response.retrieve( - "identifier", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - shield = response.parse() - assert_matches_type(Shield, shield, path=["response"]) - - @parametrize - def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None: - with client.shields.with_streaming_response.retrieve( - "identifier", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - shield = response.parse() - assert_matches_type(Shield, shield, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_retrieve(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `identifier` but received ''"): - client.shields.with_raw_response.retrieve( - "", - ) - - @parametrize - def test_method_list(self, client: LlamaStackClient) -> None: - shield = client.shields.list() - assert_matches_type(ShieldListResponse, shield, path=["response"]) - - @parametrize - def test_raw_response_list(self, client: LlamaStackClient) -> None: - response = client.shields.with_raw_response.list() - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - shield = response.parse() - assert_matches_type(ShieldListResponse, shield, path=["response"]) - - @parametrize - def test_streaming_response_list(self, client: LlamaStackClient) -> None: - with client.shields.with_streaming_response.list() as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - shield = response.parse() - assert_matches_type(ShieldListResponse, shield, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_register(self, client: LlamaStackClient) -> None: - shield = client.shields.register( - shield_id="shield_id", - ) - assert_matches_type(Shield, shield, path=["response"]) - - @parametrize - def test_method_register_with_all_params(self, client: LlamaStackClient) -> None: - shield = client.shields.register( - shield_id="shield_id", - params={"foo": True}, - provider_id="provider_id", - provider_shield_id="provider_shield_id", - ) - assert_matches_type(Shield, shield, path=["response"]) - - @parametrize - def test_raw_response_register(self, client: LlamaStackClient) -> None: - response = client.shields.with_raw_response.register( - shield_id="shield_id", - ) - - assert response.is_closed is True - assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" - shield = response.parse() - assert_matches_type(Shield, shield, path=["response"]) - - @parametrize - def test_streaming_response_register(self, client: LlamaStackClient) -> None: - with client.shields.with_streaming_response.register( - shield_id="shield_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - shield = response.parse() - assert_matches_type(Shield, shield, path=["response"]) - - assert cast(Any, response.is_closed) is True - - -class TestAsyncShields: - parametrize = pytest.mark.parametrize( - "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] - ) - - @parametrize - async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - shield = await async_client.shields.retrieve( - "identifier", - ) - assert_matches_type(Shield, shield, path=["response"]) - - @parametrize - async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.shields.with_raw_response.retrieve( - "identifier", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - shield = await response.parse() - assert_matches_type(Shield, shield, path=["response"]) - - @parametrize - async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.shields.with_streaming_response.retrieve( - "identifier", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - shield = await response.parse() - assert_matches_type(Shield, shield, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `identifier` but received ''"): - await async_client.shields.with_raw_response.retrieve( - "", - ) - - @parametrize - async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None: - shield = await async_client.shields.list() - assert_matches_type(ShieldListResponse, shield, path=["response"]) - - @parametrize - async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.shields.with_raw_response.list() - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - shield = await response.parse() - assert_matches_type(ShieldListResponse, shield, path=["response"]) - - @parametrize - async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.shields.with_streaming_response.list() as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - shield = await response.parse() - assert_matches_type(ShieldListResponse, shield, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_register(self, async_client: AsyncLlamaStackClient) -> None: - shield = await async_client.shields.register( - shield_id="shield_id", - ) - assert_matches_type(Shield, shield, path=["response"]) - - @parametrize - async def test_method_register_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - shield = 
await async_client.shields.register( - shield_id="shield_id", - params={"foo": True}, - provider_id="provider_id", - provider_shield_id="provider_shield_id", - ) - assert_matches_type(Shield, shield, path=["response"]) - - @parametrize - async def test_raw_response_register(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.shields.with_raw_response.register( - shield_id="shield_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - shield = await response.parse() - assert_matches_type(Shield, shield, path=["response"]) - - @parametrize - async def test_streaming_response_register(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.shields.with_streaming_response.register( - shield_id="shield_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - shield = await response.parse() - assert_matches_type(Shield, shield, path=["response"]) - - assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_store.py b/tests/api_resources/test_store.py new file mode 100644 index 00000000..6fae7ade --- /dev/null +++ b/tests/api_resources/test_store.py @@ -0,0 +1,80 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from tests.utils import assert_matches_type +from llama_stack_cli import LlamaStackCli, AsyncLlamaStackCli +from llama_stack_cli.types import StoreListInventoryResponse + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestStore: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @pytest.mark.skip() + @parametrize + def test_method_list_inventory(self, client: LlamaStackCli) -> None: + store = client.store.list_inventory() + assert_matches_type(StoreListInventoryResponse, store, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_raw_response_list_inventory(self, client: LlamaStackCli) -> None: + response = client.store.with_raw_response.list_inventory() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + store = response.parse() + assert_matches_type(StoreListInventoryResponse, store, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_streaming_response_list_inventory(self, client: LlamaStackCli) -> None: + with client.store.with_streaming_response.list_inventory() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + store = response.parse() + assert_matches_type(StoreListInventoryResponse, store, path=["response"]) + + assert cast(Any, response.is_closed) is True + + +class TestAsyncStore: + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) + + @pytest.mark.skip() + @parametrize + async def test_method_list_inventory(self, async_client: AsyncLlamaStackCli) -> None: + store = await async_client.store.list_inventory() + assert_matches_type(StoreListInventoryResponse, store, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_raw_response_list_inventory(self, async_client: AsyncLlamaStackCli) -> None: + response = await 
async_client.store.with_raw_response.list_inventory() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + store = await response.parse() + assert_matches_type(StoreListInventoryResponse, store, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_streaming_response_list_inventory(self, async_client: AsyncLlamaStackCli) -> None: + async with async_client.store.with_streaming_response.list_inventory() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + store = await response.parse() + assert_matches_type(StoreListInventoryResponse, store, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_synthetic_data_generation.py b/tests/api_resources/test_synthetic_data_generation.py deleted file mode 100644 index c383770e..00000000 --- a/tests/api_resources/test_synthetic_data_generation.py +++ /dev/null @@ -1,152 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types import SyntheticDataGenerationResponse - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestSyntheticDataGeneration: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_generate(self, client: LlamaStackClient) -> None: - synthetic_data_generation = client.synthetic_data_generation.generate( - dialogs=[ - { - "content": "string", - "role": "user", - } - ], - filtering_function="none", - ) - assert_matches_type(SyntheticDataGenerationResponse, synthetic_data_generation, path=["response"]) - - @parametrize - def test_method_generate_with_all_params(self, client: LlamaStackClient) -> None: - synthetic_data_generation = client.synthetic_data_generation.generate( - dialogs=[ - { - "content": "string", - "role": "user", - "context": "string", - } - ], - filtering_function="none", - model="model", - ) - assert_matches_type(SyntheticDataGenerationResponse, synthetic_data_generation, path=["response"]) - - @parametrize - def test_raw_response_generate(self, client: LlamaStackClient) -> None: - response = client.synthetic_data_generation.with_raw_response.generate( - dialogs=[ - { - "content": "string", - "role": "user", - } - ], - filtering_function="none", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - synthetic_data_generation = response.parse() - assert_matches_type(SyntheticDataGenerationResponse, synthetic_data_generation, path=["response"]) - - @parametrize - def test_streaming_response_generate(self, client: LlamaStackClient) -> None: - with client.synthetic_data_generation.with_streaming_response.generate( - dialogs=[ - { - "content": "string", - "role": "user", - } - ], - filtering_function="none", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - synthetic_data_generation = response.parse() - assert_matches_type(SyntheticDataGenerationResponse, synthetic_data_generation, path=["response"]) - - assert cast(Any, response.is_closed) is True - - -class 
TestAsyncSyntheticDataGeneration: - parametrize = pytest.mark.parametrize( - "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] - ) - - @parametrize - async def test_method_generate(self, async_client: AsyncLlamaStackClient) -> None: - synthetic_data_generation = await async_client.synthetic_data_generation.generate( - dialogs=[ - { - "content": "string", - "role": "user", - } - ], - filtering_function="none", - ) - assert_matches_type(SyntheticDataGenerationResponse, synthetic_data_generation, path=["response"]) - - @parametrize - async def test_method_generate_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - synthetic_data_generation = await async_client.synthetic_data_generation.generate( - dialogs=[ - { - "content": "string", - "role": "user", - "context": "string", - } - ], - filtering_function="none", - model="model", - ) - assert_matches_type(SyntheticDataGenerationResponse, synthetic_data_generation, path=["response"]) - - @parametrize - async def test_raw_response_generate(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.synthetic_data_generation.with_raw_response.generate( - dialogs=[ - { - "content": "string", - "role": "user", - } - ], - filtering_function="none", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - synthetic_data_generation = await response.parse() - assert_matches_type(SyntheticDataGenerationResponse, synthetic_data_generation, path=["response"]) - - @parametrize - async def test_streaming_response_generate(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.synthetic_data_generation.with_streaming_response.generate( - dialogs=[ - { - "content": "string", - "role": "user", - } - ], - filtering_function="none", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - synthetic_data_generation = await response.parse() - assert_matches_type(SyntheticDataGenerationResponse, synthetic_data_generation, path=["response"]) - - assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_telemetry.py b/tests/api_resources/test_telemetry.py deleted file mode 100644 index 14a8801c..00000000 --- a/tests/api_resources/test_telemetry.py +++ /dev/null @@ -1,813 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types import ( - Trace, - TelemetryGetSpanResponse, - TelemetryQuerySpansResponse, - TelemetryGetSpanTreeResponse, - TelemetryQueryTracesResponse, -) -from llama_stack_client._utils import parse_datetime - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestTelemetry: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_get_span(self, client: LlamaStackClient) -> None: - telemetry = client.telemetry.get_span( - span_id="span_id", - trace_id="trace_id", - ) - assert_matches_type(TelemetryGetSpanResponse, telemetry, path=["response"]) - - @parametrize - def test_raw_response_get_span(self, client: LlamaStackClient) -> None: - response = client.telemetry.with_raw_response.get_span( - span_id="span_id", - trace_id="trace_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - telemetry = response.parse() - assert_matches_type(TelemetryGetSpanResponse, telemetry, path=["response"]) - - @parametrize - def test_streaming_response_get_span(self, client: LlamaStackClient) -> None: - with client.telemetry.with_streaming_response.get_span( - span_id="span_id", - trace_id="trace_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - telemetry = response.parse() - assert_matches_type(TelemetryGetSpanResponse, telemetry, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_get_span(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `trace_id` but received ''"): - client.telemetry.with_raw_response.get_span( - span_id="span_id", - trace_id="", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `span_id` but received ''"): - client.telemetry.with_raw_response.get_span( - span_id="", - trace_id="trace_id", - ) - - @parametrize - def test_method_get_span_tree(self, client: LlamaStackClient) -> None: - telemetry = client.telemetry.get_span_tree( - span_id="span_id", - ) - assert_matches_type(TelemetryGetSpanTreeResponse, telemetry, path=["response"]) - - @parametrize - def test_method_get_span_tree_with_all_params(self, client: LlamaStackClient) -> None: - telemetry = client.telemetry.get_span_tree( - span_id="span_id", - attributes_to_return=["string"], - max_depth=0, - ) - assert_matches_type(TelemetryGetSpanTreeResponse, telemetry, path=["response"]) - - @parametrize - def test_raw_response_get_span_tree(self, client: LlamaStackClient) -> None: - response = client.telemetry.with_raw_response.get_span_tree( - span_id="span_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - telemetry = response.parse() - assert_matches_type(TelemetryGetSpanTreeResponse, telemetry, path=["response"]) - - @parametrize - def test_streaming_response_get_span_tree(self, client: LlamaStackClient) -> None: - with client.telemetry.with_streaming_response.get_span_tree( - span_id="span_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - 
telemetry = response.parse() - assert_matches_type(TelemetryGetSpanTreeResponse, telemetry, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_get_span_tree(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `span_id` but received ''"): - client.telemetry.with_raw_response.get_span_tree( - span_id="", - ) - - @parametrize - def test_method_get_trace(self, client: LlamaStackClient) -> None: - telemetry = client.telemetry.get_trace( - "trace_id", - ) - assert_matches_type(Trace, telemetry, path=["response"]) - - @parametrize - def test_raw_response_get_trace(self, client: LlamaStackClient) -> None: - response = client.telemetry.with_raw_response.get_trace( - "trace_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - telemetry = response.parse() - assert_matches_type(Trace, telemetry, path=["response"]) - - @parametrize - def test_streaming_response_get_trace(self, client: LlamaStackClient) -> None: - with client.telemetry.with_streaming_response.get_trace( - "trace_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - telemetry = response.parse() - assert_matches_type(Trace, telemetry, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_get_trace(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `trace_id` but received ''"): - client.telemetry.with_raw_response.get_trace( - "", - ) - - @parametrize - def test_method_log_event(self, client: LlamaStackClient) -> None: - telemetry = client.telemetry.log_event( - event={ - "message": "message", - "severity": "verbose", - "span_id": "span_id", - "timestamp": parse_datetime("2019-12-27T18:11:19.117Z"), - "trace_id": "trace_id", - "type": "unstructured_log", - }, - ttl_seconds=0, - ) - assert telemetry is None - - @parametrize - def test_method_log_event_with_all_params(self, client: LlamaStackClient) -> None: - telemetry = client.telemetry.log_event( - event={ - "message": "message", - "severity": "verbose", - "span_id": "span_id", - "timestamp": parse_datetime("2019-12-27T18:11:19.117Z"), - "trace_id": "trace_id", - "type": "unstructured_log", - "attributes": {"foo": "string"}, - }, - ttl_seconds=0, - ) - assert telemetry is None - - @parametrize - def test_raw_response_log_event(self, client: LlamaStackClient) -> None: - response = client.telemetry.with_raw_response.log_event( - event={ - "message": "message", - "severity": "verbose", - "span_id": "span_id", - "timestamp": parse_datetime("2019-12-27T18:11:19.117Z"), - "trace_id": "trace_id", - "type": "unstructured_log", - }, - ttl_seconds=0, - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - telemetry = response.parse() - assert telemetry is None - - @parametrize - def test_streaming_response_log_event(self, client: LlamaStackClient) -> None: - with client.telemetry.with_streaming_response.log_event( - event={ - "message": "message", - "severity": "verbose", - "span_id": "span_id", - "timestamp": parse_datetime("2019-12-27T18:11:19.117Z"), - "trace_id": "trace_id", - "type": "unstructured_log", - }, - ttl_seconds=0, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - 
telemetry = response.parse() - assert telemetry is None - - assert cast(Any, response.is_closed) is True - - @pytest.mark.skip(reason="unsupported query params in java / kotlin") - @parametrize - def test_method_query_spans(self, client: LlamaStackClient) -> None: - telemetry = client.telemetry.query_spans( - attribute_filters=[ - { - "key": "key", - "op": "eq", - "value": True, - } - ], - attributes_to_return=["string"], - ) - assert_matches_type(TelemetryQuerySpansResponse, telemetry, path=["response"]) - - @pytest.mark.skip(reason="unsupported query params in java / kotlin") - @parametrize - def test_method_query_spans_with_all_params(self, client: LlamaStackClient) -> None: - telemetry = client.telemetry.query_spans( - attribute_filters=[ - { - "key": "key", - "op": "eq", - "value": True, - } - ], - attributes_to_return=["string"], - max_depth=0, - ) - assert_matches_type(TelemetryQuerySpansResponse, telemetry, path=["response"]) - - @pytest.mark.skip(reason="unsupported query params in java / kotlin") - @parametrize - def test_raw_response_query_spans(self, client: LlamaStackClient) -> None: - response = client.telemetry.with_raw_response.query_spans( - attribute_filters=[ - { - "key": "key", - "op": "eq", - "value": True, - } - ], - attributes_to_return=["string"], - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - telemetry = response.parse() - assert_matches_type(TelemetryQuerySpansResponse, telemetry, path=["response"]) - - @pytest.mark.skip(reason="unsupported query params in java / kotlin") - @parametrize - def test_streaming_response_query_spans(self, client: LlamaStackClient) -> None: - with client.telemetry.with_streaming_response.query_spans( - attribute_filters=[ - { - "key": "key", - "op": "eq", - "value": True, - } - ], - attributes_to_return=["string"], - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - telemetry = response.parse() - assert_matches_type(TelemetryQuerySpansResponse, telemetry, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @pytest.mark.skip(reason="unsupported query params in java / kotlin") - @parametrize - def test_method_query_traces(self, client: LlamaStackClient) -> None: - telemetry = client.telemetry.query_traces() - assert_matches_type(TelemetryQueryTracesResponse, telemetry, path=["response"]) - - @pytest.mark.skip(reason="unsupported query params in java / kotlin") - @parametrize - def test_method_query_traces_with_all_params(self, client: LlamaStackClient) -> None: - telemetry = client.telemetry.query_traces( - attribute_filters=[ - { - "key": "key", - "op": "eq", - "value": True, - } - ], - limit=0, - offset=0, - order_by=["string"], - ) - assert_matches_type(TelemetryQueryTracesResponse, telemetry, path=["response"]) - - @pytest.mark.skip(reason="unsupported query params in java / kotlin") - @parametrize - def test_raw_response_query_traces(self, client: LlamaStackClient) -> None: - response = client.telemetry.with_raw_response.query_traces() - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - telemetry = response.parse() - assert_matches_type(TelemetryQueryTracesResponse, telemetry, path=["response"]) - - @pytest.mark.skip(reason="unsupported query params in java / kotlin") - @parametrize - def test_streaming_response_query_traces(self, client: LlamaStackClient) -> None: - with 
client.telemetry.with_streaming_response.query_traces() as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - telemetry = response.parse() - assert_matches_type(TelemetryQueryTracesResponse, telemetry, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_save_spans_to_dataset(self, client: LlamaStackClient) -> None: - telemetry = client.telemetry.save_spans_to_dataset( - attribute_filters=[ - { - "key": "key", - "op": "eq", - "value": True, - } - ], - attributes_to_save=["string"], - dataset_id="dataset_id", - ) - assert telemetry is None - - @parametrize - def test_method_save_spans_to_dataset_with_all_params(self, client: LlamaStackClient) -> None: - telemetry = client.telemetry.save_spans_to_dataset( - attribute_filters=[ - { - "key": "key", - "op": "eq", - "value": True, - } - ], - attributes_to_save=["string"], - dataset_id="dataset_id", - max_depth=0, - ) - assert telemetry is None - - @parametrize - def test_raw_response_save_spans_to_dataset(self, client: LlamaStackClient) -> None: - response = client.telemetry.with_raw_response.save_spans_to_dataset( - attribute_filters=[ - { - "key": "key", - "op": "eq", - "value": True, - } - ], - attributes_to_save=["string"], - dataset_id="dataset_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - telemetry = response.parse() - assert telemetry is None - - @parametrize - def test_streaming_response_save_spans_to_dataset(self, client: LlamaStackClient) -> None: - with client.telemetry.with_streaming_response.save_spans_to_dataset( - attribute_filters=[ - { - "key": "key", - "op": "eq", - "value": True, - } - ], - attributes_to_save=["string"], - dataset_id="dataset_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - telemetry = response.parse() - assert telemetry is None - - assert cast(Any, response.is_closed) is True - - -class TestAsyncTelemetry: - parametrize = pytest.mark.parametrize( - "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] - ) - - @parametrize - async def test_method_get_span(self, async_client: AsyncLlamaStackClient) -> None: - telemetry = await async_client.telemetry.get_span( - span_id="span_id", - trace_id="trace_id", - ) - assert_matches_type(TelemetryGetSpanResponse, telemetry, path=["response"]) - - @parametrize - async def test_raw_response_get_span(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.telemetry.with_raw_response.get_span( - span_id="span_id", - trace_id="trace_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - telemetry = await response.parse() - assert_matches_type(TelemetryGetSpanResponse, telemetry, path=["response"]) - - @parametrize - async def test_streaming_response_get_span(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.telemetry.with_streaming_response.get_span( - span_id="span_id", - trace_id="trace_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - telemetry = await response.parse() - assert_matches_type(TelemetryGetSpanResponse, telemetry, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def 
test_path_params_get_span(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `trace_id` but received ''"): - await async_client.telemetry.with_raw_response.get_span( - span_id="span_id", - trace_id="", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `span_id` but received ''"): - await async_client.telemetry.with_raw_response.get_span( - span_id="", - trace_id="trace_id", - ) - - @parametrize - async def test_method_get_span_tree(self, async_client: AsyncLlamaStackClient) -> None: - telemetry = await async_client.telemetry.get_span_tree( - span_id="span_id", - ) - assert_matches_type(TelemetryGetSpanTreeResponse, telemetry, path=["response"]) - - @parametrize - async def test_method_get_span_tree_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - telemetry = await async_client.telemetry.get_span_tree( - span_id="span_id", - attributes_to_return=["string"], - max_depth=0, - ) - assert_matches_type(TelemetryGetSpanTreeResponse, telemetry, path=["response"]) - - @parametrize - async def test_raw_response_get_span_tree(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.telemetry.with_raw_response.get_span_tree( - span_id="span_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - telemetry = await response.parse() - assert_matches_type(TelemetryGetSpanTreeResponse, telemetry, path=["response"]) - - @parametrize - async def test_streaming_response_get_span_tree(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.telemetry.with_streaming_response.get_span_tree( - span_id="span_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - telemetry = await response.parse() - assert_matches_type(TelemetryGetSpanTreeResponse, telemetry, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_get_span_tree(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `span_id` but received ''"): - await async_client.telemetry.with_raw_response.get_span_tree( - span_id="", - ) - - @parametrize - async def test_method_get_trace(self, async_client: AsyncLlamaStackClient) -> None: - telemetry = await async_client.telemetry.get_trace( - "trace_id", - ) - assert_matches_type(Trace, telemetry, path=["response"]) - - @parametrize - async def test_raw_response_get_trace(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.telemetry.with_raw_response.get_trace( - "trace_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - telemetry = await response.parse() - assert_matches_type(Trace, telemetry, path=["response"]) - - @parametrize - async def test_streaming_response_get_trace(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.telemetry.with_streaming_response.get_trace( - "trace_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - telemetry = await response.parse() - assert_matches_type(Trace, telemetry, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_get_trace(self, async_client: AsyncLlamaStackClient) -> 
None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `trace_id` but received ''"): - await async_client.telemetry.with_raw_response.get_trace( - "", - ) - - @parametrize - async def test_method_log_event(self, async_client: AsyncLlamaStackClient) -> None: - telemetry = await async_client.telemetry.log_event( - event={ - "message": "message", - "severity": "verbose", - "span_id": "span_id", - "timestamp": parse_datetime("2019-12-27T18:11:19.117Z"), - "trace_id": "trace_id", - "type": "unstructured_log", - }, - ttl_seconds=0, - ) - assert telemetry is None - - @parametrize - async def test_method_log_event_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - telemetry = await async_client.telemetry.log_event( - event={ - "message": "message", - "severity": "verbose", - "span_id": "span_id", - "timestamp": parse_datetime("2019-12-27T18:11:19.117Z"), - "trace_id": "trace_id", - "type": "unstructured_log", - "attributes": {"foo": "string"}, - }, - ttl_seconds=0, - ) - assert telemetry is None - - @parametrize - async def test_raw_response_log_event(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.telemetry.with_raw_response.log_event( - event={ - "message": "message", - "severity": "verbose", - "span_id": "span_id", - "timestamp": parse_datetime("2019-12-27T18:11:19.117Z"), - "trace_id": "trace_id", - "type": "unstructured_log", - }, - ttl_seconds=0, - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - telemetry = await response.parse() - assert telemetry is None - - @parametrize - async def test_streaming_response_log_event(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.telemetry.with_streaming_response.log_event( - event={ - "message": "message", - "severity": "verbose", - "span_id": "span_id", - "timestamp": parse_datetime("2019-12-27T18:11:19.117Z"), - "trace_id": "trace_id", - "type": "unstructured_log", - }, - ttl_seconds=0, - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - telemetry = await response.parse() - assert telemetry is None - - assert cast(Any, response.is_closed) is True - - @pytest.mark.skip(reason="unsupported query params in java / kotlin") - @parametrize - async def test_method_query_spans(self, async_client: AsyncLlamaStackClient) -> None: - telemetry = await async_client.telemetry.query_spans( - attribute_filters=[ - { - "key": "key", - "op": "eq", - "value": True, - } - ], - attributes_to_return=["string"], - ) - assert_matches_type(TelemetryQuerySpansResponse, telemetry, path=["response"]) - - @pytest.mark.skip(reason="unsupported query params in java / kotlin") - @parametrize - async def test_method_query_spans_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - telemetry = await async_client.telemetry.query_spans( - attribute_filters=[ - { - "key": "key", - "op": "eq", - "value": True, - } - ], - attributes_to_return=["string"], - max_depth=0, - ) - assert_matches_type(TelemetryQuerySpansResponse, telemetry, path=["response"]) - - @pytest.mark.skip(reason="unsupported query params in java / kotlin") - @parametrize - async def test_raw_response_query_spans(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.telemetry.with_raw_response.query_spans( - attribute_filters=[ - { - "key": "key", - "op": "eq", - "value": True, - } - ], - attributes_to_return=["string"], - ) - - 
assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - telemetry = await response.parse() - assert_matches_type(TelemetryQuerySpansResponse, telemetry, path=["response"]) - - @pytest.mark.skip(reason="unsupported query params in java / kotlin") - @parametrize - async def test_streaming_response_query_spans(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.telemetry.with_streaming_response.query_spans( - attribute_filters=[ - { - "key": "key", - "op": "eq", - "value": True, - } - ], - attributes_to_return=["string"], - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - telemetry = await response.parse() - assert_matches_type(TelemetryQuerySpansResponse, telemetry, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @pytest.mark.skip(reason="unsupported query params in java / kotlin") - @parametrize - async def test_method_query_traces(self, async_client: AsyncLlamaStackClient) -> None: - telemetry = await async_client.telemetry.query_traces() - assert_matches_type(TelemetryQueryTracesResponse, telemetry, path=["response"]) - - @pytest.mark.skip(reason="unsupported query params in java / kotlin") - @parametrize - async def test_method_query_traces_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - telemetry = await async_client.telemetry.query_traces( - attribute_filters=[ - { - "key": "key", - "op": "eq", - "value": True, - } - ], - limit=0, - offset=0, - order_by=["string"], - ) - assert_matches_type(TelemetryQueryTracesResponse, telemetry, path=["response"]) - - @pytest.mark.skip(reason="unsupported query params in java / kotlin") - @parametrize - async def test_raw_response_query_traces(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.telemetry.with_raw_response.query_traces() - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - telemetry = await response.parse() - assert_matches_type(TelemetryQueryTracesResponse, telemetry, path=["response"]) - - @pytest.mark.skip(reason="unsupported query params in java / kotlin") - @parametrize - async def test_streaming_response_query_traces(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.telemetry.with_streaming_response.query_traces() as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - telemetry = await response.parse() - assert_matches_type(TelemetryQueryTracesResponse, telemetry, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_save_spans_to_dataset(self, async_client: AsyncLlamaStackClient) -> None: - telemetry = await async_client.telemetry.save_spans_to_dataset( - attribute_filters=[ - { - "key": "key", - "op": "eq", - "value": True, - } - ], - attributes_to_save=["string"], - dataset_id="dataset_id", - ) - assert telemetry is None - - @parametrize - async def test_method_save_spans_to_dataset_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - telemetry = await async_client.telemetry.save_spans_to_dataset( - attribute_filters=[ - { - "key": "key", - "op": "eq", - "value": True, - } - ], - attributes_to_save=["string"], - dataset_id="dataset_id", - max_depth=0, - ) - assert telemetry is None - - @parametrize - async def test_raw_response_save_spans_to_dataset(self, 
async_client: AsyncLlamaStackClient) -> None: - response = await async_client.telemetry.with_raw_response.save_spans_to_dataset( - attribute_filters=[ - { - "key": "key", - "op": "eq", - "value": True, - } - ], - attributes_to_save=["string"], - dataset_id="dataset_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - telemetry = await response.parse() - assert telemetry is None - - @parametrize - async def test_streaming_response_save_spans_to_dataset(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.telemetry.with_streaming_response.save_spans_to_dataset( - attribute_filters=[ - { - "key": "key", - "op": "eq", - "value": True, - } - ], - attributes_to_save=["string"], - dataset_id="dataset_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - telemetry = await response.parse() - assert telemetry is None - - assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_tool_runtime.py b/tests/api_resources/test_tool_runtime.py deleted file mode 100644 index fa79b1ba..00000000 --- a/tests/api_resources/test_tool_runtime.py +++ /dev/null @@ -1,161 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types import ( - ToolInvocationResult, - ToolRuntimeListToolsResponse, -) - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestToolRuntime: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_invoke_tool(self, client: LlamaStackClient) -> None: - tool_runtime = client.tool_runtime.invoke_tool( - kwargs={"foo": True}, - tool_name="tool_name", - ) - assert_matches_type(ToolInvocationResult, tool_runtime, path=["response"]) - - @parametrize - def test_raw_response_invoke_tool(self, client: LlamaStackClient) -> None: - response = client.tool_runtime.with_raw_response.invoke_tool( - kwargs={"foo": True}, - tool_name="tool_name", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - tool_runtime = response.parse() - assert_matches_type(ToolInvocationResult, tool_runtime, path=["response"]) - - @parametrize - def test_streaming_response_invoke_tool(self, client: LlamaStackClient) -> None: - with client.tool_runtime.with_streaming_response.invoke_tool( - kwargs={"foo": True}, - tool_name="tool_name", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - tool_runtime = response.parse() - assert_matches_type(ToolInvocationResult, tool_runtime, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_list_tools(self, client: LlamaStackClient) -> None: - tool_runtime = client.tool_runtime.list_tools() - assert_matches_type(ToolRuntimeListToolsResponse, tool_runtime, path=["response"]) - - @parametrize - def test_method_list_tools_with_all_params(self, client: LlamaStackClient) -> None: - tool_runtime = client.tool_runtime.list_tools( - mcp_endpoint={"uri": "uri"}, - tool_group_id="tool_group_id", - ) - 
assert_matches_type(ToolRuntimeListToolsResponse, tool_runtime, path=["response"]) - - @parametrize - def test_raw_response_list_tools(self, client: LlamaStackClient) -> None: - response = client.tool_runtime.with_raw_response.list_tools() - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - tool_runtime = response.parse() - assert_matches_type(ToolRuntimeListToolsResponse, tool_runtime, path=["response"]) - - @parametrize - def test_streaming_response_list_tools(self, client: LlamaStackClient) -> None: - with client.tool_runtime.with_streaming_response.list_tools() as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - tool_runtime = response.parse() - assert_matches_type(ToolRuntimeListToolsResponse, tool_runtime, path=["response"]) - - assert cast(Any, response.is_closed) is True - - -class TestAsyncToolRuntime: - parametrize = pytest.mark.parametrize( - "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] - ) - - @parametrize - async def test_method_invoke_tool(self, async_client: AsyncLlamaStackClient) -> None: - tool_runtime = await async_client.tool_runtime.invoke_tool( - kwargs={"foo": True}, - tool_name="tool_name", - ) - assert_matches_type(ToolInvocationResult, tool_runtime, path=["response"]) - - @parametrize - async def test_raw_response_invoke_tool(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.tool_runtime.with_raw_response.invoke_tool( - kwargs={"foo": True}, - tool_name="tool_name", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - tool_runtime = await response.parse() - assert_matches_type(ToolInvocationResult, tool_runtime, path=["response"]) - - @parametrize - async def test_streaming_response_invoke_tool(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.tool_runtime.with_streaming_response.invoke_tool( - kwargs={"foo": True}, - tool_name="tool_name", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - tool_runtime = await response.parse() - assert_matches_type(ToolInvocationResult, tool_runtime, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_list_tools(self, async_client: AsyncLlamaStackClient) -> None: - tool_runtime = await async_client.tool_runtime.list_tools() - assert_matches_type(ToolRuntimeListToolsResponse, tool_runtime, path=["response"]) - - @parametrize - async def test_method_list_tools_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - tool_runtime = await async_client.tool_runtime.list_tools( - mcp_endpoint={"uri": "uri"}, - tool_group_id="tool_group_id", - ) - assert_matches_type(ToolRuntimeListToolsResponse, tool_runtime, path=["response"]) - - @parametrize - async def test_raw_response_list_tools(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.tool_runtime.with_raw_response.list_tools() - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - tool_runtime = await response.parse() - assert_matches_type(ToolRuntimeListToolsResponse, tool_runtime, path=["response"]) - - @parametrize - async def test_streaming_response_list_tools(self, async_client: AsyncLlamaStackClient) -> None: - async with 
async_client.tool_runtime.with_streaming_response.list_tools() as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - tool_runtime = await response.parse() - assert_matches_type(ToolRuntimeListToolsResponse, tool_runtime, path=["response"]) - - assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_toolgroups.py b/tests/api_resources/test_toolgroups.py deleted file mode 100644 index 1b8e5bce..00000000 --- a/tests/api_resources/test_toolgroups.py +++ /dev/null @@ -1,314 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types import ToolGroup, ToolgroupListResponse - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestToolgroups: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_list(self, client: LlamaStackClient) -> None: - toolgroup = client.toolgroups.list() - assert_matches_type(ToolgroupListResponse, toolgroup, path=["response"]) - - @parametrize - def test_raw_response_list(self, client: LlamaStackClient) -> None: - response = client.toolgroups.with_raw_response.list() - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - toolgroup = response.parse() - assert_matches_type(ToolgroupListResponse, toolgroup, path=["response"]) - - @parametrize - def test_streaming_response_list(self, client: LlamaStackClient) -> None: - with client.toolgroups.with_streaming_response.list() as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - toolgroup = response.parse() - assert_matches_type(ToolgroupListResponse, toolgroup, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_get(self, client: LlamaStackClient) -> None: - toolgroup = client.toolgroups.get( - "toolgroup_id", - ) - assert_matches_type(ToolGroup, toolgroup, path=["response"]) - - @parametrize - def test_raw_response_get(self, client: LlamaStackClient) -> None: - response = client.toolgroups.with_raw_response.get( - "toolgroup_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - toolgroup = response.parse() - assert_matches_type(ToolGroup, toolgroup, path=["response"]) - - @parametrize - def test_streaming_response_get(self, client: LlamaStackClient) -> None: - with client.toolgroups.with_streaming_response.get( - "toolgroup_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - toolgroup = response.parse() - assert_matches_type(ToolGroup, toolgroup, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_get(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `toolgroup_id` but received ''"): - client.toolgroups.with_raw_response.get( - "", - ) - - @parametrize - def test_method_register(self, client: LlamaStackClient) -> None: - toolgroup = client.toolgroups.register( - provider_id="provider_id", - 
toolgroup_id="toolgroup_id", - ) - assert toolgroup is None - - @parametrize - def test_method_register_with_all_params(self, client: LlamaStackClient) -> None: - toolgroup = client.toolgroups.register( - provider_id="provider_id", - toolgroup_id="toolgroup_id", - args={"foo": True}, - mcp_endpoint={"uri": "uri"}, - ) - assert toolgroup is None - - @parametrize - def test_raw_response_register(self, client: LlamaStackClient) -> None: - response = client.toolgroups.with_raw_response.register( - provider_id="provider_id", - toolgroup_id="toolgroup_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - toolgroup = response.parse() - assert toolgroup is None - - @parametrize - def test_streaming_response_register(self, client: LlamaStackClient) -> None: - with client.toolgroups.with_streaming_response.register( - provider_id="provider_id", - toolgroup_id="toolgroup_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - toolgroup = response.parse() - assert toolgroup is None - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_unregister(self, client: LlamaStackClient) -> None: - toolgroup = client.toolgroups.unregister( - "toolgroup_id", - ) - assert toolgroup is None - - @parametrize - def test_raw_response_unregister(self, client: LlamaStackClient) -> None: - response = client.toolgroups.with_raw_response.unregister( - "toolgroup_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - toolgroup = response.parse() - assert toolgroup is None - - @parametrize - def test_streaming_response_unregister(self, client: LlamaStackClient) -> None: - with client.toolgroups.with_streaming_response.unregister( - "toolgroup_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - toolgroup = response.parse() - assert toolgroup is None - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_unregister(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `toolgroup_id` but received ''"): - client.toolgroups.with_raw_response.unregister( - "", - ) - - -class TestAsyncToolgroups: - parametrize = pytest.mark.parametrize( - "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] - ) - - @parametrize - async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None: - toolgroup = await async_client.toolgroups.list() - assert_matches_type(ToolgroupListResponse, toolgroup, path=["response"]) - - @parametrize - async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.toolgroups.with_raw_response.list() - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - toolgroup = await response.parse() - assert_matches_type(ToolgroupListResponse, toolgroup, path=["response"]) - - @parametrize - async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.toolgroups.with_streaming_response.list() as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - toolgroup = await response.parse() - 
assert_matches_type(ToolgroupListResponse, toolgroup, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_get(self, async_client: AsyncLlamaStackClient) -> None: - toolgroup = await async_client.toolgroups.get( - "toolgroup_id", - ) - assert_matches_type(ToolGroup, toolgroup, path=["response"]) - - @parametrize - async def test_raw_response_get(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.toolgroups.with_raw_response.get( - "toolgroup_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - toolgroup = await response.parse() - assert_matches_type(ToolGroup, toolgroup, path=["response"]) - - @parametrize - async def test_streaming_response_get(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.toolgroups.with_streaming_response.get( - "toolgroup_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - toolgroup = await response.parse() - assert_matches_type(ToolGroup, toolgroup, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_get(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `toolgroup_id` but received ''"): - await async_client.toolgroups.with_raw_response.get( - "", - ) - - @parametrize - async def test_method_register(self, async_client: AsyncLlamaStackClient) -> None: - toolgroup = await async_client.toolgroups.register( - provider_id="provider_id", - toolgroup_id="toolgroup_id", - ) - assert toolgroup is None - - @parametrize - async def test_method_register_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - toolgroup = await async_client.toolgroups.register( - provider_id="provider_id", - toolgroup_id="toolgroup_id", - args={"foo": True}, - mcp_endpoint={"uri": "uri"}, - ) - assert toolgroup is None - - @parametrize - async def test_raw_response_register(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.toolgroups.with_raw_response.register( - provider_id="provider_id", - toolgroup_id="toolgroup_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - toolgroup = await response.parse() - assert toolgroup is None - - @parametrize - async def test_streaming_response_register(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.toolgroups.with_streaming_response.register( - provider_id="provider_id", - toolgroup_id="toolgroup_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - toolgroup = await response.parse() - assert toolgroup is None - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_unregister(self, async_client: AsyncLlamaStackClient) -> None: - toolgroup = await async_client.toolgroups.unregister( - "toolgroup_id", - ) - assert toolgroup is None - - @parametrize - async def test_raw_response_unregister(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.toolgroups.with_raw_response.unregister( - "toolgroup_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - toolgroup = await response.parse() - assert toolgroup is 
None - - @parametrize - async def test_streaming_response_unregister(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.toolgroups.with_streaming_response.unregister( - "toolgroup_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - toolgroup = await response.parse() - assert toolgroup is None - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_unregister(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `toolgroup_id` but received ''"): - await async_client.toolgroups.with_raw_response.unregister( - "", - ) diff --git a/tests/api_resources/test_tools.py b/tests/api_resources/test_tools.py deleted file mode 100644 index 3c1f0da4..00000000 --- a/tests/api_resources/test_tools.py +++ /dev/null @@ -1,164 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types import Tool, ToolListResponse - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestTools: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_list(self, client: LlamaStackClient) -> None: - tool = client.tools.list() - assert_matches_type(ToolListResponse, tool, path=["response"]) - - @parametrize - def test_method_list_with_all_params(self, client: LlamaStackClient) -> None: - tool = client.tools.list( - toolgroup_id="toolgroup_id", - ) - assert_matches_type(ToolListResponse, tool, path=["response"]) - - @parametrize - def test_raw_response_list(self, client: LlamaStackClient) -> None: - response = client.tools.with_raw_response.list() - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - tool = response.parse() - assert_matches_type(ToolListResponse, tool, path=["response"]) - - @parametrize - def test_streaming_response_list(self, client: LlamaStackClient) -> None: - with client.tools.with_streaming_response.list() as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - tool = response.parse() - assert_matches_type(ToolListResponse, tool, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_get(self, client: LlamaStackClient) -> None: - tool = client.tools.get( - "tool_name", - ) - assert_matches_type(Tool, tool, path=["response"]) - - @parametrize - def test_raw_response_get(self, client: LlamaStackClient) -> None: - response = client.tools.with_raw_response.get( - "tool_name", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - tool = response.parse() - assert_matches_type(Tool, tool, path=["response"]) - - @parametrize - def test_streaming_response_get(self, client: LlamaStackClient) -> None: - with client.tools.with_streaming_response.get( - "tool_name", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - tool = response.parse() - assert_matches_type(Tool, tool, path=["response"]) - - assert 
cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_get(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `tool_name` but received ''"): - client.tools.with_raw_response.get( - "", - ) - - -class TestAsyncTools: - parametrize = pytest.mark.parametrize( - "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] - ) - - @parametrize - async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None: - tool = await async_client.tools.list() - assert_matches_type(ToolListResponse, tool, path=["response"]) - - @parametrize - async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - tool = await async_client.tools.list( - toolgroup_id="toolgroup_id", - ) - assert_matches_type(ToolListResponse, tool, path=["response"]) - - @parametrize - async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.tools.with_raw_response.list() - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - tool = await response.parse() - assert_matches_type(ToolListResponse, tool, path=["response"]) - - @parametrize - async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.tools.with_streaming_response.list() as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - tool = await response.parse() - assert_matches_type(ToolListResponse, tool, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_get(self, async_client: AsyncLlamaStackClient) -> None: - tool = await async_client.tools.get( - "tool_name", - ) - assert_matches_type(Tool, tool, path=["response"]) - - @parametrize - async def test_raw_response_get(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.tools.with_raw_response.get( - "tool_name", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - tool = await response.parse() - assert_matches_type(Tool, tool, path=["response"]) - - @parametrize - async def test_streaming_response_get(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.tools.with_streaming_response.get( - "tool_name", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - tool = await response.parse() - assert_matches_type(Tool, tool, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_get(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `tool_name` but received ''"): - await async_client.tools.with_raw_response.get( - "", - ) diff --git a/tests/api_resources/test_user.py b/tests/api_resources/test_user.py new file mode 100644 index 00000000..d7b0c1ff --- /dev/null +++ b/tests/api_resources/test_user.py @@ -0,0 +1,620 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from tests.utils import assert_matches_type +from llama_stack_cli import LlamaStackCli, AsyncLlamaStackCli +from llama_stack_cli.types import ( + User, +) + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") + + +class TestUser: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @pytest.mark.skip() + @parametrize + def test_method_create(self, client: LlamaStackCli) -> None: + user = client.user.create() + assert_matches_type(User, user, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_method_create_with_all_params(self, client: LlamaStackCli) -> None: + user = client.user.create( + id=10, + email="john@email.com", + first_name="John", + last_name="James", + password="12345", + phone="12345", + username="theUser", + user_status=1, + ) + assert_matches_type(User, user, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_raw_response_create(self, client: LlamaStackCli) -> None: + response = client.user.with_raw_response.create() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + user = response.parse() + assert_matches_type(User, user, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_streaming_response_create(self, client: LlamaStackCli) -> None: + with client.user.with_streaming_response.create() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + user = response.parse() + assert_matches_type(User, user, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + def test_method_retrieve(self, client: LlamaStackCli) -> None: + user = client.user.retrieve( + "username", + ) + assert_matches_type(User, user, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_raw_response_retrieve(self, client: LlamaStackCli) -> None: + response = client.user.with_raw_response.retrieve( + "username", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + user = response.parse() + assert_matches_type(User, user, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_streaming_response_retrieve(self, client: LlamaStackCli) -> None: + with client.user.with_streaming_response.retrieve( + "username", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + user = response.parse() + assert_matches_type(User, user, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + def test_path_params_retrieve(self, client: LlamaStackCli) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `username` but received ''"): + client.user.with_raw_response.retrieve( + "", + ) + + @pytest.mark.skip() + @parametrize + def test_method_update(self, client: LlamaStackCli) -> None: + user = client.user.update( + existing_username="username", + ) + assert user is None + + @pytest.mark.skip() + @parametrize + def test_method_update_with_all_params(self, client: LlamaStackCli) -> None: + user = client.user.update( + existing_username="username", + id=10, + email="john@email.com", + first_name="John", + last_name="James", + password="12345", + phone="12345", + username="theUser", + 
user_status=1, + ) + assert user is None + + @pytest.mark.skip() + @parametrize + def test_raw_response_update(self, client: LlamaStackCli) -> None: + response = client.user.with_raw_response.update( + existing_username="username", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + user = response.parse() + assert user is None + + @pytest.mark.skip() + @parametrize + def test_streaming_response_update(self, client: LlamaStackCli) -> None: + with client.user.with_streaming_response.update( + existing_username="username", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + user = response.parse() + assert user is None + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + def test_path_params_update(self, client: LlamaStackCli) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `existing_username` but received ''"): + client.user.with_raw_response.update( + existing_username="", + ) + + @pytest.mark.skip() + @parametrize + def test_method_delete(self, client: LlamaStackCli) -> None: + user = client.user.delete( + "username", + ) + assert user is None + + @pytest.mark.skip() + @parametrize + def test_raw_response_delete(self, client: LlamaStackCli) -> None: + response = client.user.with_raw_response.delete( + "username", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + user = response.parse() + assert user is None + + @pytest.mark.skip() + @parametrize + def test_streaming_response_delete(self, client: LlamaStackCli) -> None: + with client.user.with_streaming_response.delete( + "username", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + user = response.parse() + assert user is None + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + def test_path_params_delete(self, client: LlamaStackCli) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `username` but received ''"): + client.user.with_raw_response.delete( + "", + ) + + @pytest.mark.skip() + @parametrize + def test_method_create_with_list(self, client: LlamaStackCli) -> None: + user = client.user.create_with_list() + assert_matches_type(User, user, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_method_create_with_list_with_all_params(self, client: LlamaStackCli) -> None: + user = client.user.create_with_list( + items=[ + { + "id": 10, + "email": "john@email.com", + "first_name": "John", + "last_name": "James", + "password": "12345", + "phone": "12345", + "username": "theUser", + "user_status": 1, + } + ], + ) + assert_matches_type(User, user, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_raw_response_create_with_list(self, client: LlamaStackCli) -> None: + response = client.user.with_raw_response.create_with_list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + user = response.parse() + assert_matches_type(User, user, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_streaming_response_create_with_list(self, client: LlamaStackCli) -> None: + with client.user.with_streaming_response.create_with_list() as response: + assert not response.is_closed + assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" + + user = response.parse() + assert_matches_type(User, user, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + def test_method_login(self, client: LlamaStackCli) -> None: + user = client.user.login() + assert_matches_type(str, user, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_method_login_with_all_params(self, client: LlamaStackCli) -> None: + user = client.user.login( + password="password", + username="username", + ) + assert_matches_type(str, user, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_raw_response_login(self, client: LlamaStackCli) -> None: + response = client.user.with_raw_response.login() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + user = response.parse() + assert_matches_type(str, user, path=["response"]) + + @pytest.mark.skip() + @parametrize + def test_streaming_response_login(self, client: LlamaStackCli) -> None: + with client.user.with_streaming_response.login() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + user = response.parse() + assert_matches_type(str, user, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + def test_method_logout(self, client: LlamaStackCli) -> None: + user = client.user.logout() + assert user is None + + @pytest.mark.skip() + @parametrize + def test_raw_response_logout(self, client: LlamaStackCli) -> None: + response = client.user.with_raw_response.logout() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + user = response.parse() + assert user is None + + @pytest.mark.skip() + @parametrize + def test_streaming_response_logout(self, client: LlamaStackCli) -> None: + with client.user.with_streaming_response.logout() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + user = response.parse() + assert user is None + + assert cast(Any, response.is_closed) is True + + +class TestAsyncUser: + parametrize = pytest.mark.parametrize( + "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] + ) + + @pytest.mark.skip() + @parametrize + async def test_method_create(self, async_client: AsyncLlamaStackCli) -> None: + user = await async_client.user.create() + assert_matches_type(User, user, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncLlamaStackCli) -> None: + user = await async_client.user.create( + id=10, + email="john@email.com", + first_name="John", + last_name="James", + password="12345", + phone="12345", + username="theUser", + user_status=1, + ) + assert_matches_type(User, user, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_raw_response_create(self, async_client: AsyncLlamaStackCli) -> None: + response = await async_client.user.with_raw_response.create() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + user = await response.parse() + assert_matches_type(User, user, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_streaming_response_create(self, async_client: AsyncLlamaStackCli) -> None: + async 
with async_client.user.with_streaming_response.create() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + user = await response.parse() + assert_matches_type(User, user, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + async def test_method_retrieve(self, async_client: AsyncLlamaStackCli) -> None: + user = await async_client.user.retrieve( + "username", + ) + assert_matches_type(User, user, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackCli) -> None: + response = await async_client.user.with_raw_response.retrieve( + "username", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + user = await response.parse() + assert_matches_type(User, user, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackCli) -> None: + async with async_client.user.with_streaming_response.retrieve( + "username", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + user = await response.parse() + assert_matches_type(User, user, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncLlamaStackCli) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `username` but received ''"): + await async_client.user.with_raw_response.retrieve( + "", + ) + + @pytest.mark.skip() + @parametrize + async def test_method_update(self, async_client: AsyncLlamaStackCli) -> None: + user = await async_client.user.update( + existing_username="username", + ) + assert user is None + + @pytest.mark.skip() + @parametrize + async def test_method_update_with_all_params(self, async_client: AsyncLlamaStackCli) -> None: + user = await async_client.user.update( + existing_username="username", + id=10, + email="john@email.com", + first_name="John", + last_name="James", + password="12345", + phone="12345", + username="theUser", + user_status=1, + ) + assert user is None + + @pytest.mark.skip() + @parametrize + async def test_raw_response_update(self, async_client: AsyncLlamaStackCli) -> None: + response = await async_client.user.with_raw_response.update( + existing_username="username", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + user = await response.parse() + assert user is None + + @pytest.mark.skip() + @parametrize + async def test_streaming_response_update(self, async_client: AsyncLlamaStackCli) -> None: + async with async_client.user.with_streaming_response.update( + existing_username="username", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + user = await response.parse() + assert user is None + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + async def test_path_params_update(self, async_client: AsyncLlamaStackCli) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `existing_username` but received ''"): + await async_client.user.with_raw_response.update( + existing_username="", + ) + + @pytest.mark.skip() + @parametrize + async def 
test_method_delete(self, async_client: AsyncLlamaStackCli) -> None: + user = await async_client.user.delete( + "username", + ) + assert user is None + + @pytest.mark.skip() + @parametrize + async def test_raw_response_delete(self, async_client: AsyncLlamaStackCli) -> None: + response = await async_client.user.with_raw_response.delete( + "username", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + user = await response.parse() + assert user is None + + @pytest.mark.skip() + @parametrize + async def test_streaming_response_delete(self, async_client: AsyncLlamaStackCli) -> None: + async with async_client.user.with_streaming_response.delete( + "username", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + user = await response.parse() + assert user is None + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + async def test_path_params_delete(self, async_client: AsyncLlamaStackCli) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `username` but received ''"): + await async_client.user.with_raw_response.delete( + "", + ) + + @pytest.mark.skip() + @parametrize + async def test_method_create_with_list(self, async_client: AsyncLlamaStackCli) -> None: + user = await async_client.user.create_with_list() + assert_matches_type(User, user, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_method_create_with_list_with_all_params(self, async_client: AsyncLlamaStackCli) -> None: + user = await async_client.user.create_with_list( + items=[ + { + "id": 10, + "email": "john@email.com", + "first_name": "John", + "last_name": "James", + "password": "12345", + "phone": "12345", + "username": "theUser", + "user_status": 1, + } + ], + ) + assert_matches_type(User, user, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_raw_response_create_with_list(self, async_client: AsyncLlamaStackCli) -> None: + response = await async_client.user.with_raw_response.create_with_list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + user = await response.parse() + assert_matches_type(User, user, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_streaming_response_create_with_list(self, async_client: AsyncLlamaStackCli) -> None: + async with async_client.user.with_streaming_response.create_with_list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + user = await response.parse() + assert_matches_type(User, user, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + async def test_method_login(self, async_client: AsyncLlamaStackCli) -> None: + user = await async_client.user.login() + assert_matches_type(str, user, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_method_login_with_all_params(self, async_client: AsyncLlamaStackCli) -> None: + user = await async_client.user.login( + password="password", + username="username", + ) + assert_matches_type(str, user, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_raw_response_login(self, async_client: AsyncLlamaStackCli) -> None: + response = await async_client.user.with_raw_response.login() + + assert response.is_closed is True + assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" + user = await response.parse() + assert_matches_type(str, user, path=["response"]) + + @pytest.mark.skip() + @parametrize + async def test_streaming_response_login(self, async_client: AsyncLlamaStackCli) -> None: + async with async_client.user.with_streaming_response.login() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + user = await response.parse() + assert_matches_type(str, user, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @pytest.mark.skip() + @parametrize + async def test_method_logout(self, async_client: AsyncLlamaStackCli) -> None: + user = await async_client.user.logout() + assert user is None + + @pytest.mark.skip() + @parametrize + async def test_raw_response_logout(self, async_client: AsyncLlamaStackCli) -> None: + response = await async_client.user.with_raw_response.logout() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + user = await response.parse() + assert user is None + + @pytest.mark.skip() + @parametrize + async def test_streaming_response_logout(self, async_client: AsyncLlamaStackCli) -> None: + async with async_client.user.with_streaming_response.logout() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + user = await response.parse() + assert user is None + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_vector_dbs.py b/tests/api_resources/test_vector_dbs.py deleted file mode 100644 index 68d6be89..00000000 --- a/tests/api_resources/test_vector_dbs.py +++ /dev/null @@ -1,320 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types import ( - VectorDBListResponse, - VectorDBRegisterResponse, - VectorDBRetrieveResponse, -) - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestVectorDBs: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_retrieve(self, client: LlamaStackClient) -> None: - vector_db = client.vector_dbs.retrieve( - "vector_db_id", - ) - assert_matches_type(VectorDBRetrieveResponse, vector_db, path=["response"]) - - @parametrize - def test_raw_response_retrieve(self, client: LlamaStackClient) -> None: - response = client.vector_dbs.with_raw_response.retrieve( - "vector_db_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - vector_db = response.parse() - assert_matches_type(VectorDBRetrieveResponse, vector_db, path=["response"]) - - @parametrize - def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None: - with client.vector_dbs.with_streaming_response.retrieve( - "vector_db_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - vector_db = response.parse() - assert_matches_type(VectorDBRetrieveResponse, vector_db, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_retrieve(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_db_id` but received ''"): - client.vector_dbs.with_raw_response.retrieve( - "", - ) - - @parametrize - def test_method_list(self, client: LlamaStackClient) -> None: - vector_db = client.vector_dbs.list() - assert_matches_type(VectorDBListResponse, vector_db, path=["response"]) - - @parametrize - def test_raw_response_list(self, client: LlamaStackClient) -> None: - response = client.vector_dbs.with_raw_response.list() - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - vector_db = response.parse() - assert_matches_type(VectorDBListResponse, vector_db, path=["response"]) - - @parametrize - def test_streaming_response_list(self, client: LlamaStackClient) -> None: - with client.vector_dbs.with_streaming_response.list() as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - vector_db = response.parse() - assert_matches_type(VectorDBListResponse, vector_db, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_register(self, client: LlamaStackClient) -> None: - vector_db = client.vector_dbs.register( - embedding_model="embedding_model", - vector_db_id="vector_db_id", - ) - assert_matches_type(VectorDBRegisterResponse, vector_db, path=["response"]) - - @parametrize - def test_method_register_with_all_params(self, client: LlamaStackClient) -> None: - vector_db = client.vector_dbs.register( - embedding_model="embedding_model", - vector_db_id="vector_db_id", - embedding_dimension=0, - provider_id="provider_id", - provider_vector_db_id="provider_vector_db_id", - ) - assert_matches_type(VectorDBRegisterResponse, vector_db, path=["response"]) - - @parametrize - def 
test_raw_response_register(self, client: LlamaStackClient) -> None: - response = client.vector_dbs.with_raw_response.register( - embedding_model="embedding_model", - vector_db_id="vector_db_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - vector_db = response.parse() - assert_matches_type(VectorDBRegisterResponse, vector_db, path=["response"]) - - @parametrize - def test_streaming_response_register(self, client: LlamaStackClient) -> None: - with client.vector_dbs.with_streaming_response.register( - embedding_model="embedding_model", - vector_db_id="vector_db_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - vector_db = response.parse() - assert_matches_type(VectorDBRegisterResponse, vector_db, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_unregister(self, client: LlamaStackClient) -> None: - vector_db = client.vector_dbs.unregister( - "vector_db_id", - ) - assert vector_db is None - - @parametrize - def test_raw_response_unregister(self, client: LlamaStackClient) -> None: - response = client.vector_dbs.with_raw_response.unregister( - "vector_db_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - vector_db = response.parse() - assert vector_db is None - - @parametrize - def test_streaming_response_unregister(self, client: LlamaStackClient) -> None: - with client.vector_dbs.with_streaming_response.unregister( - "vector_db_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - vector_db = response.parse() - assert vector_db is None - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_unregister(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_db_id` but received ''"): - client.vector_dbs.with_raw_response.unregister( - "", - ) - - -class TestAsyncVectorDBs: - parametrize = pytest.mark.parametrize( - "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] - ) - - @parametrize - async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - vector_db = await async_client.vector_dbs.retrieve( - "vector_db_id", - ) - assert_matches_type(VectorDBRetrieveResponse, vector_db, path=["response"]) - - @parametrize - async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.vector_dbs.with_raw_response.retrieve( - "vector_db_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - vector_db = await response.parse() - assert_matches_type(VectorDBRetrieveResponse, vector_db, path=["response"]) - - @parametrize - async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.vector_dbs.with_streaming_response.retrieve( - "vector_db_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - vector_db = await response.parse() - assert_matches_type(VectorDBRetrieveResponse, vector_db, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_retrieve(self, 
async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_db_id` but received ''"): - await async_client.vector_dbs.with_raw_response.retrieve( - "", - ) - - @parametrize - async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None: - vector_db = await async_client.vector_dbs.list() - assert_matches_type(VectorDBListResponse, vector_db, path=["response"]) - - @parametrize - async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.vector_dbs.with_raw_response.list() - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - vector_db = await response.parse() - assert_matches_type(VectorDBListResponse, vector_db, path=["response"]) - - @parametrize - async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.vector_dbs.with_streaming_response.list() as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - vector_db = await response.parse() - assert_matches_type(VectorDBListResponse, vector_db, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_register(self, async_client: AsyncLlamaStackClient) -> None: - vector_db = await async_client.vector_dbs.register( - embedding_model="embedding_model", - vector_db_id="vector_db_id", - ) - assert_matches_type(VectorDBRegisterResponse, vector_db, path=["response"]) - - @parametrize - async def test_method_register_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - vector_db = await async_client.vector_dbs.register( - embedding_model="embedding_model", - vector_db_id="vector_db_id", - embedding_dimension=0, - provider_id="provider_id", - provider_vector_db_id="provider_vector_db_id", - ) - assert_matches_type(VectorDBRegisterResponse, vector_db, path=["response"]) - - @parametrize - async def test_raw_response_register(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.vector_dbs.with_raw_response.register( - embedding_model="embedding_model", - vector_db_id="vector_db_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - vector_db = await response.parse() - assert_matches_type(VectorDBRegisterResponse, vector_db, path=["response"]) - - @parametrize - async def test_streaming_response_register(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.vector_dbs.with_streaming_response.register( - embedding_model="embedding_model", - vector_db_id="vector_db_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - vector_db = await response.parse() - assert_matches_type(VectorDBRegisterResponse, vector_db, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_unregister(self, async_client: AsyncLlamaStackClient) -> None: - vector_db = await async_client.vector_dbs.unregister( - "vector_db_id", - ) - assert vector_db is None - - @parametrize - async def test_raw_response_unregister(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.vector_dbs.with_raw_response.unregister( - "vector_db_id", - ) - - assert response.is_closed is True - assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" - vector_db = await response.parse() - assert vector_db is None - - @parametrize - async def test_streaming_response_unregister(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.vector_dbs.with_streaming_response.unregister( - "vector_db_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - vector_db = await response.parse() - assert vector_db is None - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_unregister(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_db_id` but received ''"): - await async_client.vector_dbs.with_raw_response.unregister( - "", - ) diff --git a/tests/api_resources/test_vector_io.py b/tests/api_resources/test_vector_io.py deleted file mode 100644 index c62a58d3..00000000 --- a/tests/api_resources/test_vector_io.py +++ /dev/null @@ -1,266 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types import QueryChunksResponse - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestVectorIo: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_insert(self, client: LlamaStackClient) -> None: - vector_io = client.vector_io.insert( - chunks=[ - { - "content": "string", - "metadata": {"foo": True}, - } - ], - vector_db_id="vector_db_id", - ) - assert vector_io is None - - @parametrize - def test_method_insert_with_all_params(self, client: LlamaStackClient) -> None: - vector_io = client.vector_io.insert( - chunks=[ - { - "content": "string", - "metadata": {"foo": True}, - "chunk_metadata": { - "chunk_embedding_dimension": 0, - "chunk_embedding_model": "chunk_embedding_model", - "chunk_id": "chunk_id", - "chunk_tokenizer": "chunk_tokenizer", - "chunk_window": "chunk_window", - "content_token_count": 0, - "created_timestamp": 0, - "document_id": "document_id", - "metadata_token_count": 0, - "source": "source", - "updated_timestamp": 0, - }, - "embedding": [0], - "stored_chunk_id": "stored_chunk_id", - } - ], - vector_db_id="vector_db_id", - ttl_seconds=0, - ) - assert vector_io is None - - @parametrize - def test_raw_response_insert(self, client: LlamaStackClient) -> None: - response = client.vector_io.with_raw_response.insert( - chunks=[ - { - "content": "string", - "metadata": {"foo": True}, - } - ], - vector_db_id="vector_db_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - vector_io = response.parse() - assert vector_io is None - - @parametrize - def test_streaming_response_insert(self, client: LlamaStackClient) -> None: - with client.vector_io.with_streaming_response.insert( - chunks=[ - { - "content": "string", - "metadata": {"foo": True}, - } - ], - vector_db_id="vector_db_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - vector_io = response.parse() - assert vector_io is None - - assert cast(Any, response.is_closed) is True 
- - @parametrize - def test_method_query(self, client: LlamaStackClient) -> None: - vector_io = client.vector_io.query( - query="string", - vector_db_id="vector_db_id", - ) - assert_matches_type(QueryChunksResponse, vector_io, path=["response"]) - - @parametrize - def test_method_query_with_all_params(self, client: LlamaStackClient) -> None: - vector_io = client.vector_io.query( - query="string", - vector_db_id="vector_db_id", - params={"foo": True}, - ) - assert_matches_type(QueryChunksResponse, vector_io, path=["response"]) - - @parametrize - def test_raw_response_query(self, client: LlamaStackClient) -> None: - response = client.vector_io.with_raw_response.query( - query="string", - vector_db_id="vector_db_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - vector_io = response.parse() - assert_matches_type(QueryChunksResponse, vector_io, path=["response"]) - - @parametrize - def test_streaming_response_query(self, client: LlamaStackClient) -> None: - with client.vector_io.with_streaming_response.query( - query="string", - vector_db_id="vector_db_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - vector_io = response.parse() - assert_matches_type(QueryChunksResponse, vector_io, path=["response"]) - - assert cast(Any, response.is_closed) is True - - -class TestAsyncVectorIo: - parametrize = pytest.mark.parametrize( - "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] - ) - - @parametrize - async def test_method_insert(self, async_client: AsyncLlamaStackClient) -> None: - vector_io = await async_client.vector_io.insert( - chunks=[ - { - "content": "string", - "metadata": {"foo": True}, - } - ], - vector_db_id="vector_db_id", - ) - assert vector_io is None - - @parametrize - async def test_method_insert_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - vector_io = await async_client.vector_io.insert( - chunks=[ - { - "content": "string", - "metadata": {"foo": True}, - "chunk_metadata": { - "chunk_embedding_dimension": 0, - "chunk_embedding_model": "chunk_embedding_model", - "chunk_id": "chunk_id", - "chunk_tokenizer": "chunk_tokenizer", - "chunk_window": "chunk_window", - "content_token_count": 0, - "created_timestamp": 0, - "document_id": "document_id", - "metadata_token_count": 0, - "source": "source", - "updated_timestamp": 0, - }, - "embedding": [0], - "stored_chunk_id": "stored_chunk_id", - } - ], - vector_db_id="vector_db_id", - ttl_seconds=0, - ) - assert vector_io is None - - @parametrize - async def test_raw_response_insert(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.vector_io.with_raw_response.insert( - chunks=[ - { - "content": "string", - "metadata": {"foo": True}, - } - ], - vector_db_id="vector_db_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - vector_io = await response.parse() - assert vector_io is None - - @parametrize - async def test_streaming_response_insert(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.vector_io.with_streaming_response.insert( - chunks=[ - { - "content": "string", - "metadata": {"foo": True}, - } - ], - vector_db_id="vector_db_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - vector_io = await 
response.parse() - assert vector_io is None - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_query(self, async_client: AsyncLlamaStackClient) -> None: - vector_io = await async_client.vector_io.query( - query="string", - vector_db_id="vector_db_id", - ) - assert_matches_type(QueryChunksResponse, vector_io, path=["response"]) - - @parametrize - async def test_method_query_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - vector_io = await async_client.vector_io.query( - query="string", - vector_db_id="vector_db_id", - params={"foo": True}, - ) - assert_matches_type(QueryChunksResponse, vector_io, path=["response"]) - - @parametrize - async def test_raw_response_query(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.vector_io.with_raw_response.query( - query="string", - vector_db_id="vector_db_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - vector_io = await response.parse() - assert_matches_type(QueryChunksResponse, vector_io, path=["response"]) - - @parametrize - async def test_streaming_response_query(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.vector_io.with_streaming_response.query( - query="string", - vector_db_id="vector_db_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - vector_io = await response.parse() - assert_matches_type(QueryChunksResponse, vector_io, path=["response"]) - - assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/test_vector_stores.py b/tests/api_resources/test_vector_stores.py deleted file mode 100644 index 84324ca4..00000000 --- a/tests/api_resources/test_vector_stores.py +++ /dev/null @@ -1,555 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types import ( - VectorStore, - ListVectorStoresResponse, - VectorStoreDeleteResponse, - VectorStoreSearchResponse, -) - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestVectorStores: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_create(self, client: LlamaStackClient) -> None: - vector_store = client.vector_stores.create( - name="name", - ) - assert_matches_type(VectorStore, vector_store, path=["response"]) - - @parametrize - def test_method_create_with_all_params(self, client: LlamaStackClient) -> None: - vector_store = client.vector_stores.create( - name="name", - chunking_strategy={"foo": True}, - embedding_dimension=0, - embedding_model="embedding_model", - expires_after={"foo": True}, - file_ids=["string"], - metadata={"foo": True}, - provider_id="provider_id", - provider_vector_db_id="provider_vector_db_id", - ) - assert_matches_type(VectorStore, vector_store, path=["response"]) - - @parametrize - def test_raw_response_create(self, client: LlamaStackClient) -> None: - response = client.vector_stores.with_raw_response.create( - name="name", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - vector_store = response.parse() - assert_matches_type(VectorStore, vector_store, path=["response"]) - - @parametrize - def test_streaming_response_create(self, client: LlamaStackClient) -> None: - with client.vector_stores.with_streaming_response.create( - name="name", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - vector_store = response.parse() - assert_matches_type(VectorStore, vector_store, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_retrieve(self, client: LlamaStackClient) -> None: - vector_store = client.vector_stores.retrieve( - "vector_store_id", - ) - assert_matches_type(VectorStore, vector_store, path=["response"]) - - @parametrize - def test_raw_response_retrieve(self, client: LlamaStackClient) -> None: - response = client.vector_stores.with_raw_response.retrieve( - "vector_store_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - vector_store = response.parse() - assert_matches_type(VectorStore, vector_store, path=["response"]) - - @parametrize - def test_streaming_response_retrieve(self, client: LlamaStackClient) -> None: - with client.vector_stores.with_streaming_response.retrieve( - "vector_store_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - vector_store = response.parse() - assert_matches_type(VectorStore, vector_store, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_retrieve(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - client.vector_stores.with_raw_response.retrieve( - "", - ) - - @parametrize - def test_method_update(self, client: LlamaStackClient) -> None: - vector_store = 
client.vector_stores.update( - vector_store_id="vector_store_id", - ) - assert_matches_type(VectorStore, vector_store, path=["response"]) - - @parametrize - def test_method_update_with_all_params(self, client: LlamaStackClient) -> None: - vector_store = client.vector_stores.update( - vector_store_id="vector_store_id", - expires_after={"foo": True}, - metadata={"foo": True}, - name="name", - ) - assert_matches_type(VectorStore, vector_store, path=["response"]) - - @parametrize - def test_raw_response_update(self, client: LlamaStackClient) -> None: - response = client.vector_stores.with_raw_response.update( - vector_store_id="vector_store_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - vector_store = response.parse() - assert_matches_type(VectorStore, vector_store, path=["response"]) - - @parametrize - def test_streaming_response_update(self, client: LlamaStackClient) -> None: - with client.vector_stores.with_streaming_response.update( - vector_store_id="vector_store_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - vector_store = response.parse() - assert_matches_type(VectorStore, vector_store, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_update(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - client.vector_stores.with_raw_response.update( - vector_store_id="", - ) - - @parametrize - def test_method_list(self, client: LlamaStackClient) -> None: - vector_store = client.vector_stores.list() - assert_matches_type(ListVectorStoresResponse, vector_store, path=["response"]) - - @parametrize - def test_method_list_with_all_params(self, client: LlamaStackClient) -> None: - vector_store = client.vector_stores.list( - after="after", - before="before", - limit=0, - order="order", - ) - assert_matches_type(ListVectorStoresResponse, vector_store, path=["response"]) - - @parametrize - def test_raw_response_list(self, client: LlamaStackClient) -> None: - response = client.vector_stores.with_raw_response.list() - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - vector_store = response.parse() - assert_matches_type(ListVectorStoresResponse, vector_store, path=["response"]) - - @parametrize - def test_streaming_response_list(self, client: LlamaStackClient) -> None: - with client.vector_stores.with_streaming_response.list() as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - vector_store = response.parse() - assert_matches_type(ListVectorStoresResponse, vector_store, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_delete(self, client: LlamaStackClient) -> None: - vector_store = client.vector_stores.delete( - "vector_store_id", - ) - assert_matches_type(VectorStoreDeleteResponse, vector_store, path=["response"]) - - @parametrize - def test_raw_response_delete(self, client: LlamaStackClient) -> None: - response = client.vector_stores.with_raw_response.delete( - "vector_store_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - vector_store = response.parse() - assert_matches_type(VectorStoreDeleteResponse, vector_store, 
path=["response"]) - - @parametrize - def test_streaming_response_delete(self, client: LlamaStackClient) -> None: - with client.vector_stores.with_streaming_response.delete( - "vector_store_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - vector_store = response.parse() - assert_matches_type(VectorStoreDeleteResponse, vector_store, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_delete(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - client.vector_stores.with_raw_response.delete( - "", - ) - - @parametrize - def test_method_search(self, client: LlamaStackClient) -> None: - vector_store = client.vector_stores.search( - vector_store_id="vector_store_id", - query="string", - ) - assert_matches_type(VectorStoreSearchResponse, vector_store, path=["response"]) - - @parametrize - def test_method_search_with_all_params(self, client: LlamaStackClient) -> None: - vector_store = client.vector_stores.search( - vector_store_id="vector_store_id", - query="string", - filters={"foo": True}, - max_num_results=0, - ranking_options={ - "ranker": "ranker", - "score_threshold": 0, - }, - rewrite_query=True, - search_mode="search_mode", - ) - assert_matches_type(VectorStoreSearchResponse, vector_store, path=["response"]) - - @parametrize - def test_raw_response_search(self, client: LlamaStackClient) -> None: - response = client.vector_stores.with_raw_response.search( - vector_store_id="vector_store_id", - query="string", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - vector_store = response.parse() - assert_matches_type(VectorStoreSearchResponse, vector_store, path=["response"]) - - @parametrize - def test_streaming_response_search(self, client: LlamaStackClient) -> None: - with client.vector_stores.with_streaming_response.search( - vector_store_id="vector_store_id", - query="string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - vector_store = response.parse() - assert_matches_type(VectorStoreSearchResponse, vector_store, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_search(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - client.vector_stores.with_raw_response.search( - vector_store_id="", - query="string", - ) - - -class TestAsyncVectorStores: - parametrize = pytest.mark.parametrize( - "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] - ) - - @parametrize - async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None: - vector_store = await async_client.vector_stores.create( - name="name", - ) - assert_matches_type(VectorStore, vector_store, path=["response"]) - - @parametrize - async def test_method_create_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - vector_store = await async_client.vector_stores.create( - name="name", - chunking_strategy={"foo": True}, - embedding_dimension=0, - embedding_model="embedding_model", - expires_after={"foo": True}, - file_ids=["string"], - metadata={"foo": True}, - provider_id="provider_id", - 
provider_vector_db_id="provider_vector_db_id", - ) - assert_matches_type(VectorStore, vector_store, path=["response"]) - - @parametrize - async def test_raw_response_create(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.vector_stores.with_raw_response.create( - name="name", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - vector_store = await response.parse() - assert_matches_type(VectorStore, vector_store, path=["response"]) - - @parametrize - async def test_streaming_response_create(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.vector_stores.with_streaming_response.create( - name="name", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - vector_store = await response.parse() - assert_matches_type(VectorStore, vector_store, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - vector_store = await async_client.vector_stores.retrieve( - "vector_store_id", - ) - assert_matches_type(VectorStore, vector_store, path=["response"]) - - @parametrize - async def test_raw_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.vector_stores.with_raw_response.retrieve( - "vector_store_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - vector_store = await response.parse() - assert_matches_type(VectorStore, vector_store, path=["response"]) - - @parametrize - async def test_streaming_response_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.vector_stores.with_streaming_response.retrieve( - "vector_store_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - vector_store = await response.parse() - assert_matches_type(VectorStore, vector_store, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_retrieve(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - await async_client.vector_stores.with_raw_response.retrieve( - "", - ) - - @parametrize - async def test_method_update(self, async_client: AsyncLlamaStackClient) -> None: - vector_store = await async_client.vector_stores.update( - vector_store_id="vector_store_id", - ) - assert_matches_type(VectorStore, vector_store, path=["response"]) - - @parametrize - async def test_method_update_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - vector_store = await async_client.vector_stores.update( - vector_store_id="vector_store_id", - expires_after={"foo": True}, - metadata={"foo": True}, - name="name", - ) - assert_matches_type(VectorStore, vector_store, path=["response"]) - - @parametrize - async def test_raw_response_update(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.vector_stores.with_raw_response.update( - vector_store_id="vector_store_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - vector_store = await response.parse() - assert_matches_type(VectorStore, vector_store, path=["response"]) - - 
@parametrize - async def test_streaming_response_update(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.vector_stores.with_streaming_response.update( - vector_store_id="vector_store_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - vector_store = await response.parse() - assert_matches_type(VectorStore, vector_store, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_update(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - await async_client.vector_stores.with_raw_response.update( - vector_store_id="", - ) - - @parametrize - async def test_method_list(self, async_client: AsyncLlamaStackClient) -> None: - vector_store = await async_client.vector_stores.list() - assert_matches_type(ListVectorStoresResponse, vector_store, path=["response"]) - - @parametrize - async def test_method_list_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - vector_store = await async_client.vector_stores.list( - after="after", - before="before", - limit=0, - order="order", - ) - assert_matches_type(ListVectorStoresResponse, vector_store, path=["response"]) - - @parametrize - async def test_raw_response_list(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.vector_stores.with_raw_response.list() - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - vector_store = await response.parse() - assert_matches_type(ListVectorStoresResponse, vector_store, path=["response"]) - - @parametrize - async def test_streaming_response_list(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.vector_stores.with_streaming_response.list() as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - vector_store = await response.parse() - assert_matches_type(ListVectorStoresResponse, vector_store, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_delete(self, async_client: AsyncLlamaStackClient) -> None: - vector_store = await async_client.vector_stores.delete( - "vector_store_id", - ) - assert_matches_type(VectorStoreDeleteResponse, vector_store, path=["response"]) - - @parametrize - async def test_raw_response_delete(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.vector_stores.with_raw_response.delete( - "vector_store_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - vector_store = await response.parse() - assert_matches_type(VectorStoreDeleteResponse, vector_store, path=["response"]) - - @parametrize - async def test_streaming_response_delete(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.vector_stores.with_streaming_response.delete( - "vector_store_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - vector_store = await response.parse() - assert_matches_type(VectorStoreDeleteResponse, vector_store, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_delete(self, async_client: AsyncLlamaStackClient) -> None: - with 
pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - await async_client.vector_stores.with_raw_response.delete( - "", - ) - - @parametrize - async def test_method_search(self, async_client: AsyncLlamaStackClient) -> None: - vector_store = await async_client.vector_stores.search( - vector_store_id="vector_store_id", - query="string", - ) - assert_matches_type(VectorStoreSearchResponse, vector_store, path=["response"]) - - @parametrize - async def test_method_search_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - vector_store = await async_client.vector_stores.search( - vector_store_id="vector_store_id", - query="string", - filters={"foo": True}, - max_num_results=0, - ranking_options={ - "ranker": "ranker", - "score_threshold": 0, - }, - rewrite_query=True, - search_mode="search_mode", - ) - assert_matches_type(VectorStoreSearchResponse, vector_store, path=["response"]) - - @parametrize - async def test_raw_response_search(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.vector_stores.with_raw_response.search( - vector_store_id="vector_store_id", - query="string", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - vector_store = await response.parse() - assert_matches_type(VectorStoreSearchResponse, vector_store, path=["response"]) - - @parametrize - async def test_streaming_response_search(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.vector_stores.with_streaming_response.search( - vector_store_id="vector_store_id", - query="string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - vector_store = await response.parse() - assert_matches_type(VectorStoreSearchResponse, vector_store, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_search(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - await async_client.vector_stores.with_raw_response.search( - vector_store_id="", - query="string", - ) diff --git a/tests/api_resources/tool_runtime/__init__.py b/tests/api_resources/tool_runtime/__init__.py deleted file mode 100644 index fd8019a9..00000000 --- a/tests/api_resources/tool_runtime/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/tool_runtime/test_rag_tool.py b/tests/api_resources/tool_runtime/test_rag_tool.py deleted file mode 100644 index 17a64d8e..00000000 --- a/tests/api_resources/tool_runtime/test_rag_tool.py +++ /dev/null @@ -1,246 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types.shared import QueryResult - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestRagTool: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_insert(self, client: LlamaStackClient) -> None: - rag_tool = client.tool_runtime.rag_tool.insert( - chunk_size_in_tokens=0, - documents=[ - { - "content": "string", - "document_id": "document_id", - "metadata": {"foo": True}, - } - ], - vector_db_id="vector_db_id", - ) - assert rag_tool is None - - @parametrize - def test_raw_response_insert(self, client: LlamaStackClient) -> None: - response = client.tool_runtime.rag_tool.with_raw_response.insert( - chunk_size_in_tokens=0, - documents=[ - { - "content": "string", - "document_id": "document_id", - "metadata": {"foo": True}, - } - ], - vector_db_id="vector_db_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - rag_tool = response.parse() - assert rag_tool is None - - @parametrize - def test_streaming_response_insert(self, client: LlamaStackClient) -> None: - with client.tool_runtime.rag_tool.with_streaming_response.insert( - chunk_size_in_tokens=0, - documents=[ - { - "content": "string", - "document_id": "document_id", - "metadata": {"foo": True}, - } - ], - vector_db_id="vector_db_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - rag_tool = response.parse() - assert rag_tool is None - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_method_query(self, client: LlamaStackClient) -> None: - rag_tool = client.tool_runtime.rag_tool.query( - content="string", - vector_db_ids=["string"], - ) - assert_matches_type(QueryResult, rag_tool, path=["response"]) - - @parametrize - def test_method_query_with_all_params(self, client: LlamaStackClient) -> None: - rag_tool = client.tool_runtime.rag_tool.query( - content="string", - vector_db_ids=["string"], - query_config={ - "chunk_template": "chunk_template", - "max_chunks": 0, - "max_tokens_in_context": 0, - "query_generator_config": { - "separator": "separator", - "type": "default", - }, - "mode": "mode", - "ranker": { - "impact_factor": 0, - "type": "rrf", - }, - }, - ) - assert_matches_type(QueryResult, rag_tool, path=["response"]) - - @parametrize - def test_raw_response_query(self, client: LlamaStackClient) -> None: - response = client.tool_runtime.rag_tool.with_raw_response.query( - content="string", - vector_db_ids=["string"], - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - rag_tool = response.parse() - assert_matches_type(QueryResult, rag_tool, path=["response"]) - - @parametrize - def test_streaming_response_query(self, client: LlamaStackClient) -> None: - with client.tool_runtime.rag_tool.with_streaming_response.query( - content="string", - vector_db_ids=["string"], - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - rag_tool = response.parse() - assert_matches_type(QueryResult, rag_tool, path=["response"]) - - assert cast(Any, response.is_closed) is True - - -class 
TestAsyncRagTool: - parametrize = pytest.mark.parametrize( - "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] - ) - - @parametrize - async def test_method_insert(self, async_client: AsyncLlamaStackClient) -> None: - rag_tool = await async_client.tool_runtime.rag_tool.insert( - chunk_size_in_tokens=0, - documents=[ - { - "content": "string", - "document_id": "document_id", - "metadata": {"foo": True}, - } - ], - vector_db_id="vector_db_id", - ) - assert rag_tool is None - - @parametrize - async def test_raw_response_insert(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.tool_runtime.rag_tool.with_raw_response.insert( - chunk_size_in_tokens=0, - documents=[ - { - "content": "string", - "document_id": "document_id", - "metadata": {"foo": True}, - } - ], - vector_db_id="vector_db_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - rag_tool = await response.parse() - assert rag_tool is None - - @parametrize - async def test_streaming_response_insert(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.tool_runtime.rag_tool.with_streaming_response.insert( - chunk_size_in_tokens=0, - documents=[ - { - "content": "string", - "document_id": "document_id", - "metadata": {"foo": True}, - } - ], - vector_db_id="vector_db_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - rag_tool = await response.parse() - assert rag_tool is None - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_method_query(self, async_client: AsyncLlamaStackClient) -> None: - rag_tool = await async_client.tool_runtime.rag_tool.query( - content="string", - vector_db_ids=["string"], - ) - assert_matches_type(QueryResult, rag_tool, path=["response"]) - - @parametrize - async def test_method_query_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - rag_tool = await async_client.tool_runtime.rag_tool.query( - content="string", - vector_db_ids=["string"], - query_config={ - "chunk_template": "chunk_template", - "max_chunks": 0, - "max_tokens_in_context": 0, - "query_generator_config": { - "separator": "separator", - "type": "default", - }, - "mode": "mode", - "ranker": { - "impact_factor": 0, - "type": "rrf", - }, - }, - ) - assert_matches_type(QueryResult, rag_tool, path=["response"]) - - @parametrize - async def test_raw_response_query(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.tool_runtime.rag_tool.with_raw_response.query( - content="string", - vector_db_ids=["string"], - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - rag_tool = await response.parse() - assert_matches_type(QueryResult, rag_tool, path=["response"]) - - @parametrize - async def test_streaming_response_query(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.tool_runtime.rag_tool.with_streaming_response.query( - content="string", - vector_db_ids=["string"], - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - rag_tool = await response.parse() - assert_matches_type(QueryResult, rag_tool, path=["response"]) - - assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/vector_stores/__init__.py 
b/tests/api_resources/vector_stores/__init__.py deleted file mode 100644 index fd8019a9..00000000 --- a/tests/api_resources/vector_stores/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/vector_stores/test_files.py b/tests/api_resources/vector_stores/test_files.py deleted file mode 100644 index f9728a36..00000000 --- a/tests/api_resources/vector_stores/test_files.py +++ /dev/null @@ -1,128 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from tests.utils import assert_matches_type -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client.types.vector_stores import VectorStoreFile - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") - - -class TestFiles: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_create(self, client: LlamaStackClient) -> None: - file = client.vector_stores.files.create( - vector_store_id="vector_store_id", - file_id="file_id", - ) - assert_matches_type(VectorStoreFile, file, path=["response"]) - - @parametrize - def test_method_create_with_all_params(self, client: LlamaStackClient) -> None: - file = client.vector_stores.files.create( - vector_store_id="vector_store_id", - file_id="file_id", - attributes={"foo": True}, - chunking_strategy={"type": "auto"}, - ) - assert_matches_type(VectorStoreFile, file, path=["response"]) - - @parametrize - def test_raw_response_create(self, client: LlamaStackClient) -> None: - response = client.vector_stores.files.with_raw_response.create( - vector_store_id="vector_store_id", - file_id="file_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = response.parse() - assert_matches_type(VectorStoreFile, file, path=["response"]) - - @parametrize - def test_streaming_response_create(self, client: LlamaStackClient) -> None: - with client.vector_stores.files.with_streaming_response.create( - vector_store_id="vector_store_id", - file_id="file_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - file = response.parse() - assert_matches_type(VectorStoreFile, file, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_create(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - client.vector_stores.files.with_raw_response.create( - vector_store_id="", - file_id="file_id", - ) - - -class TestAsyncFiles: - parametrize = pytest.mark.parametrize( - "async_client", [False, True, {"http_client": "aiohttp"}], indirect=True, ids=["loose", "strict", "aiohttp"] - ) - - @parametrize - async def test_method_create(self, async_client: AsyncLlamaStackClient) -> None: - file = await async_client.vector_stores.files.create( - vector_store_id="vector_store_id", - file_id="file_id", - ) - assert_matches_type(VectorStoreFile, file, path=["response"]) - - @parametrize - async def test_method_create_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: - file = await async_client.vector_stores.files.create( - vector_store_id="vector_store_id", - 
file_id="file_id", - attributes={"foo": True}, - chunking_strategy={"type": "auto"}, - ) - assert_matches_type(VectorStoreFile, file, path=["response"]) - - @parametrize - async def test_raw_response_create(self, async_client: AsyncLlamaStackClient) -> None: - response = await async_client.vector_stores.files.with_raw_response.create( - vector_store_id="vector_store_id", - file_id="file_id", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = await response.parse() - assert_matches_type(VectorStoreFile, file, path=["response"]) - - @parametrize - async def test_streaming_response_create(self, async_client: AsyncLlamaStackClient) -> None: - async with async_client.vector_stores.files.with_streaming_response.create( - vector_store_id="vector_store_id", - file_id="file_id", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - file = await response.parse() - assert_matches_type(VectorStoreFile, file, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_create(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - await async_client.vector_stores.files.with_raw_response.create( - vector_store_id="", - file_id="file_id", - ) diff --git a/tests/conftest.py b/tests/conftest.py index ddadec32..a4109856 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -10,15 +10,15 @@ import pytest from pytest_asyncio import is_async_test -from llama_stack_client import LlamaStackClient, DefaultAioHttpClient, AsyncLlamaStackClient -from llama_stack_client._utils import is_dict +from llama_stack_cli import LlamaStackCli, AsyncLlamaStackCli, DefaultAioHttpClient +from llama_stack_cli._utils import is_dict if TYPE_CHECKING: from _pytest.fixtures import FixtureRequest # pyright: ignore[reportPrivateImportUsage] pytest.register_assert_rewrite("tests.utils") -logging.getLogger("llama_stack_client").setLevel(logging.DEBUG) +logging.getLogger("llama_stack_cli").setLevel(logging.DEBUG) # automatically add `pytest.mark.asyncio()` to all of our async tests @@ -45,19 +45,21 @@ def pytest_collection_modifyitems(items: list[pytest.Function]) -> None: base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") +api_key = "My API Key" + @pytest.fixture(scope="session") -def client(request: FixtureRequest) -> Iterator[LlamaStackClient]: +def client(request: FixtureRequest) -> Iterator[LlamaStackCli]: strict = getattr(request, "param", True) if not isinstance(strict, bool): raise TypeError(f"Unexpected fixture parameter type {type(strict)}, expected {bool}") - with LlamaStackClient(base_url=base_url, _strict_response_validation=strict) as client: + with LlamaStackCli(base_url=base_url, api_key=api_key, _strict_response_validation=strict) as client: yield client @pytest.fixture(scope="session") -async def async_client(request: FixtureRequest) -> AsyncIterator[AsyncLlamaStackClient]: +async def async_client(request: FixtureRequest) -> AsyncIterator[AsyncLlamaStackCli]: param = getattr(request, "param", True) # defaults @@ -76,7 +78,7 @@ async def async_client(request: FixtureRequest) -> AsyncIterator[AsyncLlamaStack else: raise TypeError(f"Unexpected fixture parameter type {type(param)}, expected bool or dict") - async with AsyncLlamaStackClient( - base_url=base_url, _strict_response_validation=strict, 
http_client=http_client + async with AsyncLlamaStackCli( + base_url=base_url, api_key=api_key, _strict_response_validation=strict, http_client=http_client ) as client: yield client diff --git a/tests/test_client.py b/tests/test_client.py index 6a1a8f85..e5d1e91c 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -21,11 +21,11 @@ from respx import MockRouter from pydantic import ValidationError -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient, APIResponseValidationError -from llama_stack_client._types import Omit -from llama_stack_client._models import BaseModel, FinalRequestOptions -from llama_stack_client._exceptions import APIStatusError, APITimeoutError, APIResponseValidationError -from llama_stack_client._base_client import ( +from llama_stack_cli import LlamaStackCli, AsyncLlamaStackCli, APIResponseValidationError +from llama_stack_cli._types import Omit +from llama_stack_cli._models import BaseModel, FinalRequestOptions +from llama_stack_cli._exceptions import APIStatusError, APITimeoutError, LlamaStackCliError, APIResponseValidationError +from llama_stack_cli._base_client import ( DEFAULT_TIMEOUT, HTTPX_DEFAULT_TIMEOUT, BaseClient, @@ -37,6 +37,7 @@ from .utils import update_env base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010") +api_key = "My API Key" def _get_params(client: BaseClient[Any, Any]) -> dict[str, str]: @@ -49,7 +50,7 @@ def _low_retry_timeout(*_args: Any, **_kwargs: Any) -> float: return 0.1 -def _get_open_connections(client: LlamaStackClient | AsyncLlamaStackClient) -> int: +def _get_open_connections(client: LlamaStackCli | AsyncLlamaStackCli) -> int: transport = client._client._transport assert isinstance(transport, httpx.HTTPTransport) or isinstance(transport, httpx.AsyncHTTPTransport) @@ -57,8 +58,8 @@ def _get_open_connections(client: LlamaStackClient | AsyncLlamaStackClient) -> i return len(pool._requests) -class TestLlamaStackClient: - client = LlamaStackClient(base_url=base_url, _strict_response_validation=True) +class TestLlamaStackCli: + client = LlamaStackCli(base_url=base_url, api_key=api_key, _strict_response_validation=True) @pytest.mark.respx(base_url=base_url) def test_raw_response(self, respx_mock: MockRouter) -> None: @@ -84,6 +85,10 @@ def test_copy(self) -> None: copied = self.client.copy() assert id(copied) != id(self.client) + copied = self.client.copy(api_key="another My API Key") + assert copied.api_key == "another My API Key" + assert self.client.api_key == "My API Key" + def test_copy_default_options(self) -> None: # options that have a default are overridden correctly copied = self.client.copy(max_retries=7) @@ -101,7 +106,9 @@ def test_copy_default_options(self) -> None: assert isinstance(self.client.timeout, httpx.Timeout) def test_copy_default_headers(self) -> None: - client = LlamaStackClient(base_url=base_url, _strict_response_validation=True, default_headers={"X-Foo": "bar"}) + client = LlamaStackCli( + base_url=base_url, api_key=api_key, _strict_response_validation=True, default_headers={"X-Foo": "bar"} + ) assert client.default_headers["X-Foo"] == "bar" # does not override the already given value when not specified @@ -133,7 +140,9 @@ def test_copy_default_headers(self) -> None: client.copy(set_default_headers={}, default_headers={"X-Foo": "Bar"}) def test_copy_default_query(self) -> None: - client = LlamaStackClient(base_url=base_url, _strict_response_validation=True, default_query={"foo": "bar"}) + client = LlamaStackCli( + base_url=base_url, api_key=api_key, 
_strict_response_validation=True, default_query={"foo": "bar"} + ) assert _get_params(client)["foo"] == "bar" # does not override the already given value when not specified @@ -223,10 +232,10 @@ def add_leak(leaks: list[tracemalloc.StatisticDiff], diff: tracemalloc.Statistic # to_raw_response_wrapper leaks through the @functools.wraps() decorator. # # removing the decorator fixes the leak for reasons we don't understand. - "llama_stack_client/_legacy_response.py", - "llama_stack_client/_response.py", + "llama_stack_cli/_legacy_response.py", + "llama_stack_cli/_response.py", # pydantic.BaseModel.model_dump || pydantic.BaseModel.dict leak memory for some reason. - "llama_stack_client/_compat.py", + "llama_stack_cli/_compat.py", # Standard library leaks we don't care about. "/logging/__init__.py", ] @@ -257,7 +266,9 @@ def test_request_timeout(self) -> None: assert timeout == httpx.Timeout(100.0) def test_client_timeout_option(self) -> None: - client = LlamaStackClient(base_url=base_url, _strict_response_validation=True, timeout=httpx.Timeout(0)) + client = LlamaStackCli( + base_url=base_url, api_key=api_key, _strict_response_validation=True, timeout=httpx.Timeout(0) + ) request = client._build_request(FinalRequestOptions(method="get", url="/foo")) timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore @@ -266,7 +277,9 @@ def test_client_timeout_option(self) -> None: def test_http_client_timeout_option(self) -> None: # custom timeout given to the httpx client should be used with httpx.Client(timeout=None) as http_client: - client = LlamaStackClient(base_url=base_url, _strict_response_validation=True, http_client=http_client) + client = LlamaStackCli( + base_url=base_url, api_key=api_key, _strict_response_validation=True, http_client=http_client + ) request = client._build_request(FinalRequestOptions(method="get", url="/foo")) timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore @@ -274,7 +287,9 @@ def test_http_client_timeout_option(self) -> None: # no timeout given to the httpx client should not use the httpx default with httpx.Client() as http_client: - client = LlamaStackClient(base_url=base_url, _strict_response_validation=True, http_client=http_client) + client = LlamaStackCli( + base_url=base_url, api_key=api_key, _strict_response_validation=True, http_client=http_client + ) request = client._build_request(FinalRequestOptions(method="get", url="/foo")) timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore @@ -282,7 +297,9 @@ def test_http_client_timeout_option(self) -> None: # explicitly passing the default timeout currently results in it being ignored with httpx.Client(timeout=HTTPX_DEFAULT_TIMEOUT) as http_client: - client = LlamaStackClient(base_url=base_url, _strict_response_validation=True, http_client=http_client) + client = LlamaStackCli( + base_url=base_url, api_key=api_key, _strict_response_validation=True, http_client=http_client + ) request = client._build_request(FinalRequestOptions(method="get", url="/foo")) timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore @@ -291,18 +308,24 @@ def test_http_client_timeout_option(self) -> None: async def test_invalid_http_client(self) -> None: with pytest.raises(TypeError, match="Invalid `http_client` arg"): async with httpx.AsyncClient() as http_client: - LlamaStackClient( - base_url=base_url, _strict_response_validation=True, http_client=cast(Any, http_client) + LlamaStackCli( + base_url=base_url, + api_key=api_key, + _strict_response_validation=True, + 
http_client=cast(Any, http_client), ) def test_default_headers_option(self) -> None: - client = LlamaStackClient(base_url=base_url, _strict_response_validation=True, default_headers={"X-Foo": "bar"}) + client = LlamaStackCli( + base_url=base_url, api_key=api_key, _strict_response_validation=True, default_headers={"X-Foo": "bar"} + ) request = client._build_request(FinalRequestOptions(method="get", url="/foo")) assert request.headers.get("x-foo") == "bar" assert request.headers.get("x-stainless-lang") == "python" - client2 = LlamaStackClient( + client2 = LlamaStackCli( base_url=base_url, + api_key=api_key, _strict_response_validation=True, default_headers={ "X-Foo": "stainless", @@ -313,9 +336,19 @@ def test_default_headers_option(self) -> None: assert request.headers.get("x-foo") == "stainless" assert request.headers.get("x-stainless-lang") == "my-overriding-header" + def test_validate_headers(self) -> None: + client = LlamaStackCli(base_url=base_url, api_key=api_key, _strict_response_validation=True) + request = client._build_request(FinalRequestOptions(method="get", url="/foo")) + assert request.headers.get("api_key") == api_key + + with pytest.raises(LlamaStackCliError): + with update_env(**{"PETSTORE_API_KEY": Omit()}): + client2 = LlamaStackCli(base_url=base_url, api_key=None, _strict_response_validation=True) + _ = client2 + def test_default_query_option(self) -> None: - client = LlamaStackClient( - base_url=base_url, _strict_response_validation=True, default_query={"query_param": "bar"} + client = LlamaStackCli( + base_url=base_url, api_key=api_key, _strict_response_validation=True, default_query={"query_param": "bar"} ) request = client._build_request(FinalRequestOptions(method="get", url="/foo")) url = httpx.URL(request.url) @@ -428,10 +461,10 @@ def test_request_extra_query(self) -> None: params = dict(request.url.params) assert params == {"foo": "2"} - def test_multipart_repeating_array(self, client: LlamaStackClient) -> None: + def test_multipart_repeating_array(self, client: LlamaStackCli) -> None: request = client._build_request( FinalRequestOptions.construct( - method="get", + method="post", url="/foo", headers={"Content-Type": "multipart/form-data; boundary=6b7ba517decee4a450543ea6ae821c82"}, json_data={"array": ["foo", "bar"]}, @@ -515,7 +548,9 @@ class Model(BaseModel): assert response.foo == 2 def test_base_url_setter(self) -> None: - client = LlamaStackClient(base_url="https://example.com/from_init", _strict_response_validation=True) + client = LlamaStackCli( + base_url="https://example.com/from_init", api_key=api_key, _strict_response_validation=True + ) assert client.base_url == "https://example.com/from_init/" client.base_url = "https://example.com/from_setter" # type: ignore[assignment] @@ -523,23 +558,26 @@ def test_base_url_setter(self) -> None: assert client.base_url == "https://example.com/from_setter/" def test_base_url_env(self) -> None: - with update_env(LLAMA_STACK_BASE_URL="http://localhost:5000/from/env"): - client = LlamaStackClient(_strict_response_validation=True) + with update_env(LLAMA_STACK_CLI_BASE_URL="http://localhost:5000/from/env"): + client = LlamaStackCli(api_key=api_key, _strict_response_validation=True) assert client.base_url == "http://localhost:5000/from/env/" @pytest.mark.parametrize( "client", [ - LlamaStackClient(base_url="http://localhost:5000/custom/path/", _strict_response_validation=True), - LlamaStackClient( + LlamaStackCli( + base_url="http://localhost:5000/custom/path/", api_key=api_key, _strict_response_validation=True + ), + 
LlamaStackCli( base_url="http://localhost:5000/custom/path/", + api_key=api_key, _strict_response_validation=True, http_client=httpx.Client(), ), ], ids=["standard", "custom http client"], ) - def test_base_url_trailing_slash(self, client: LlamaStackClient) -> None: + def test_base_url_trailing_slash(self, client: LlamaStackCli) -> None: request = client._build_request( FinalRequestOptions( method="post", @@ -552,16 +590,19 @@ def test_base_url_trailing_slash(self, client: LlamaStackClient) -> None: @pytest.mark.parametrize( "client", [ - LlamaStackClient(base_url="http://localhost:5000/custom/path/", _strict_response_validation=True), - LlamaStackClient( + LlamaStackCli( + base_url="http://localhost:5000/custom/path/", api_key=api_key, _strict_response_validation=True + ), + LlamaStackCli( base_url="http://localhost:5000/custom/path/", + api_key=api_key, _strict_response_validation=True, http_client=httpx.Client(), ), ], ids=["standard", "custom http client"], ) - def test_base_url_no_trailing_slash(self, client: LlamaStackClient) -> None: + def test_base_url_no_trailing_slash(self, client: LlamaStackCli) -> None: request = client._build_request( FinalRequestOptions( method="post", @@ -574,16 +615,19 @@ def test_base_url_no_trailing_slash(self, client: LlamaStackClient) -> None: @pytest.mark.parametrize( "client", [ - LlamaStackClient(base_url="http://localhost:5000/custom/path/", _strict_response_validation=True), - LlamaStackClient( + LlamaStackCli( + base_url="http://localhost:5000/custom/path/", api_key=api_key, _strict_response_validation=True + ), + LlamaStackCli( base_url="http://localhost:5000/custom/path/", + api_key=api_key, _strict_response_validation=True, http_client=httpx.Client(), ), ], ids=["standard", "custom http client"], ) - def test_absolute_request_url(self, client: LlamaStackClient) -> None: + def test_absolute_request_url(self, client: LlamaStackCli) -> None: request = client._build_request( FinalRequestOptions( method="post", @@ -594,7 +638,7 @@ def test_absolute_request_url(self, client: LlamaStackClient) -> None: assert request.url == "https://myapi.com/foo" def test_copied_client_does_not_close_http(self) -> None: - client = LlamaStackClient(base_url=base_url, _strict_response_validation=True) + client = LlamaStackCli(base_url=base_url, api_key=api_key, _strict_response_validation=True) assert not client.is_closed() copied = client.copy() @@ -605,7 +649,7 @@ def test_copied_client_does_not_close_http(self) -> None: assert not client.is_closed() def test_client_context_manager(self) -> None: - client = LlamaStackClient(base_url=base_url, _strict_response_validation=True) + client = LlamaStackCli(base_url=base_url, api_key=api_key, _strict_response_validation=True) with client as c2: assert c2 is client assert not c2.is_closed() @@ -626,7 +670,9 @@ class Model(BaseModel): def test_client_max_retries_validation(self) -> None: with pytest.raises(TypeError, match=r"max_retries cannot be None"): - LlamaStackClient(base_url=base_url, _strict_response_validation=True, max_retries=cast(Any, None)) + LlamaStackCli( + base_url=base_url, api_key=api_key, _strict_response_validation=True, max_retries=cast(Any, None) + ) @pytest.mark.respx(base_url=base_url) def test_received_text_for_expected_json(self, respx_mock: MockRouter) -> None: @@ -635,12 +681,12 @@ class Model(BaseModel): respx_mock.get("/foo").mock(return_value=httpx.Response(200, text="my-custom-format")) - strict_client = LlamaStackClient(base_url=base_url, _strict_response_validation=True) + strict_client = 
LlamaStackCli(base_url=base_url, api_key=api_key, _strict_response_validation=True) with pytest.raises(APIResponseValidationError): strict_client.get("/foo", cast_to=Model) - client = LlamaStackClient(base_url=base_url, _strict_response_validation=False) + client = LlamaStackCli(base_url=base_url, api_key=api_key, _strict_response_validation=False) response = client.get("/foo", cast_to=Model) assert isinstance(response, str) # type: ignore[unreachable] @@ -668,55 +714,39 @@ class Model(BaseModel): ) @mock.patch("time.time", mock.MagicMock(return_value=1696004797)) def test_parse_retry_after_header(self, remaining_retries: int, retry_after: str, timeout: float) -> None: - client = LlamaStackClient(base_url=base_url, _strict_response_validation=True) + client = LlamaStackCli(base_url=base_url, api_key=api_key, _strict_response_validation=True) headers = httpx.Headers({"retry-after": retry_after}) options = FinalRequestOptions(method="get", url="/foo", max_retries=3) calculated = client._calculate_retry_timeout(remaining_retries, options, headers) assert calculated == pytest.approx(timeout, 0.5 * 0.875) # pyright: ignore[reportUnknownMemberType] - @mock.patch("llama_stack_client._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) + @mock.patch("llama_stack_cli._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) - def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter, client: LlamaStackClient) -> None: - respx_mock.post("/v1/inference/chat-completion").mock(side_effect=httpx.TimeoutException("Test timeout error")) + def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter, client: LlamaStackCli) -> None: + respx_mock.get("/store/inventory").mock(side_effect=httpx.TimeoutException("Test timeout error")) with pytest.raises(APITimeoutError): - client.inference.with_streaming_response.chat_completion( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model_id="model_id", - ).__enter__() + client.store.with_streaming_response.list_inventory().__enter__() assert _get_open_connections(self.client) == 0 - @mock.patch("llama_stack_client._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) + @mock.patch("llama_stack_cli._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) - def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter, client: LlamaStackClient) -> None: - respx_mock.post("/v1/inference/chat-completion").mock(return_value=httpx.Response(500)) + def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter, client: LlamaStackCli) -> None: + respx_mock.get("/store/inventory").mock(return_value=httpx.Response(500)) with pytest.raises(APIStatusError): - client.inference.with_streaming_response.chat_completion( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model_id="model_id", - ).__enter__() + client.store.with_streaming_response.list_inventory().__enter__() assert _get_open_connections(self.client) == 0 @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) - @mock.patch("llama_stack_client._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) + @mock.patch("llama_stack_cli._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) @pytest.mark.parametrize("failure_mode", ["status", "exception"]) def test_retries_taken( self, - client: LlamaStackClient, + client: 
LlamaStackCli, failures_before_success: int, failure_mode: Literal["status", "exception"], respx_mock: MockRouter, @@ -734,26 +764,18 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: return httpx.Response(500) return httpx.Response(200) - respx_mock.post("/v1/inference/chat-completion").mock(side_effect=retry_handler) + respx_mock.get("/store/inventory").mock(side_effect=retry_handler) - response = client.inference.with_raw_response.chat_completion( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model_id="model_id", - ) + response = client.store.with_raw_response.list_inventory() assert response.retries_taken == failures_before_success assert int(response.http_request.headers.get("x-stainless-retry-count")) == failures_before_success @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) - @mock.patch("llama_stack_client._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) + @mock.patch("llama_stack_cli._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) def test_omit_retry_count_header( - self, client: LlamaStackClient, failures_before_success: int, respx_mock: MockRouter + self, client: LlamaStackCli, failures_before_success: int, respx_mock: MockRouter ) -> None: client = client.with_options(max_retries=4) @@ -766,26 +788,17 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: return httpx.Response(500) return httpx.Response(200) - respx_mock.post("/v1/inference/chat-completion").mock(side_effect=retry_handler) - - response = client.inference.with_raw_response.chat_completion( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model_id="model_id", - extra_headers={"x-stainless-retry-count": Omit()}, - ) + respx_mock.get("/store/inventory").mock(side_effect=retry_handler) + + response = client.store.with_raw_response.list_inventory(extra_headers={"x-stainless-retry-count": Omit()}) assert len(response.http_request.headers.get_list("x-stainless-retry-count")) == 0 @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) - @mock.patch("llama_stack_client._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) + @mock.patch("llama_stack_cli._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) def test_overwrite_retry_count_header( - self, client: LlamaStackClient, failures_before_success: int, respx_mock: MockRouter + self, client: LlamaStackCli, failures_before_success: int, respx_mock: MockRouter ) -> None: client = client.with_options(max_retries=4) @@ -798,18 +811,9 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: return httpx.Response(500) return httpx.Response(200) - respx_mock.post("/v1/inference/chat-completion").mock(side_effect=retry_handler) - - response = client.inference.with_raw_response.chat_completion( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model_id="model_id", - extra_headers={"x-stainless-retry-count": "42"}, - ) + respx_mock.get("/store/inventory").mock(side_effect=retry_handler) + + response = client.store.with_raw_response.list_inventory(extra_headers={"x-stainless-retry-count": "42"}) assert response.http_request.headers.get("x-stainless-retry-count") == "42" @@ -863,8 +867,8 @@ def test_follow_redirects_disabled(self, respx_mock: MockRouter) -> None: assert exc_info.value.response.headers["Location"] == f"{base_url}/redirected" -class TestAsyncLlamaStackClient: - client = 
AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=True) +class TestAsyncLlamaStackCli: + client = AsyncLlamaStackCli(base_url=base_url, api_key=api_key, _strict_response_validation=True) @pytest.mark.respx(base_url=base_url) @pytest.mark.asyncio @@ -892,6 +896,10 @@ def test_copy(self) -> None: copied = self.client.copy() assert id(copied) != id(self.client) + copied = self.client.copy(api_key="another My API Key") + assert copied.api_key == "another My API Key" + assert self.client.api_key == "My API Key" + def test_copy_default_options(self) -> None: # options that have a default are overridden correctly copied = self.client.copy(max_retries=7) @@ -909,8 +917,8 @@ def test_copy_default_options(self) -> None: assert isinstance(self.client.timeout, httpx.Timeout) def test_copy_default_headers(self) -> None: - client = AsyncLlamaStackClient( - base_url=base_url, _strict_response_validation=True, default_headers={"X-Foo": "bar"} + client = AsyncLlamaStackCli( + base_url=base_url, api_key=api_key, _strict_response_validation=True, default_headers={"X-Foo": "bar"} ) assert client.default_headers["X-Foo"] == "bar" @@ -943,8 +951,8 @@ def test_copy_default_headers(self) -> None: client.copy(set_default_headers={}, default_headers={"X-Foo": "Bar"}) def test_copy_default_query(self) -> None: - client = AsyncLlamaStackClient( - base_url=base_url, _strict_response_validation=True, default_query={"foo": "bar"} + client = AsyncLlamaStackCli( + base_url=base_url, api_key=api_key, _strict_response_validation=True, default_query={"foo": "bar"} ) assert _get_params(client)["foo"] == "bar" @@ -1035,10 +1043,10 @@ def add_leak(leaks: list[tracemalloc.StatisticDiff], diff: tracemalloc.Statistic # to_raw_response_wrapper leaks through the @functools.wraps() decorator. # # removing the decorator fixes the leak for reasons we don't understand. - "llama_stack_client/_legacy_response.py", - "llama_stack_client/_response.py", + "llama_stack_cli/_legacy_response.py", + "llama_stack_cli/_response.py", # pydantic.BaseModel.model_dump || pydantic.BaseModel.dict leak memory for some reason. - "llama_stack_client/_compat.py", + "llama_stack_cli/_compat.py", # Standard library leaks we don't care about. 
"/logging/__init__.py", ] @@ -1069,7 +1077,9 @@ async def test_request_timeout(self) -> None: assert timeout == httpx.Timeout(100.0) async def test_client_timeout_option(self) -> None: - client = AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=True, timeout=httpx.Timeout(0)) + client = AsyncLlamaStackCli( + base_url=base_url, api_key=api_key, _strict_response_validation=True, timeout=httpx.Timeout(0) + ) request = client._build_request(FinalRequestOptions(method="get", url="/foo")) timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore @@ -1078,7 +1088,9 @@ async def test_client_timeout_option(self) -> None: async def test_http_client_timeout_option(self) -> None: # custom timeout given to the httpx client should be used async with httpx.AsyncClient(timeout=None) as http_client: - client = AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=True, http_client=http_client) + client = AsyncLlamaStackCli( + base_url=base_url, api_key=api_key, _strict_response_validation=True, http_client=http_client + ) request = client._build_request(FinalRequestOptions(method="get", url="/foo")) timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore @@ -1086,7 +1098,9 @@ async def test_http_client_timeout_option(self) -> None: # no timeout given to the httpx client should not use the httpx default async with httpx.AsyncClient() as http_client: - client = AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=True, http_client=http_client) + client = AsyncLlamaStackCli( + base_url=base_url, api_key=api_key, _strict_response_validation=True, http_client=http_client + ) request = client._build_request(FinalRequestOptions(method="get", url="/foo")) timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore @@ -1094,7 +1108,9 @@ async def test_http_client_timeout_option(self) -> None: # explicitly passing the default timeout currently results in it being ignored async with httpx.AsyncClient(timeout=HTTPX_DEFAULT_TIMEOUT) as http_client: - client = AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=True, http_client=http_client) + client = AsyncLlamaStackCli( + base_url=base_url, api_key=api_key, _strict_response_validation=True, http_client=http_client + ) request = client._build_request(FinalRequestOptions(method="get", url="/foo")) timeout = httpx.Timeout(**request.extensions["timeout"]) # type: ignore @@ -1103,20 +1119,24 @@ async def test_http_client_timeout_option(self) -> None: def test_invalid_http_client(self) -> None: with pytest.raises(TypeError, match="Invalid `http_client` arg"): with httpx.Client() as http_client: - AsyncLlamaStackClient( - base_url=base_url, _strict_response_validation=True, http_client=cast(Any, http_client) + AsyncLlamaStackCli( + base_url=base_url, + api_key=api_key, + _strict_response_validation=True, + http_client=cast(Any, http_client), ) def test_default_headers_option(self) -> None: - client = AsyncLlamaStackClient( - base_url=base_url, _strict_response_validation=True, default_headers={"X-Foo": "bar"} + client = AsyncLlamaStackCli( + base_url=base_url, api_key=api_key, _strict_response_validation=True, default_headers={"X-Foo": "bar"} ) request = client._build_request(FinalRequestOptions(method="get", url="/foo")) assert request.headers.get("x-foo") == "bar" assert request.headers.get("x-stainless-lang") == "python" - client2 = AsyncLlamaStackClient( + client2 = AsyncLlamaStackCli( base_url=base_url, + api_key=api_key, _strict_response_validation=True, 
default_headers={ "X-Foo": "stainless", @@ -1127,9 +1147,19 @@ def test_default_headers_option(self) -> None: assert request.headers.get("x-foo") == "stainless" assert request.headers.get("x-stainless-lang") == "my-overriding-header" + def test_validate_headers(self) -> None: + client = AsyncLlamaStackCli(base_url=base_url, api_key=api_key, _strict_response_validation=True) + request = client._build_request(FinalRequestOptions(method="get", url="/foo")) + assert request.headers.get("api_key") == api_key + + with pytest.raises(LlamaStackCliError): + with update_env(**{"PETSTORE_API_KEY": Omit()}): + client2 = AsyncLlamaStackCli(base_url=base_url, api_key=None, _strict_response_validation=True) + _ = client2 + def test_default_query_option(self) -> None: - client = AsyncLlamaStackClient( - base_url=base_url, _strict_response_validation=True, default_query={"query_param": "bar"} + client = AsyncLlamaStackCli( + base_url=base_url, api_key=api_key, _strict_response_validation=True, default_query={"query_param": "bar"} ) request = client._build_request(FinalRequestOptions(method="get", url="/foo")) url = httpx.URL(request.url) @@ -1242,10 +1272,10 @@ def test_request_extra_query(self) -> None: params = dict(request.url.params) assert params == {"foo": "2"} - def test_multipart_repeating_array(self, async_client: AsyncLlamaStackClient) -> None: + def test_multipart_repeating_array(self, async_client: AsyncLlamaStackCli) -> None: request = async_client._build_request( FinalRequestOptions.construct( - method="get", + method="post", url="/foo", headers={"Content-Type": "multipart/form-data; boundary=6b7ba517decee4a450543ea6ae821c82"}, json_data={"array": ["foo", "bar"]}, @@ -1329,7 +1359,9 @@ class Model(BaseModel): assert response.foo == 2 def test_base_url_setter(self) -> None: - client = AsyncLlamaStackClient(base_url="https://example.com/from_init", _strict_response_validation=True) + client = AsyncLlamaStackCli( + base_url="https://example.com/from_init", api_key=api_key, _strict_response_validation=True + ) assert client.base_url == "https://example.com/from_init/" client.base_url = "https://example.com/from_setter" # type: ignore[assignment] @@ -1337,23 +1369,26 @@ def test_base_url_setter(self) -> None: assert client.base_url == "https://example.com/from_setter/" def test_base_url_env(self) -> None: - with update_env(LLAMA_STACK_BASE_URL="http://localhost:5000/from/env"): - client = AsyncLlamaStackClient(_strict_response_validation=True) + with update_env(LLAMA_STACK_CLI_BASE_URL="http://localhost:5000/from/env"): + client = AsyncLlamaStackCli(api_key=api_key, _strict_response_validation=True) assert client.base_url == "http://localhost:5000/from/env/" @pytest.mark.parametrize( "client", [ - AsyncLlamaStackClient(base_url="http://localhost:5000/custom/path/", _strict_response_validation=True), - AsyncLlamaStackClient( + AsyncLlamaStackCli( + base_url="http://localhost:5000/custom/path/", api_key=api_key, _strict_response_validation=True + ), + AsyncLlamaStackCli( base_url="http://localhost:5000/custom/path/", + api_key=api_key, _strict_response_validation=True, http_client=httpx.AsyncClient(), ), ], ids=["standard", "custom http client"], ) - def test_base_url_trailing_slash(self, client: AsyncLlamaStackClient) -> None: + def test_base_url_trailing_slash(self, client: AsyncLlamaStackCli) -> None: request = client._build_request( FinalRequestOptions( method="post", @@ -1366,16 +1401,19 @@ def test_base_url_trailing_slash(self, client: AsyncLlamaStackClient) -> None: @pytest.mark.parametrize( 
"client", [ - AsyncLlamaStackClient(base_url="http://localhost:5000/custom/path/", _strict_response_validation=True), - AsyncLlamaStackClient( + AsyncLlamaStackCli( + base_url="http://localhost:5000/custom/path/", api_key=api_key, _strict_response_validation=True + ), + AsyncLlamaStackCli( base_url="http://localhost:5000/custom/path/", + api_key=api_key, _strict_response_validation=True, http_client=httpx.AsyncClient(), ), ], ids=["standard", "custom http client"], ) - def test_base_url_no_trailing_slash(self, client: AsyncLlamaStackClient) -> None: + def test_base_url_no_trailing_slash(self, client: AsyncLlamaStackCli) -> None: request = client._build_request( FinalRequestOptions( method="post", @@ -1388,16 +1426,19 @@ def test_base_url_no_trailing_slash(self, client: AsyncLlamaStackClient) -> None @pytest.mark.parametrize( "client", [ - AsyncLlamaStackClient(base_url="http://localhost:5000/custom/path/", _strict_response_validation=True), - AsyncLlamaStackClient( + AsyncLlamaStackCli( + base_url="http://localhost:5000/custom/path/", api_key=api_key, _strict_response_validation=True + ), + AsyncLlamaStackCli( base_url="http://localhost:5000/custom/path/", + api_key=api_key, _strict_response_validation=True, http_client=httpx.AsyncClient(), ), ], ids=["standard", "custom http client"], ) - def test_absolute_request_url(self, client: AsyncLlamaStackClient) -> None: + def test_absolute_request_url(self, client: AsyncLlamaStackCli) -> None: request = client._build_request( FinalRequestOptions( method="post", @@ -1408,7 +1449,7 @@ def test_absolute_request_url(self, client: AsyncLlamaStackClient) -> None: assert request.url == "https://myapi.com/foo" async def test_copied_client_does_not_close_http(self) -> None: - client = AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=True) + client = AsyncLlamaStackCli(base_url=base_url, api_key=api_key, _strict_response_validation=True) assert not client.is_closed() copied = client.copy() @@ -1420,7 +1461,7 @@ async def test_copied_client_does_not_close_http(self) -> None: assert not client.is_closed() async def test_client_context_manager(self) -> None: - client = AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=True) + client = AsyncLlamaStackCli(base_url=base_url, api_key=api_key, _strict_response_validation=True) async with client as c2: assert c2 is client assert not c2.is_closed() @@ -1442,7 +1483,9 @@ class Model(BaseModel): async def test_client_max_retries_validation(self) -> None: with pytest.raises(TypeError, match=r"max_retries cannot be None"): - AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=True, max_retries=cast(Any, None)) + AsyncLlamaStackCli( + base_url=base_url, api_key=api_key, _strict_response_validation=True, max_retries=cast(Any, None) + ) @pytest.mark.respx(base_url=base_url) @pytest.mark.asyncio @@ -1452,12 +1495,12 @@ class Model(BaseModel): respx_mock.get("/foo").mock(return_value=httpx.Response(200, text="my-custom-format")) - strict_client = AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=True) + strict_client = AsyncLlamaStackCli(base_url=base_url, api_key=api_key, _strict_response_validation=True) with pytest.raises(APIResponseValidationError): await strict_client.get("/foo", cast_to=Model) - client = AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=False) + client = AsyncLlamaStackCli(base_url=base_url, api_key=api_key, _strict_response_validation=False) response = await client.get("/foo", cast_to=Model) assert 
isinstance(response, str) # type: ignore[unreachable] @@ -1486,60 +1529,44 @@ class Model(BaseModel): @mock.patch("time.time", mock.MagicMock(return_value=1696004797)) @pytest.mark.asyncio async def test_parse_retry_after_header(self, remaining_retries: int, retry_after: str, timeout: float) -> None: - client = AsyncLlamaStackClient(base_url=base_url, _strict_response_validation=True) + client = AsyncLlamaStackCli(base_url=base_url, api_key=api_key, _strict_response_validation=True) headers = httpx.Headers({"retry-after": retry_after}) options = FinalRequestOptions(method="get", url="/foo", max_retries=3) calculated = client._calculate_retry_timeout(remaining_retries, options, headers) assert calculated == pytest.approx(timeout, 0.5 * 0.875) # pyright: ignore[reportUnknownMemberType] - @mock.patch("llama_stack_client._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) + @mock.patch("llama_stack_cli._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) async def test_retrying_timeout_errors_doesnt_leak( - self, respx_mock: MockRouter, async_client: AsyncLlamaStackClient + self, respx_mock: MockRouter, async_client: AsyncLlamaStackCli ) -> None: - respx_mock.post("/v1/inference/chat-completion").mock(side_effect=httpx.TimeoutException("Test timeout error")) + respx_mock.get("/store/inventory").mock(side_effect=httpx.TimeoutException("Test timeout error")) with pytest.raises(APITimeoutError): - await async_client.inference.with_streaming_response.chat_completion( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model_id="model_id", - ).__aenter__() + await async_client.store.with_streaming_response.list_inventory().__aenter__() assert _get_open_connections(self.client) == 0 - @mock.patch("llama_stack_client._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) + @mock.patch("llama_stack_cli._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) async def test_retrying_status_errors_doesnt_leak( - self, respx_mock: MockRouter, async_client: AsyncLlamaStackClient + self, respx_mock: MockRouter, async_client: AsyncLlamaStackCli ) -> None: - respx_mock.post("/v1/inference/chat-completion").mock(return_value=httpx.Response(500)) + respx_mock.get("/store/inventory").mock(return_value=httpx.Response(500)) with pytest.raises(APIStatusError): - await async_client.inference.with_streaming_response.chat_completion( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model_id="model_id", - ).__aenter__() + await async_client.store.with_streaming_response.list_inventory().__aenter__() assert _get_open_connections(self.client) == 0 @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) - @mock.patch("llama_stack_client._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) + @mock.patch("llama_stack_cli._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) @pytest.mark.asyncio @pytest.mark.parametrize("failure_mode", ["status", "exception"]) async def test_retries_taken( self, - async_client: AsyncLlamaStackClient, + async_client: AsyncLlamaStackCli, failures_before_success: int, failure_mode: Literal["status", "exception"], respx_mock: MockRouter, @@ -1557,27 +1584,19 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: return httpx.Response(500) return httpx.Response(200) - 
respx_mock.post("/v1/inference/chat-completion").mock(side_effect=retry_handler) + respx_mock.get("/store/inventory").mock(side_effect=retry_handler) - response = await client.inference.with_raw_response.chat_completion( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model_id="model_id", - ) + response = await client.store.with_raw_response.list_inventory() assert response.retries_taken == failures_before_success assert int(response.http_request.headers.get("x-stainless-retry-count")) == failures_before_success @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) - @mock.patch("llama_stack_client._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) + @mock.patch("llama_stack_cli._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) @pytest.mark.asyncio async def test_omit_retry_count_header( - self, async_client: AsyncLlamaStackClient, failures_before_success: int, respx_mock: MockRouter + self, async_client: AsyncLlamaStackCli, failures_before_success: int, respx_mock: MockRouter ) -> None: client = async_client.with_options(max_retries=4) @@ -1590,27 +1609,20 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: return httpx.Response(500) return httpx.Response(200) - respx_mock.post("/v1/inference/chat-completion").mock(side_effect=retry_handler) - - response = await client.inference.with_raw_response.chat_completion( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model_id="model_id", - extra_headers={"x-stainless-retry-count": Omit()}, + respx_mock.get("/store/inventory").mock(side_effect=retry_handler) + + response = await client.store.with_raw_response.list_inventory( + extra_headers={"x-stainless-retry-count": Omit()} ) assert len(response.http_request.headers.get_list("x-stainless-retry-count")) == 0 @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) - @mock.patch("llama_stack_client._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) + @mock.patch("llama_stack_cli._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) @pytest.mark.respx(base_url=base_url) @pytest.mark.asyncio async def test_overwrite_retry_count_header( - self, async_client: AsyncLlamaStackClient, failures_before_success: int, respx_mock: MockRouter + self, async_client: AsyncLlamaStackCli, failures_before_success: int, respx_mock: MockRouter ) -> None: client = async_client.with_options(max_retries=4) @@ -1623,18 +1635,9 @@ def retry_handler(_request: httpx.Request) -> httpx.Response: return httpx.Response(500) return httpx.Response(200) - respx_mock.post("/v1/inference/chat-completion").mock(side_effect=retry_handler) - - response = await client.inference.with_raw_response.chat_completion( - messages=[ - { - "content": "string", - "role": "user", - } - ], - model_id="model_id", - extra_headers={"x-stainless-retry-count": "42"}, - ) + respx_mock.get("/store/inventory").mock(side_effect=retry_handler) + + response = await client.store.with_raw_response.list_inventory(extra_headers={"x-stainless-retry-count": "42"}) assert response.http_request.headers.get("x-stainless-retry-count") == "42" @@ -1649,8 +1652,8 @@ def test_get_platform(self) -> None: import nest_asyncio import threading - from llama_stack_client._utils import asyncify - from llama_stack_client._base_client import get_platform + from llama_stack_cli._utils import asyncify + from llama_stack_cli._base_client import get_platform async def test_main() -> None: result = 
await asyncify(get_platform)() diff --git a/tests/test_deepcopy.py b/tests/test_deepcopy.py index 52056b76..100f1be9 100644 --- a/tests/test_deepcopy.py +++ b/tests/test_deepcopy.py @@ -1,4 +1,4 @@ -from llama_stack_client._utils import deepcopy_minimal +from llama_stack_cli._utils import deepcopy_minimal def assert_different_identities(obj1: object, obj2: object) -> None: diff --git a/tests/test_extract_files.py b/tests/test_extract_files.py index 614e670a..6496c3f2 100644 --- a/tests/test_extract_files.py +++ b/tests/test_extract_files.py @@ -4,8 +4,8 @@ import pytest -from llama_stack_client._types import FileTypes -from llama_stack_client._utils import extract_files +from llama_stack_cli._types import FileTypes +from llama_stack_cli._utils import extract_files def test_removes_files_from_input() -> None: diff --git a/tests/test_files.py b/tests/test_files.py index e4bcf976..db667b3a 100644 --- a/tests/test_files.py +++ b/tests/test_files.py @@ -4,7 +4,7 @@ import pytest from dirty_equals import IsDict, IsList, IsBytes, IsTuple -from llama_stack_client._files import to_httpx_files, async_to_httpx_files +from llama_stack_cli._files import to_httpx_files, async_to_httpx_files readme_path = Path(__file__).parent.parent.joinpath("README.md") diff --git a/tests/test_models.py b/tests/test_models.py index a27dfa46..5579a6da 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -7,9 +7,9 @@ import pydantic from pydantic import Field -from llama_stack_client._utils import PropertyInfo -from llama_stack_client._compat import PYDANTIC_V2, parse_obj, model_dump, model_json -from llama_stack_client._models import BaseModel, construct_type +from llama_stack_cli._utils import PropertyInfo +from llama_stack_cli._compat import PYDANTIC_V2, parse_obj, model_dump, model_json +from llama_stack_cli._models import BaseModel, construct_type class BasicModel(BaseModel): @@ -889,3 +889,48 @@ class ModelB(BaseModel): ) assert isinstance(m, ModelB) + + +def test_nested_discriminated_union() -> None: + class InnerType1(BaseModel): + type: Literal["type_1"] + + class InnerModel(BaseModel): + inner_value: str + + class InnerType2(BaseModel): + type: Literal["type_2"] + some_inner_model: InnerModel + + class Type1(BaseModel): + base_type: Literal["base_type_1"] + value: Annotated[ + Union[ + InnerType1, + InnerType2, + ], + PropertyInfo(discriminator="type"), + ] + + class Type2(BaseModel): + base_type: Literal["base_type_2"] + + T = Annotated[ + Union[ + Type1, + Type2, + ], + PropertyInfo(discriminator="base_type"), + ] + + model = construct_type( + type_=T, + value={ + "base_type": "base_type_1", + "value": { + "type": "type_2", + }, + }, + ) + assert isinstance(model, Type1) + assert isinstance(model.value, InnerType2) diff --git a/tests/test_qs.py b/tests/test_qs.py index cff56e87..17050c8f 100644 --- a/tests/test_qs.py +++ b/tests/test_qs.py @@ -4,7 +4,7 @@ import pytest -from llama_stack_client._qs import Querystring, stringify +from llama_stack_cli._qs import Querystring, stringify def test_empty() -> None: diff --git a/tests/test_required_args.py b/tests/test_required_args.py index 77bebd25..c4dc9907 100644 --- a/tests/test_required_args.py +++ b/tests/test_required_args.py @@ -2,7 +2,7 @@ import pytest -from llama_stack_client._utils import required_args +from llama_stack_cli._utils import required_args def test_too_many_positional_params() -> None: diff --git a/tests/test_response.py b/tests/test_response.py index 8c803a25..c5c77c05 100644 --- a/tests/test_response.py +++ 
b/tests/test_response.py @@ -6,8 +6,8 @@ import pytest import pydantic -from llama_stack_client import BaseModel, LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client._response import ( +from llama_stack_cli import BaseModel, LlamaStackCli, AsyncLlamaStackCli +from llama_stack_cli._response import ( APIResponse, BaseAPIResponse, AsyncAPIResponse, @@ -15,8 +15,8 @@ AsyncBinaryAPIResponse, extract_response_type, ) -from llama_stack_client._streaming import Stream -from llama_stack_client._base_client import FinalRequestOptions +from llama_stack_cli._streaming import Stream +from llama_stack_cli._base_client import FinalRequestOptions class ConcreteBaseAPIResponse(APIResponse[bytes]): ... @@ -37,7 +37,7 @@ def test_extract_response_type_direct_classes() -> None: def test_extract_response_type_direct_class_missing_type_arg() -> None: with pytest.raises( RuntimeError, - match="Expected type to have a type argument at index 0 but it did not", + match="Expected type to have a type argument at index 0 but it did not", ): extract_response_type(AsyncAPIResponse) @@ -56,7 +56,7 @@ def test_extract_response_type_binary_response() -> None: class PydanticModel(pydantic.BaseModel): ... -def test_response_parse_mismatched_basemodel(client: LlamaStackClient) -> None: +def test_response_parse_mismatched_basemodel(client: LlamaStackCli) -> None: response = APIResponse( raw=httpx.Response(200, content=b"foo"), client=client, @@ -68,13 +68,13 @@ def test_response_parse_mismatched_basemodel(client: LlamaStackClient) -> None: with pytest.raises( TypeError, - match="Pydantic models must subclass our base model type, e.g. `from llama_stack_client import BaseModel`", + match="Pydantic models must subclass our base model type, e.g. `from llama_stack_cli import BaseModel`", ): response.parse(to=PydanticModel) @pytest.mark.asyncio -async def test_async_response_parse_mismatched_basemodel(async_client: AsyncLlamaStackClient) -> None: +async def test_async_response_parse_mismatched_basemodel(async_client: AsyncLlamaStackCli) -> None: response = AsyncAPIResponse( raw=httpx.Response(200, content=b"foo"), client=async_client, @@ -86,12 +86,12 @@ async def test_async_response_parse_mismatched_basemodel(async_client: AsyncLlam with pytest.raises( TypeError, - match="Pydantic models must subclass our base model type, e.g. `from llama_stack_client import BaseModel`", + match="Pydantic models must subclass our base model type, e.g. 
`from llama_stack_cli import BaseModel`", ): await response.parse(to=PydanticModel) -def test_response_parse_custom_stream(client: LlamaStackClient) -> None: +def test_response_parse_custom_stream(client: LlamaStackCli) -> None: response = APIResponse( raw=httpx.Response(200, content=b"foo"), client=client, @@ -106,7 +106,7 @@ def test_response_parse_custom_stream(client: LlamaStackClient) -> None: @pytest.mark.asyncio -async def test_async_response_parse_custom_stream(async_client: AsyncLlamaStackClient) -> None: +async def test_async_response_parse_custom_stream(async_client: AsyncLlamaStackCli) -> None: response = AsyncAPIResponse( raw=httpx.Response(200, content=b"foo"), client=async_client, @@ -125,7 +125,7 @@ class CustomModel(BaseModel): bar: int -def test_response_parse_custom_model(client: LlamaStackClient) -> None: +def test_response_parse_custom_model(client: LlamaStackCli) -> None: response = APIResponse( raw=httpx.Response(200, content=json.dumps({"foo": "hello!", "bar": 2})), client=client, @@ -141,7 +141,7 @@ def test_response_parse_custom_model(client: LlamaStackClient) -> None: @pytest.mark.asyncio -async def test_async_response_parse_custom_model(async_client: AsyncLlamaStackClient) -> None: +async def test_async_response_parse_custom_model(async_client: AsyncLlamaStackCli) -> None: response = AsyncAPIResponse( raw=httpx.Response(200, content=json.dumps({"foo": "hello!", "bar": 2})), client=async_client, @@ -156,7 +156,7 @@ async def test_async_response_parse_custom_model(async_client: AsyncLlamaStackCl assert obj.bar == 2 -def test_response_parse_annotated_type(client: LlamaStackClient) -> None: +def test_response_parse_annotated_type(client: LlamaStackCli) -> None: response = APIResponse( raw=httpx.Response(200, content=json.dumps({"foo": "hello!", "bar": 2})), client=client, @@ -173,7 +173,7 @@ def test_response_parse_annotated_type(client: LlamaStackClient) -> None: assert obj.bar == 2 -async def test_async_response_parse_annotated_type(async_client: AsyncLlamaStackClient) -> None: +async def test_async_response_parse_annotated_type(async_client: AsyncLlamaStackCli) -> None: response = AsyncAPIResponse( raw=httpx.Response(200, content=json.dumps({"foo": "hello!", "bar": 2})), client=async_client, @@ -201,7 +201,7 @@ async def test_async_response_parse_annotated_type(async_client: AsyncLlamaStack ("FalSe", False), ], ) -def test_response_parse_bool(client: LlamaStackClient, content: str, expected: bool) -> None: +def test_response_parse_bool(client: LlamaStackCli, content: str, expected: bool) -> None: response = APIResponse( raw=httpx.Response(200, content=content), client=client, @@ -226,7 +226,7 @@ def test_response_parse_bool(client: LlamaStackClient, content: str, expected: b ("FalSe", False), ], ) -async def test_async_response_parse_bool(client: AsyncLlamaStackClient, content: str, expected: bool) -> None: +async def test_async_response_parse_bool(client: AsyncLlamaStackCli, content: str, expected: bool) -> None: response = AsyncAPIResponse( raw=httpx.Response(200, content=content), client=client, @@ -245,7 +245,7 @@ class OtherModel(BaseModel): @pytest.mark.parametrize("client", [False], indirect=True) # loose validation -def test_response_parse_expect_model_union_non_json_content(client: LlamaStackClient) -> None: +def test_response_parse_expect_model_union_non_json_content(client: LlamaStackCli) -> None: response = APIResponse( raw=httpx.Response(200, content=b"foo", headers={"Content-Type": "application/text"}), client=client, @@ -262,7 +262,7 @@ def 
test_response_parse_expect_model_union_non_json_content(client: LlamaStackCl @pytest.mark.asyncio @pytest.mark.parametrize("async_client", [False], indirect=True) # loose validation -async def test_async_response_parse_expect_model_union_non_json_content(async_client: AsyncLlamaStackClient) -> None: +async def test_async_response_parse_expect_model_union_non_json_content(async_client: AsyncLlamaStackCli) -> None: response = AsyncAPIResponse( raw=httpx.Response(200, content=b"foo", headers={"Content-Type": "application/text"}), client=async_client, diff --git a/tests/test_streaming.py b/tests/test_streaming.py index 875d126d..29c0395b 100644 --- a/tests/test_streaming.py +++ b/tests/test_streaming.py @@ -5,13 +5,13 @@ import httpx import pytest -from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient -from llama_stack_client._streaming import Stream, AsyncStream, ServerSentEvent +from llama_stack_cli import LlamaStackCli, AsyncLlamaStackCli +from llama_stack_cli._streaming import Stream, AsyncStream, ServerSentEvent @pytest.mark.asyncio @pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) -async def test_basic(sync: bool, client: LlamaStackClient, async_client: AsyncLlamaStackClient) -> None: +async def test_basic(sync: bool, client: LlamaStackCli, async_client: AsyncLlamaStackCli) -> None: def body() -> Iterator[bytes]: yield b"event: completion\n" yield b'data: {"foo":true}\n' @@ -28,7 +28,7 @@ def body() -> Iterator[bytes]: @pytest.mark.asyncio @pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) -async def test_data_missing_event(sync: bool, client: LlamaStackClient, async_client: AsyncLlamaStackClient) -> None: +async def test_data_missing_event(sync: bool, client: LlamaStackCli, async_client: AsyncLlamaStackCli) -> None: def body() -> Iterator[bytes]: yield b'data: {"foo":true}\n' yield b"\n" @@ -44,7 +44,7 @@ def body() -> Iterator[bytes]: @pytest.mark.asyncio @pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) -async def test_event_missing_data(sync: bool, client: LlamaStackClient, async_client: AsyncLlamaStackClient) -> None: +async def test_event_missing_data(sync: bool, client: LlamaStackCli, async_client: AsyncLlamaStackCli) -> None: def body() -> Iterator[bytes]: yield b"event: ping\n" yield b"\n" @@ -60,7 +60,7 @@ def body() -> Iterator[bytes]: @pytest.mark.asyncio @pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) -async def test_multiple_events(sync: bool, client: LlamaStackClient, async_client: AsyncLlamaStackClient) -> None: +async def test_multiple_events(sync: bool, client: LlamaStackCli, async_client: AsyncLlamaStackCli) -> None: def body() -> Iterator[bytes]: yield b"event: ping\n" yield b"\n" @@ -82,9 +82,7 @@ def body() -> Iterator[bytes]: @pytest.mark.asyncio @pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) -async def test_multiple_events_with_data( - sync: bool, client: LlamaStackClient, async_client: AsyncLlamaStackClient -) -> None: +async def test_multiple_events_with_data(sync: bool, client: LlamaStackCli, async_client: AsyncLlamaStackCli) -> None: def body() -> Iterator[bytes]: yield b"event: ping\n" yield b'data: {"foo":true}\n' @@ -109,7 +107,7 @@ def body() -> Iterator[bytes]: @pytest.mark.asyncio @pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) async def test_multiple_data_lines_with_empty_line( - sync: bool, client: LlamaStackClient, async_client: AsyncLlamaStackClient + sync: bool, client: LlamaStackCli, async_client: 
AsyncLlamaStackCli ) -> None: def body() -> Iterator[bytes]: yield b"event: ping\n" @@ -133,7 +131,7 @@ def body() -> Iterator[bytes]: @pytest.mark.asyncio @pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) async def test_data_json_escaped_double_new_line( - sync: bool, client: LlamaStackClient, async_client: AsyncLlamaStackClient + sync: bool, client: LlamaStackCli, async_client: AsyncLlamaStackCli ) -> None: def body() -> Iterator[bytes]: yield b"event: ping\n" @@ -151,7 +149,7 @@ def body() -> Iterator[bytes]: @pytest.mark.asyncio @pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) -async def test_multiple_data_lines(sync: bool, client: LlamaStackClient, async_client: AsyncLlamaStackClient) -> None: +async def test_multiple_data_lines(sync: bool, client: LlamaStackCli, async_client: AsyncLlamaStackCli) -> None: def body() -> Iterator[bytes]: yield b"event: ping\n" yield b"data: {\n" @@ -171,8 +169,8 @@ def body() -> Iterator[bytes]: @pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) async def test_special_new_line_character( sync: bool, - client: LlamaStackClient, - async_client: AsyncLlamaStackClient, + client: LlamaStackCli, + async_client: AsyncLlamaStackCli, ) -> None: def body() -> Iterator[bytes]: yield b'data: {"content":" culpa"}\n' @@ -202,8 +200,8 @@ def body() -> Iterator[bytes]: @pytest.mark.parametrize("sync", [True, False], ids=["sync", "async"]) async def test_multi_byte_character_multiple_chunks( sync: bool, - client: LlamaStackClient, - async_client: AsyncLlamaStackClient, + client: LlamaStackCli, + async_client: AsyncLlamaStackCli, ) -> None: def body() -> Iterator[bytes]: yield b'data: {"content":"' @@ -243,8 +241,8 @@ def make_event_iterator( content: Iterator[bytes], *, sync: bool, - client: LlamaStackClient, - async_client: AsyncLlamaStackClient, + client: LlamaStackCli, + async_client: AsyncLlamaStackCli, ) -> Iterator[ServerSentEvent] | AsyncIterator[ServerSentEvent]: if sync: return Stream(cast_to=object, client=client, response=httpx.Response(200, content=content))._iter_events() diff --git a/tests/test_transform.py b/tests/test_transform.py index b6eb411d..cb5d484c 100644 --- a/tests/test_transform.py +++ b/tests/test_transform.py @@ -8,15 +8,15 @@ import pytest -from llama_stack_client._types import NOT_GIVEN, Base64FileInput -from llama_stack_client._utils import ( +from llama_stack_cli._types import NOT_GIVEN, Base64FileInput +from llama_stack_cli._utils import ( PropertyInfo, transform as _transform, parse_datetime, async_transform as _async_transform, ) -from llama_stack_client._compat import PYDANTIC_V2 -from llama_stack_client._models import BaseModel +from llama_stack_cli._compat import PYDANTIC_V2 +from llama_stack_cli._models import BaseModel _T = TypeVar("_T") diff --git a/tests/test_utils/test_proxy.py b/tests/test_utils/test_proxy.py index 76a29efd..10a9adaa 100644 --- a/tests/test_utils/test_proxy.py +++ b/tests/test_utils/test_proxy.py @@ -2,7 +2,7 @@ from typing import Any from typing_extensions import override -from llama_stack_client._utils import LazyProxy +from llama_stack_cli._utils import LazyProxy class RecursiveLazyProxy(LazyProxy[Any]): diff --git a/tests/test_utils/test_typing.py b/tests/test_utils/test_typing.py index a38fbf5a..89c7dfd2 100644 --- a/tests/test_utils/test_typing.py +++ b/tests/test_utils/test_typing.py @@ -2,7 +2,7 @@ from typing import Generic, TypeVar, cast -from llama_stack_client._utils import extract_type_var_from_base +from llama_stack_cli._utils import 
extract_type_var_from_base _T = TypeVar("_T") _T2 = TypeVar("_T2") diff --git a/tests/utils.py b/tests/utils.py index 9d0ce74f..c1c02f1b 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -8,8 +8,8 @@ from datetime import date, datetime from typing_extensions import Literal, get_args, get_origin, assert_type -from llama_stack_client._types import Omit, NoneType -from llama_stack_client._utils import ( +from llama_stack_cli._types import Omit, NoneType +from llama_stack_cli._utils import ( is_dict, is_list, is_list_type, @@ -18,8 +18,8 @@ is_annotated_type, is_type_alias_type, ) -from llama_stack_client._compat import PYDANTIC_V2, field_outer_type, get_model_fields -from llama_stack_client._models import BaseModel +from llama_stack_cli._compat import PYDANTIC_V2, field_outer_type, get_model_fields +from llama_stack_cli._models import BaseModel BaseModelT = TypeVar("BaseModelT", bound=BaseModel) diff --git a/uv.lock b/uv.lock deleted file mode 100644 index 7e95839b..00000000 --- a/uv.lock +++ /dev/null @@ -1,802 +0,0 @@ -version = 1 -revision = 2 -requires-python = ">=3.12" - -[[package]] -name = "annotated-types" -version = "0.7.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, -] - -[[package]] -name = "anyio" -version = "4.8.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "idna" }, - { name = "sniffio" }, - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/a3/73/199a98fc2dae33535d6b8e8e6ec01f8c1d76c9adb096c6b7d64823038cde/anyio-4.8.0.tar.gz", hash = "sha256:1d9fe889df5212298c0c0723fa20479d1b94883a2df44bd3897aa91083316f7a", size = 181126, upload-time = "2025-01-05T13:13:11.095Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/46/eb/e7f063ad1fec6b3178a3cd82d1a3c4de82cccf283fc42746168188e1cdd5/anyio-4.8.0-py3-none-any.whl", hash = "sha256:b5011f270ab5eb0abf13385f851315585cc37ef330dd88e27ec3d34d651fd47a", size = 96041, upload-time = "2025-01-05T13:13:07.985Z" }, -] - -[[package]] -name = "black" -version = "25.1.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "click" }, - { name = "mypy-extensions" }, - { name = "packaging" }, - { name = "pathspec" }, - { name = "platformdirs" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/94/49/26a7b0f3f35da4b5a65f081943b7bcd22d7002f5f0fb8098ec1ff21cb6ef/black-25.1.0.tar.gz", hash = "sha256:33496d5cd1222ad73391352b4ae8da15253c5de89b93a80b3e2c8d9a19ec2666", size = 649449, upload-time = "2025-01-29T04:15:40.373Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/83/71/3fe4741df7adf015ad8dfa082dd36c94ca86bb21f25608eb247b4afb15b2/black-25.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4b60580e829091e6f9238c848ea6750efed72140b91b048770b64e74fe04908b", size = 1650988, upload-time = "2025-01-29T05:37:16.707Z" }, - { url = 
"https://files.pythonhosted.org/packages/13/f3/89aac8a83d73937ccd39bbe8fc6ac8860c11cfa0af5b1c96d081facac844/black-25.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e2978f6df243b155ef5fa7e558a43037c3079093ed5d10fd84c43900f2d8ecc", size = 1453985, upload-time = "2025-01-29T05:37:18.273Z" }, - { url = "https://files.pythonhosted.org/packages/6f/22/b99efca33f1f3a1d2552c714b1e1b5ae92efac6c43e790ad539a163d1754/black-25.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b48735872ec535027d979e8dcb20bf4f70b5ac75a8ea99f127c106a7d7aba9f", size = 1783816, upload-time = "2025-01-29T04:18:33.823Z" }, - { url = "https://files.pythonhosted.org/packages/18/7e/a27c3ad3822b6f2e0e00d63d58ff6299a99a5b3aee69fa77cd4b0076b261/black-25.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:ea0213189960bda9cf99be5b8c8ce66bb054af5e9e861249cd23471bd7b0b3ba", size = 1440860, upload-time = "2025-01-29T04:19:12.944Z" }, - { url = "https://files.pythonhosted.org/packages/98/87/0edf98916640efa5d0696e1abb0a8357b52e69e82322628f25bf14d263d1/black-25.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8f0b18a02996a836cc9c9c78e5babec10930862827b1b724ddfe98ccf2f2fe4f", size = 1650673, upload-time = "2025-01-29T05:37:20.574Z" }, - { url = "https://files.pythonhosted.org/packages/52/e5/f7bf17207cf87fa6e9b676576749c6b6ed0d70f179a3d812c997870291c3/black-25.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:afebb7098bfbc70037a053b91ae8437c3857482d3a690fefc03e9ff7aa9a5fd3", size = 1453190, upload-time = "2025-01-29T05:37:22.106Z" }, - { url = "https://files.pythonhosted.org/packages/e3/ee/adda3d46d4a9120772fae6de454c8495603c37c4c3b9c60f25b1ab6401fe/black-25.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:030b9759066a4ee5e5aca28c3c77f9c64789cdd4de8ac1df642c40b708be6171", size = 1782926, upload-time = "2025-01-29T04:18:58.564Z" }, - { url = "https://files.pythonhosted.org/packages/cc/64/94eb5f45dcb997d2082f097a3944cfc7fe87e071907f677e80788a2d7b7a/black-25.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:a22f402b410566e2d1c950708c77ebf5ebd5d0d88a6a2e87c86d9fb48afa0d18", size = 1442613, upload-time = "2025-01-29T04:19:27.63Z" }, - { url = "https://files.pythonhosted.org/packages/09/71/54e999902aed72baf26bca0d50781b01838251a462612966e9fc4891eadd/black-25.1.0-py3-none-any.whl", hash = "sha256:95e8176dae143ba9097f351d174fdaf0ccd29efb414b362ae3fd72bf0f710717", size = 207646, upload-time = "2025-01-29T04:15:38.082Z" }, -] - -[[package]] -name = "certifi" -version = "2025.1.31" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/1c/ab/c9f1e32b7b1bf505bf26f0ef697775960db7932abeb7b516de930ba2705f/certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651", size = 167577, upload-time = "2025-01-31T02:16:47.166Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/38/fc/bce832fd4fd99766c04d1ee0eead6b0ec6486fb100ae5e74c1d91292b982/certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe", size = 166393, upload-time = "2025-01-31T02:16:45.015Z" }, -] - -[[package]] -name = "cfgv" -version = "3.4.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/11/74/539e56497d9bd1d484fd863dd69cbbfa653cd2aa27abfe35653494d85e94/cfgv-3.4.0.tar.gz", hash = 
"sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560", size = 7114, upload-time = "2023-08-12T20:38:17.776Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c5/55/51844dd50c4fc7a33b653bfaba4c2456f06955289ca770a5dbd5fd267374/cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9", size = 7249, upload-time = "2023-08-12T20:38:16.269Z" }, -] - -[[package]] -name = "charset-normalizer" -version = "3.4.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e4/33/89c2ced2b67d1c2a61c19c6751aa8902d46ce3dacb23600a283619f5a12d/charset_normalizer-3.4.2.tar.gz", hash = "sha256:5baececa9ecba31eff645232d59845c07aa030f0c81ee70184a90d35099a0e63", size = 126367, upload-time = "2025-05-02T08:34:42.01Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d7/a4/37f4d6035c89cac7930395a35cc0f1b872e652eaafb76a6075943754f095/charset_normalizer-3.4.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0c29de6a1a95f24b9a1aa7aefd27d2487263f00dfd55a77719b530788f75cff7", size = 199936, upload-time = "2025-05-02T08:32:33.712Z" }, - { url = "https://files.pythonhosted.org/packages/ee/8a/1a5e33b73e0d9287274f899d967907cd0bf9c343e651755d9307e0dbf2b3/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cddf7bd982eaa998934a91f69d182aec997c6c468898efe6679af88283b498d3", size = 143790, upload-time = "2025-05-02T08:32:35.768Z" }, - { url = "https://files.pythonhosted.org/packages/66/52/59521f1d8e6ab1482164fa21409c5ef44da3e9f653c13ba71becdd98dec3/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcbe676a55d7445b22c10967bceaaf0ee69407fbe0ece4d032b6eb8d4565982a", size = 153924, upload-time = "2025-05-02T08:32:37.284Z" }, - { url = "https://files.pythonhosted.org/packages/86/2d/fb55fdf41964ec782febbf33cb64be480a6b8f16ded2dbe8db27a405c09f/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d41c4d287cfc69060fa91cae9683eacffad989f1a10811995fa309df656ec214", size = 146626, upload-time = "2025-05-02T08:32:38.803Z" }, - { url = "https://files.pythonhosted.org/packages/8c/73/6ede2ec59bce19b3edf4209d70004253ec5f4e319f9a2e3f2f15601ed5f7/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e594135de17ab3866138f496755f302b72157d115086d100c3f19370839dd3a", size = 148567, upload-time = "2025-05-02T08:32:40.251Z" }, - { url = "https://files.pythonhosted.org/packages/09/14/957d03c6dc343c04904530b6bef4e5efae5ec7d7990a7cbb868e4595ee30/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cf713fe9a71ef6fd5adf7a79670135081cd4431c2943864757f0fa3a65b1fafd", size = 150957, upload-time = "2025-05-02T08:32:41.705Z" }, - { url = "https://files.pythonhosted.org/packages/0d/c8/8174d0e5c10ccebdcb1b53cc959591c4c722a3ad92461a273e86b9f5a302/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a370b3e078e418187da8c3674eddb9d983ec09445c99a3a263c2011993522981", size = 145408, upload-time = "2025-05-02T08:32:43.709Z" }, - { url = "https://files.pythonhosted.org/packages/58/aa/8904b84bc8084ac19dc52feb4f5952c6df03ffb460a887b42615ee1382e8/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a955b438e62efdf7e0b7b52a64dc5c3396e2634baa62471768a64bc2adb73d5c", size = 153399, 
upload-time = "2025-05-02T08:32:46.197Z" }, - { url = "https://files.pythonhosted.org/packages/c2/26/89ee1f0e264d201cb65cf054aca6038c03b1a0c6b4ae998070392a3ce605/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:7222ffd5e4de8e57e03ce2cef95a4c43c98fcb72ad86909abdfc2c17d227fc1b", size = 156815, upload-time = "2025-05-02T08:32:48.105Z" }, - { url = "https://files.pythonhosted.org/packages/fd/07/68e95b4b345bad3dbbd3a8681737b4338ff2c9df29856a6d6d23ac4c73cb/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:bee093bf902e1d8fc0ac143c88902c3dfc8941f7ea1d6a8dd2bcb786d33db03d", size = 154537, upload-time = "2025-05-02T08:32:49.719Z" }, - { url = "https://files.pythonhosted.org/packages/77/1a/5eefc0ce04affb98af07bc05f3bac9094513c0e23b0562d64af46a06aae4/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dedb8adb91d11846ee08bec4c8236c8549ac721c245678282dcb06b221aab59f", size = 149565, upload-time = "2025-05-02T08:32:51.404Z" }, - { url = "https://files.pythonhosted.org/packages/37/a0/2410e5e6032a174c95e0806b1a6585eb21e12f445ebe239fac441995226a/charset_normalizer-3.4.2-cp312-cp312-win32.whl", hash = "sha256:db4c7bf0e07fc3b7d89ac2a5880a6a8062056801b83ff56d8464b70f65482b6c", size = 98357, upload-time = "2025-05-02T08:32:53.079Z" }, - { url = "https://files.pythonhosted.org/packages/6c/4f/c02d5c493967af3eda9c771ad4d2bbc8df6f99ddbeb37ceea6e8716a32bc/charset_normalizer-3.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:5a9979887252a82fefd3d3ed2a8e3b937a7a809f65dcb1e068b090e165bbe99e", size = 105776, upload-time = "2025-05-02T08:32:54.573Z" }, - { url = "https://files.pythonhosted.org/packages/ea/12/a93df3366ed32db1d907d7593a94f1fe6293903e3e92967bebd6950ed12c/charset_normalizer-3.4.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:926ca93accd5d36ccdabd803392ddc3e03e6d4cd1cf17deff3b989ab8e9dbcf0", size = 199622, upload-time = "2025-05-02T08:32:56.363Z" }, - { url = "https://files.pythonhosted.org/packages/04/93/bf204e6f344c39d9937d3c13c8cd5bbfc266472e51fc8c07cb7f64fcd2de/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eba9904b0f38a143592d9fc0e19e2df0fa2e41c3c3745554761c5f6447eedabf", size = 143435, upload-time = "2025-05-02T08:32:58.551Z" }, - { url = "https://files.pythonhosted.org/packages/22/2a/ea8a2095b0bafa6c5b5a55ffdc2f924455233ee7b91c69b7edfcc9e02284/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3fddb7e2c84ac87ac3a947cb4e66d143ca5863ef48e4a5ecb83bd48619e4634e", size = 153653, upload-time = "2025-05-02T08:33:00.342Z" }, - { url = "https://files.pythonhosted.org/packages/b6/57/1b090ff183d13cef485dfbe272e2fe57622a76694061353c59da52c9a659/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98f862da73774290f251b9df8d11161b6cf25b599a66baf087c1ffe340e9bfd1", size = 146231, upload-time = "2025-05-02T08:33:02.081Z" }, - { url = "https://files.pythonhosted.org/packages/e2/28/ffc026b26f441fc67bd21ab7f03b313ab3fe46714a14b516f931abe1a2d8/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c9379d65defcab82d07b2a9dfbfc2e95bc8fe0ebb1b176a3190230a3ef0e07c", size = 148243, upload-time = "2025-05-02T08:33:04.063Z" }, - { url = 
"https://files.pythonhosted.org/packages/c0/0f/9abe9bd191629c33e69e47c6ef45ef99773320e9ad8e9cb08b8ab4a8d4cb/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e635b87f01ebc977342e2697d05b56632f5f879a4f15955dfe8cef2448b51691", size = 150442, upload-time = "2025-05-02T08:33:06.418Z" }, - { url = "https://files.pythonhosted.org/packages/67/7c/a123bbcedca91d5916c056407f89a7f5e8fdfce12ba825d7d6b9954a1a3c/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1c95a1e2902a8b722868587c0e1184ad5c55631de5afc0eb96bc4b0d738092c0", size = 145147, upload-time = "2025-05-02T08:33:08.183Z" }, - { url = "https://files.pythonhosted.org/packages/ec/fe/1ac556fa4899d967b83e9893788e86b6af4d83e4726511eaaad035e36595/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ef8de666d6179b009dce7bcb2ad4c4a779f113f12caf8dc77f0162c29d20490b", size = 153057, upload-time = "2025-05-02T08:33:09.986Z" }, - { url = "https://files.pythonhosted.org/packages/2b/ff/acfc0b0a70b19e3e54febdd5301a98b72fa07635e56f24f60502e954c461/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:32fc0341d72e0f73f80acb0a2c94216bd704f4f0bce10aedea38f30502b271ff", size = 156454, upload-time = "2025-05-02T08:33:11.814Z" }, - { url = "https://files.pythonhosted.org/packages/92/08/95b458ce9c740d0645feb0e96cea1f5ec946ea9c580a94adfe0b617f3573/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:289200a18fa698949d2b39c671c2cc7a24d44096784e76614899a7ccf2574b7b", size = 154174, upload-time = "2025-05-02T08:33:13.707Z" }, - { url = "https://files.pythonhosted.org/packages/78/be/8392efc43487ac051eee6c36d5fbd63032d78f7728cb37aebcc98191f1ff/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4a476b06fbcf359ad25d34a057b7219281286ae2477cc5ff5e3f70a246971148", size = 149166, upload-time = "2025-05-02T08:33:15.458Z" }, - { url = "https://files.pythonhosted.org/packages/44/96/392abd49b094d30b91d9fbda6a69519e95802250b777841cf3bda8fe136c/charset_normalizer-3.4.2-cp313-cp313-win32.whl", hash = "sha256:aaeeb6a479c7667fbe1099af9617c83aaca22182d6cf8c53966491a0f1b7ffb7", size = 98064, upload-time = "2025-05-02T08:33:17.06Z" }, - { url = "https://files.pythonhosted.org/packages/e9/b0/0200da600134e001d91851ddc797809e2fe0ea72de90e09bec5a2fbdaccb/charset_normalizer-3.4.2-cp313-cp313-win_amd64.whl", hash = "sha256:aa6af9e7d59f9c12b33ae4e9450619cf2488e2bbe9b44030905877f0b2324980", size = 105641, upload-time = "2025-05-02T08:33:18.753Z" }, - { url = "https://files.pythonhosted.org/packages/20/94/c5790835a017658cbfabd07f3bfb549140c3ac458cfc196323996b10095a/charset_normalizer-3.4.2-py3-none-any.whl", hash = "sha256:7f56930ab0abd1c45cd15be65cc741c28b1c9a34876ce8c17a2fa107810c0af0", size = 52626, upload-time = "2025-05-02T08:34:40.053Z" }, -] - -[[package]] -name = "click" -version = "8.1.8" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593, upload-time = "2024-12-21T18:38:44.339Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7e/d4/7ebdbd03970677812aac39c869717059dbb71a4cfc033ca6e5221787892c/click-8.1.8-py3-none-any.whl", hash = 
"sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2", size = 98188, upload-time = "2024-12-21T18:38:41.666Z" }, -] - -[[package]] -name = "colorama" -version = "0.4.6" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, -] - -[[package]] -name = "dirty-equals" -version = "0.9.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/b0/99/133892f401ced5a27e641a473c547d5fbdb39af8f85dac8a9d633ea3e7a7/dirty_equals-0.9.0.tar.gz", hash = "sha256:17f515970b04ed7900b733c95fd8091f4f85e52f1fb5f268757f25c858eb1f7b", size = 50412, upload-time = "2025-01-11T23:23:40.491Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/77/0c/03cc99bf3b6328604b10829de3460f2b2ad3373200c45665c38508e550c6/dirty_equals-0.9.0-py3-none-any.whl", hash = "sha256:ff4d027f5cfa1b69573af00f7ba9043ea652dbdce3fe5cbe828e478c7346db9c", size = 28226, upload-time = "2025-01-11T23:23:37.489Z" }, -] - -[[package]] -name = "distlib" -version = "0.3.9" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/0d/dd/1bec4c5ddb504ca60fc29472f3d27e8d4da1257a854e1d96742f15c1d02d/distlib-0.3.9.tar.gz", hash = "sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403", size = 613923, upload-time = "2024-10-09T18:35:47.551Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/91/a1/cf2472db20f7ce4a6be1253a81cfdf85ad9c7885ffbed7047fb72c24cf87/distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87", size = 468973, upload-time = "2024-10-09T18:35:44.272Z" }, -] - -[[package]] -name = "distro" -version = "1.9.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722, upload-time = "2023-12-24T09:54:32.31Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" }, -] - -[[package]] -name = "filelock" -version = "3.18.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/0a/10/c23352565a6544bdc5353e0b15fc1c563352101f30e24bf500207a54df9a/filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2", size = 18075, upload-time = "2025-03-14T07:11:40.47Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/4d/36/2a115987e2d8c300a974597416d9de88f2444426de9571f4b59b2cca3acc/filelock-3.18.0-py3-none-any.whl", hash = 
"sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de", size = 16215, upload-time = "2025-03-14T07:11:39.145Z" }, -] - -[[package]] -name = "fire" -version = "0.7.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "termcolor" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/6b/b6/82c7e601d6d3c3278c40b7bd35e17e82aa227f050aa9f66cb7b7fce29471/fire-0.7.0.tar.gz", hash = "sha256:961550f07936eaf65ad1dc8360f2b2bf8408fad46abbfa4d2a3794f8d2a95cdf", size = 87189, upload-time = "2024-10-01T14:29:31.585Z" } - -[[package]] -name = "h11" -version = "0.14.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f5/38/3af3d3633a34a3316095b39c8e8fb4853a28a536e55d347bd8d8e9a14b03/h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d", size = 100418, upload-time = "2022-09-25T15:40:01.519Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/95/04/ff642e65ad6b90db43e668d70ffb6736436c7ce41fcc549f4e9472234127/h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761", size = 58259, upload-time = "2022-09-25T15:39:59.68Z" }, -] - -[[package]] -name = "httpcore" -version = "1.0.7" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "certifi" }, - { name = "h11" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/6a/41/d7d0a89eb493922c37d343b607bc1b5da7f5be7e383740b4753ad8943e90/httpcore-1.0.7.tar.gz", hash = "sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c", size = 85196, upload-time = "2024-11-15T12:30:47.531Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/87/f5/72347bc88306acb359581ac4d52f23c0ef445b57157adedb9aee0cd689d2/httpcore-1.0.7-py3-none-any.whl", hash = "sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd", size = 78551, upload-time = "2024-11-15T12:30:45.782Z" }, -] - -[[package]] -name = "httpx" -version = "0.28.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "anyio" }, - { name = "certifi" }, - { name = "httpcore" }, - { name = "idna" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, -] - -[[package]] -name = "identify" -version = "2.6.12" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/88/d193a27416618628a5eea64e3223acd800b40749a96ffb322a9b55a49ed1/identify-2.6.12.tar.gz", hash = "sha256:d8de45749f1efb108badef65ee8386f0f7bb19a7f26185f74de6367bffbaf0e6", size = 99254, upload-time = "2025-05-23T20:37:53.3Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7a/cd/18f8da995b658420625f7ef13f037be53ae04ec5ad33f9b718240dcfd48c/identify-2.6.12-py2.py3-none-any.whl", hash = "sha256:ad9672d5a72e0d2ff7c5c8809b62dfa60458626352fb0eb7b55e69bdc45334a2", size = 99145, upload-time = "2025-05-23T20:37:51.495Z" }, -] - -[[package]] -name = 
"idna" -version = "3.10" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490, upload-time = "2024-09-15T18:07:39.745Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload-time = "2024-09-15T18:07:37.964Z" }, -] - -[[package]] -name = "iniconfig" -version = "2.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793, upload-time = "2025-03-19T20:09:59.721Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" }, -] - -[[package]] -name = "llama-stack-client" -version = "0.2.13" -source = { editable = "." } -dependencies = [ - { name = "anyio" }, - { name = "click" }, - { name = "distro" }, - { name = "fire" }, - { name = "httpx" }, - { name = "pandas" }, - { name = "prompt-toolkit" }, - { name = "pyaml" }, - { name = "pydantic" }, - { name = "requests" }, - { name = "rich" }, - { name = "sniffio" }, - { name = "termcolor" }, - { name = "tqdm" }, - { name = "typing-extensions" }, -] - -[package.dev-dependencies] -dev = [ - { name = "black" }, - { name = "dirty-equals" }, - { name = "mypy" }, - { name = "pre-commit" }, - { name = "pytest" }, - { name = "pytest-asyncio" }, - { name = "respx" }, - { name = "ruff" }, -] - -[package.metadata] -requires-dist = [ - { name = "anyio", specifier = ">=3.5.0,<5" }, - { name = "click" }, - { name = "distro", specifier = ">=1.7.0,<2" }, - { name = "fire" }, - { name = "httpx", specifier = ">=0.23.0,<1" }, - { name = "pandas" }, - { name = "prompt-toolkit" }, - { name = "pyaml" }, - { name = "pydantic", specifier = ">=1.9.0,<3" }, - { name = "requests" }, - { name = "rich" }, - { name = "sniffio" }, - { name = "termcolor" }, - { name = "tqdm" }, - { name = "typing-extensions", specifier = ">=4.7,<5" }, -] - -[package.metadata.requires-dev] -dev = [ - { name = "black" }, - { name = "dirty-equals" }, - { name = "mypy" }, - { name = "pre-commit" }, - { name = "pytest", specifier = ">=7.1.1" }, - { name = "pytest-asyncio" }, - { name = "respx" }, - { name = "ruff" }, -] - -[[package]] -name = "markdown-it-py" -version = "3.0.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "mdurl" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/38/71/3b932df36c1a044d397a1f92d1cf91ee0a503d91e470cbd670aa66b07ed0/markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb", size = 74596, upload-time = "2023-06-03T06:41:14.443Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = 
"sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528, upload-time = "2023-06-03T06:41:11.019Z" }, -] - -[[package]] -name = "mdurl" -version = "0.1.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, -] - -[[package]] -name = "mypy" -version = "1.16.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "mypy-extensions" }, - { name = "pathspec" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/d4/38/13c2f1abae94d5ea0354e146b95a1be9b2137a0d506728e0da037c4276f6/mypy-1.16.0.tar.gz", hash = "sha256:84b94283f817e2aa6350a14b4a8fb2a35a53c286f97c9d30f53b63620e7af8ab", size = 3323139, upload-time = "2025-05-29T13:46:12.532Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/70/cf/158e5055e60ca2be23aec54a3010f89dcffd788732634b344fc9cb1e85a0/mypy-1.16.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c5436d11e89a3ad16ce8afe752f0f373ae9620841c50883dc96f8b8805620b13", size = 11062927, upload-time = "2025-05-29T13:35:52.328Z" }, - { url = "https://files.pythonhosted.org/packages/94/34/cfff7a56be1609f5d10ef386342ce3494158e4d506516890142007e6472c/mypy-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f2622af30bf01d8fc36466231bdd203d120d7a599a6d88fb22bdcb9dbff84090", size = 10083082, upload-time = "2025-05-29T13:35:33.378Z" }, - { url = "https://files.pythonhosted.org/packages/b3/7f/7242062ec6288c33d8ad89574df87c3903d394870e5e6ba1699317a65075/mypy-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d045d33c284e10a038f5e29faca055b90eee87da3fc63b8889085744ebabb5a1", size = 11828306, upload-time = "2025-05-29T13:21:02.164Z" }, - { url = "https://files.pythonhosted.org/packages/6f/5f/b392f7b4f659f5b619ce5994c5c43caab3d80df2296ae54fa888b3d17f5a/mypy-1.16.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b4968f14f44c62e2ec4a038c8797a87315be8df7740dc3ee8d3bfe1c6bf5dba8", size = 12702764, upload-time = "2025-05-29T13:20:42.826Z" }, - { url = "https://files.pythonhosted.org/packages/9b/c0/7646ef3a00fa39ac9bc0938626d9ff29d19d733011be929cfea59d82d136/mypy-1.16.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:eb14a4a871bb8efb1e4a50360d4e3c8d6c601e7a31028a2c79f9bb659b63d730", size = 12896233, upload-time = "2025-05-29T13:18:37.446Z" }, - { url = "https://files.pythonhosted.org/packages/6d/38/52f4b808b3fef7f0ef840ee8ff6ce5b5d77381e65425758d515cdd4f5bb5/mypy-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:bd4e1ebe126152a7bbaa4daedd781c90c8f9643c79b9748caa270ad542f12bec", size = 9565547, upload-time = "2025-05-29T13:20:02.836Z" }, - { url = "https://files.pythonhosted.org/packages/97/9c/ca03bdbefbaa03b264b9318a98950a9c683e06472226b55472f96ebbc53d/mypy-1.16.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a9e056237c89f1587a3be1a3a70a06a698d25e2479b9a2f57325ddaaffc3567b", size = 11059753, 
upload-time = "2025-05-29T13:18:18.167Z" }, - { url = "https://files.pythonhosted.org/packages/36/92/79a969b8302cfe316027c88f7dc6fee70129490a370b3f6eb11d777749d0/mypy-1.16.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0b07e107affb9ee6ce1f342c07f51552d126c32cd62955f59a7db94a51ad12c0", size = 10073338, upload-time = "2025-05-29T13:19:48.079Z" }, - { url = "https://files.pythonhosted.org/packages/14/9b/a943f09319167da0552d5cd722104096a9c99270719b1afeea60d11610aa/mypy-1.16.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c6fb60cbd85dc65d4d63d37cb5c86f4e3a301ec605f606ae3a9173e5cf34997b", size = 11827764, upload-time = "2025-05-29T13:46:04.47Z" }, - { url = "https://files.pythonhosted.org/packages/ec/64/ff75e71c65a0cb6ee737287c7913ea155845a556c64144c65b811afdb9c7/mypy-1.16.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a7e32297a437cc915599e0578fa6bc68ae6a8dc059c9e009c628e1c47f91495d", size = 12701356, upload-time = "2025-05-29T13:35:13.553Z" }, - { url = "https://files.pythonhosted.org/packages/0a/ad/0e93c18987a1182c350f7a5fab70550852f9fabe30ecb63bfbe51b602074/mypy-1.16.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:afe420c9380ccec31e744e8baff0d406c846683681025db3531b32db56962d52", size = 12900745, upload-time = "2025-05-29T13:17:24.409Z" }, - { url = "https://files.pythonhosted.org/packages/28/5d/036c278d7a013e97e33f08c047fe5583ab4f1fc47c9a49f985f1cdd2a2d7/mypy-1.16.0-cp313-cp313-win_amd64.whl", hash = "sha256:55f9076c6ce55dd3f8cd0c6fff26a008ca8e5131b89d5ba6d86bd3f47e736eeb", size = 9572200, upload-time = "2025-05-29T13:33:44.92Z" }, - { url = "https://files.pythonhosted.org/packages/99/a3/6ed10530dec8e0fdc890d81361260c9ef1f5e5c217ad8c9b21ecb2b8366b/mypy-1.16.0-py3-none-any.whl", hash = "sha256:29e1499864a3888bca5c1542f2d7232c6e586295183320caa95758fc84034031", size = 2265773, upload-time = "2025-05-29T13:35:18.762Z" }, -] - -[[package]] -name = "mypy-extensions" -version = "1.1.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/6e/371856a3fb9d31ca8dac321cda606860fa4548858c0cc45d9d1d4ca2628b/mypy_extensions-1.1.0.tar.gz", hash = "sha256:52e68efc3284861e772bbcd66823fde5ae21fd2fdb51c62a211403730b916558", size = 6343, upload-time = "2025-04-22T14:54:24.164Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963, upload-time = "2025-04-22T14:54:22.983Z" }, -] - -[[package]] -name = "nodeenv" -version = "1.9.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/43/16/fc88b08840de0e0a72a2f9d8c6bae36be573e475a6326ae854bcc549fc45/nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f", size = 47437, upload-time = "2024-06-04T18:44:11.171Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d2/1d/1b658dbd2b9fa9c4c9f32accbfc0205d532c8c6194dc0f2a4c0428e7128a/nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9", size = 22314, upload-time = "2024-06-04T18:44:08.352Z" }, -] - -[[package]] -name = "numpy" -version = "2.2.3" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/fb/90/8956572f5c4ae52201fdec7ba2044b2c882832dcec7d5d0922c9e9acf2de/numpy-2.2.3.tar.gz", hash = "sha256:dbdc15f0c81611925f382dfa97b3bd0bc2c1ce19d4fe50482cb0ddc12ba30020", size = 20262700, upload-time = "2025-02-13T17:17:41.558Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/43/ec/43628dcf98466e087812142eec6d1c1a6c6bdfdad30a0aa07b872dc01f6f/numpy-2.2.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:12c045f43b1d2915eca6b880a7f4a256f59d62df4f044788c8ba67709412128d", size = 20929458, upload-time = "2025-02-13T16:48:32.527Z" }, - { url = "https://files.pythonhosted.org/packages/9b/c0/2f4225073e99a5c12350954949ed19b5d4a738f541d33e6f7439e33e98e4/numpy-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:87eed225fd415bbae787f93a457af7f5990b92a334e346f72070bf569b9c9c95", size = 14115299, upload-time = "2025-02-13T16:48:54.659Z" }, - { url = "https://files.pythonhosted.org/packages/ca/fa/d2c5575d9c734a7376cc1592fae50257ec95d061b27ee3dbdb0b3b551eb2/numpy-2.2.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:712a64103d97c404e87d4d7c47fb0c7ff9acccc625ca2002848e0d53288b90ea", size = 5145723, upload-time = "2025-02-13T16:49:04.561Z" }, - { url = "https://files.pythonhosted.org/packages/eb/dc/023dad5b268a7895e58e791f28dc1c60eb7b6c06fcbc2af8538ad069d5f3/numpy-2.2.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:a5ae282abe60a2db0fd407072aff4599c279bcd6e9a2475500fc35b00a57c532", size = 6678797, upload-time = "2025-02-13T16:49:15.217Z" }, - { url = "https://files.pythonhosted.org/packages/3f/19/bcd641ccf19ac25abb6fb1dcd7744840c11f9d62519d7057b6ab2096eb60/numpy-2.2.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5266de33d4c3420973cf9ae3b98b54a2a6d53a559310e3236c4b2b06b9c07d4e", size = 14067362, upload-time = "2025-02-13T16:49:36.17Z" }, - { url = "https://files.pythonhosted.org/packages/39/04/78d2e7402fb479d893953fb78fa7045f7deb635ec095b6b4f0260223091a/numpy-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b787adbf04b0db1967798dba8da1af07e387908ed1553a0d6e74c084d1ceafe", size = 16116679, upload-time = "2025-02-13T16:50:00.079Z" }, - { url = "https://files.pythonhosted.org/packages/d0/a1/e90f7aa66512be3150cb9d27f3d9995db330ad1b2046474a13b7040dfd92/numpy-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:34c1b7e83f94f3b564b35f480f5652a47007dd91f7c839f404d03279cc8dd021", size = 15264272, upload-time = "2025-02-13T16:50:23.121Z" }, - { url = "https://files.pythonhosted.org/packages/dc/b6/50bd027cca494de4fa1fc7bf1662983d0ba5f256fa0ece2c376b5eb9b3f0/numpy-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4d8335b5f1b6e2bce120d55fb17064b0262ff29b459e8493d1785c18ae2553b8", size = 17880549, upload-time = "2025-02-13T16:50:50.778Z" }, - { url = "https://files.pythonhosted.org/packages/96/30/f7bf4acb5f8db10a96f73896bdeed7a63373137b131ca18bd3dab889db3b/numpy-2.2.3-cp312-cp312-win32.whl", hash = "sha256:4d9828d25fb246bedd31e04c9e75714a4087211ac348cb39c8c5f99dbb6683fe", size = 6293394, upload-time = "2025-02-13T16:51:02.031Z" }, - { url = "https://files.pythonhosted.org/packages/42/6e/55580a538116d16ae7c9aa17d4edd56e83f42126cb1dfe7a684da7925d2c/numpy-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:83807d445817326b4bcdaaaf8e8e9f1753da04341eceec705c001ff342002e5d", size = 12626357, upload-time = "2025-02-13T16:51:21.821Z" }, - { url = 
"https://files.pythonhosted.org/packages/0e/8b/88b98ed534d6a03ba8cddb316950fe80842885709b58501233c29dfa24a9/numpy-2.2.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7bfdb06b395385ea9b91bf55c1adf1b297c9fdb531552845ff1d3ea6e40d5aba", size = 20916001, upload-time = "2025-02-13T16:51:52.612Z" }, - { url = "https://files.pythonhosted.org/packages/d9/b4/def6ec32c725cc5fbd8bdf8af80f616acf075fe752d8a23e895da8c67b70/numpy-2.2.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:23c9f4edbf4c065fddb10a4f6e8b6a244342d95966a48820c614891e5059bb50", size = 14130721, upload-time = "2025-02-13T16:52:31.998Z" }, - { url = "https://files.pythonhosted.org/packages/20/60/70af0acc86495b25b672d403e12cb25448d79a2b9658f4fc45e845c397a8/numpy-2.2.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:a0c03b6be48aaf92525cccf393265e02773be8fd9551a2f9adbe7db1fa2b60f1", size = 5130999, upload-time = "2025-02-13T16:52:41.545Z" }, - { url = "https://files.pythonhosted.org/packages/2e/69/d96c006fb73c9a47bcb3611417cf178049aae159afae47c48bd66df9c536/numpy-2.2.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:2376e317111daa0a6739e50f7ee2a6353f768489102308b0d98fcf4a04f7f3b5", size = 6665299, upload-time = "2025-02-13T16:52:54.96Z" }, - { url = "https://files.pythonhosted.org/packages/5a/3f/d8a877b6e48103733ac224ffa26b30887dc9944ff95dffdfa6c4ce3d7df3/numpy-2.2.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8fb62fe3d206d72fe1cfe31c4a1106ad2b136fcc1606093aeab314f02930fdf2", size = 14064096, upload-time = "2025-02-13T16:53:29.678Z" }, - { url = "https://files.pythonhosted.org/packages/e4/43/619c2c7a0665aafc80efca465ddb1f260287266bdbdce517396f2f145d49/numpy-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:52659ad2534427dffcc36aac76bebdd02b67e3b7a619ac67543bc9bfe6b7cdb1", size = 16114758, upload-time = "2025-02-13T16:54:03.466Z" }, - { url = "https://files.pythonhosted.org/packages/d9/79/ee4fe4f60967ccd3897aa71ae14cdee9e3c097e3256975cc9575d393cb42/numpy-2.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1b416af7d0ed3271cad0f0a0d0bee0911ed7eba23e66f8424d9f3dfcdcae1304", size = 15259880, upload-time = "2025-02-13T16:54:26.744Z" }, - { url = "https://files.pythonhosted.org/packages/fb/c8/8b55cf05db6d85b7a7d414b3d1bd5a740706df00bfa0824a08bf041e52ee/numpy-2.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1402da8e0f435991983d0a9708b779f95a8c98c6b18a171b9f1be09005e64d9d", size = 17876721, upload-time = "2025-02-13T16:54:53.751Z" }, - { url = "https://files.pythonhosted.org/packages/21/d6/b4c2f0564b7dcc413117b0ffbb818d837e4b29996b9234e38b2025ed24e7/numpy-2.2.3-cp313-cp313-win32.whl", hash = "sha256:136553f123ee2951bfcfbc264acd34a2fc2f29d7cdf610ce7daf672b6fbaa693", size = 6290195, upload-time = "2025-02-13T16:58:31.683Z" }, - { url = "https://files.pythonhosted.org/packages/97/e7/7d55a86719d0de7a6a597949f3febefb1009435b79ba510ff32f05a8c1d7/numpy-2.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:5b732c8beef1d7bc2d9e476dbba20aaff6167bf205ad9aa8d30913859e82884b", size = 12619013, upload-time = "2025-02-13T16:58:50.693Z" }, - { url = "https://files.pythonhosted.org/packages/a6/1f/0b863d5528b9048fd486a56e0b97c18bf705e88736c8cea7239012119a54/numpy-2.2.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:435e7a933b9fda8126130b046975a968cc2d833b505475e588339e09f7672890", size = 20944621, upload-time = "2025-02-13T16:55:27.593Z" }, - { url = 
"https://files.pythonhosted.org/packages/aa/99/b478c384f7a0a2e0736177aafc97dc9152fc036a3fdb13f5a3ab225f1494/numpy-2.2.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:7678556eeb0152cbd1522b684dcd215250885993dd00adb93679ec3c0e6e091c", size = 14142502, upload-time = "2025-02-13T16:55:52.039Z" }, - { url = "https://files.pythonhosted.org/packages/fb/61/2d9a694a0f9cd0a839501d362de2a18de75e3004576a3008e56bdd60fcdb/numpy-2.2.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:2e8da03bd561504d9b20e7a12340870dfc206c64ea59b4cfee9fceb95070ee94", size = 5176293, upload-time = "2025-02-13T16:56:01.372Z" }, - { url = "https://files.pythonhosted.org/packages/33/35/51e94011b23e753fa33f891f601e5c1c9a3d515448659b06df9d40c0aa6e/numpy-2.2.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:c9aa4496fd0e17e3843399f533d62857cef5900facf93e735ef65aa4bbc90ef0", size = 6691874, upload-time = "2025-02-13T16:56:12.842Z" }, - { url = "https://files.pythonhosted.org/packages/ff/cf/06e37619aad98a9d03bd8d65b8e3041c3a639be0f5f6b0a0e2da544538d4/numpy-2.2.3-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f4ca91d61a4bf61b0f2228f24bbfa6a9facd5f8af03759fe2a655c50ae2c6610", size = 14036826, upload-time = "2025-02-13T16:56:33.453Z" }, - { url = "https://files.pythonhosted.org/packages/0c/93/5d7d19955abd4d6099ef4a8ee006f9ce258166c38af259f9e5558a172e3e/numpy-2.2.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:deaa09cd492e24fd9b15296844c0ad1b3c976da7907e1c1ed3a0ad21dded6f76", size = 16096567, upload-time = "2025-02-13T16:56:58.035Z" }, - { url = "https://files.pythonhosted.org/packages/af/53/d1c599acf7732d81f46a93621dab6aa8daad914b502a7a115b3f17288ab2/numpy-2.2.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:246535e2f7496b7ac85deffe932896a3577be7af8fb7eebe7146444680297e9a", size = 15242514, upload-time = "2025-02-13T16:57:22.124Z" }, - { url = "https://files.pythonhosted.org/packages/53/43/c0f5411c7b3ea90adf341d05ace762dad8cb9819ef26093e27b15dd121ac/numpy-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:daf43a3d1ea699402c5a850e5313680ac355b4adc9770cd5cfc2940e7861f1bf", size = 17872920, upload-time = "2025-02-13T16:57:49.308Z" }, - { url = "https://files.pythonhosted.org/packages/5b/57/6dbdd45ab277aff62021cafa1e15f9644a52f5b5fc840bc7591b4079fb58/numpy-2.2.3-cp313-cp313t-win32.whl", hash = "sha256:cf802eef1f0134afb81fef94020351be4fe1d6681aadf9c5e862af6602af64ef", size = 6346584, upload-time = "2025-02-13T16:58:02.02Z" }, - { url = "https://files.pythonhosted.org/packages/97/9b/484f7d04b537d0a1202a5ba81c6f53f1846ae6c63c2127f8df869ed31342/numpy-2.2.3-cp313-cp313t-win_amd64.whl", hash = "sha256:aee2512827ceb6d7f517c8b85aa5d3923afe8fc7a57d028cffcd522f1c6fd082", size = 12706784, upload-time = "2025-02-13T16:58:21.038Z" }, -] - -[[package]] -name = "packaging" -version = "25.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727, upload-time = "2025-04-19T11:48:59.673Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, -] - -[[package]] -name = "pandas" 
-version = "2.2.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "numpy" }, - { name = "python-dateutil" }, - { name = "pytz" }, - { name = "tzdata" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/9c/d6/9f8431bacc2e19dca897724cd097b1bb224a6ad5433784a44b587c7c13af/pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667", size = 4399213, upload-time = "2024-09-20T13:10:04.827Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/17/a3/fb2734118db0af37ea7433f57f722c0a56687e14b14690edff0cdb4b7e58/pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9", size = 12529893, upload-time = "2024-09-20T13:09:09.655Z" }, - { url = "https://files.pythonhosted.org/packages/e1/0c/ad295fd74bfac85358fd579e271cded3ac969de81f62dd0142c426b9da91/pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4", size = 11363475, upload-time = "2024-09-20T13:09:14.718Z" }, - { url = "https://files.pythonhosted.org/packages/c6/2a/4bba3f03f7d07207481fed47f5b35f556c7441acddc368ec43d6643c5777/pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3", size = 15188645, upload-time = "2024-09-20T19:02:03.88Z" }, - { url = "https://files.pythonhosted.org/packages/38/f8/d8fddee9ed0d0c0f4a2132c1dfcf0e3e53265055da8df952a53e7eaf178c/pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319", size = 12739445, upload-time = "2024-09-20T13:09:17.621Z" }, - { url = "https://files.pythonhosted.org/packages/20/e8/45a05d9c39d2cea61ab175dbe6a2de1d05b679e8de2011da4ee190d7e748/pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8", size = 16359235, upload-time = "2024-09-20T19:02:07.094Z" }, - { url = "https://files.pythonhosted.org/packages/1d/99/617d07a6a5e429ff90c90da64d428516605a1ec7d7bea494235e1c3882de/pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a", size = 14056756, upload-time = "2024-09-20T13:09:20.474Z" }, - { url = "https://files.pythonhosted.org/packages/29/d4/1244ab8edf173a10fd601f7e13b9566c1b525c4f365d6bee918e68381889/pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13", size = 11504248, upload-time = "2024-09-20T13:09:23.137Z" }, - { url = "https://files.pythonhosted.org/packages/64/22/3b8f4e0ed70644e85cfdcd57454686b9057c6c38d2f74fe4b8bc2527214a/pandas-2.2.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015", size = 12477643, upload-time = "2024-09-20T13:09:25.522Z" }, - { url = "https://files.pythonhosted.org/packages/e4/93/b3f5d1838500e22c8d793625da672f3eec046b1a99257666c94446969282/pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3508d914817e153ad359d7e069d752cdd736a247c322d932eb89e6bc84217f28", size = 11281573, upload-time = "2024-09-20T13:09:28.012Z" }, - { url = "https://files.pythonhosted.org/packages/f5/94/6c79b07f0e5aab1dcfa35a75f4817f5c4f677931d4234afcd75f0e6a66ca/pandas-2.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:22a9d949bfc9a502d320aa04e5d02feab689d61da4e7764b62c30b991c42c5f0", size = 15196085, upload-time = "2024-09-20T19:02:10.451Z" }, - { url = "https://files.pythonhosted.org/packages/e8/31/aa8da88ca0eadbabd0a639788a6da13bb2ff6edbbb9f29aa786450a30a91/pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24", size = 12711809, upload-time = "2024-09-20T13:09:30.814Z" }, - { url = "https://files.pythonhosted.org/packages/ee/7c/c6dbdb0cb2a4344cacfb8de1c5808ca885b2e4dcfde8008266608f9372af/pandas-2.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:800250ecdadb6d9c78eae4990da62743b857b470883fa27f652db8bdde7f6659", size = 16356316, upload-time = "2024-09-20T19:02:13.825Z" }, - { url = "https://files.pythonhosted.org/packages/57/b7/8b757e7d92023b832869fa8881a992696a0bfe2e26f72c9ae9f255988d42/pandas-2.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6374c452ff3ec675a8f46fd9ab25c4ad0ba590b71cf0656f8b6daa5202bca3fb", size = 14022055, upload-time = "2024-09-20T13:09:33.462Z" }, - { url = "https://files.pythonhosted.org/packages/3b/bc/4b18e2b8c002572c5a441a64826252ce5da2aa738855747247a971988043/pandas-2.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:61c5ad4043f791b61dd4752191d9f07f0ae412515d59ba8f005832a532f8736d", size = 11481175, upload-time = "2024-09-20T13:09:35.871Z" }, - { url = "https://files.pythonhosted.org/packages/76/a3/a5d88146815e972d40d19247b2c162e88213ef51c7c25993942c39dbf41d/pandas-2.2.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3b71f27954685ee685317063bf13c7709a7ba74fc996b84fc6821c59b0f06468", size = 12615650, upload-time = "2024-09-20T13:09:38.685Z" }, - { url = "https://files.pythonhosted.org/packages/9c/8c/f0fd18f6140ddafc0c24122c8a964e48294acc579d47def376fef12bcb4a/pandas-2.2.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:38cf8125c40dae9d5acc10fa66af8ea6fdf760b2714ee482ca691fc66e6fcb18", size = 11290177, upload-time = "2024-09-20T13:09:41.141Z" }, - { url = "https://files.pythonhosted.org/packages/ed/f9/e995754eab9c0f14c6777401f7eece0943840b7a9fc932221c19d1abee9f/pandas-2.2.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba96630bc17c875161df3818780af30e43be9b166ce51c9a18c1feae342906c2", size = 14651526, upload-time = "2024-09-20T19:02:16.905Z" }, - { url = "https://files.pythonhosted.org/packages/25/b0/98d6ae2e1abac4f35230aa756005e8654649d305df9a28b16b9ae4353bff/pandas-2.2.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db71525a1538b30142094edb9adc10be3f3e176748cd7acc2240c2f2e5aa3a4", size = 11871013, upload-time = "2024-09-20T13:09:44.39Z" }, - { url = "https://files.pythonhosted.org/packages/cc/57/0f72a10f9db6a4628744c8e8f0df4e6e21de01212c7c981d31e50ffc8328/pandas-2.2.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d", size = 15711620, upload-time = "2024-09-20T19:02:20.639Z" }, - { url = "https://files.pythonhosted.org/packages/ab/5f/b38085618b950b79d2d9164a711c52b10aefc0ae6833b96f626b7021b2ed/pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a", size = 13098436, upload-time = "2024-09-20T13:09:48.112Z" }, -] - -[[package]] -name = "pathspec" -version = "0.12.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = 
"https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043, upload-time = "2023-12-10T22:30:45Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191, upload-time = "2023-12-10T22:30:43.14Z" }, -] - -[[package]] -name = "platformdirs" -version = "4.3.8" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/fe/8b/3c73abc9c759ecd3f1f7ceff6685840859e8070c4d947c93fae71f6a0bf2/platformdirs-4.3.8.tar.gz", hash = "sha256:3d512d96e16bcb959a814c9f348431070822a6496326a4be0911c40b5a74c2bc", size = 21362, upload-time = "2025-05-07T22:47:42.121Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/fe/39/979e8e21520d4e47a0bbe349e2713c0aac6f3d853d0e5b34d76206c439aa/platformdirs-4.3.8-py3-none-any.whl", hash = "sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4", size = 18567, upload-time = "2025-05-07T22:47:40.376Z" }, -] - -[[package]] -name = "pluggy" -version = "1.6.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, -] - -[[package]] -name = "pre-commit" -version = "4.2.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cfgv" }, - { name = "identify" }, - { name = "nodeenv" }, - { name = "pyyaml" }, - { name = "virtualenv" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/08/39/679ca9b26c7bb2999ff122d50faa301e49af82ca9c066ec061cfbc0c6784/pre_commit-4.2.0.tar.gz", hash = "sha256:601283b9757afd87d40c4c4a9b2b5de9637a8ea02eaff7adc2d0fb4e04841146", size = 193424, upload-time = "2025-03-18T21:35:20.987Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/88/74/a88bf1b1efeae488a0c0b7bdf71429c313722d1fc0f377537fbe554e6180/pre_commit-4.2.0-py2.py3-none-any.whl", hash = "sha256:a009ca7205f1eb497d10b845e52c838a98b6cdd2102a6c8e4540e94ee75c58bd", size = 220707, upload-time = "2025-03-18T21:35:19.343Z" }, -] - -[[package]] -name = "prompt-toolkit" -version = "3.0.50" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "wcwidth" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/a1/e1/bd15cb8ffdcfeeb2bdc215de3c3cffca11408d829e4b8416dcfe71ba8854/prompt_toolkit-3.0.50.tar.gz", hash = "sha256:544748f3860a2623ca5cd6d2795e7a14f3d0e1c3c9728359013f79877fc89bab", size = 429087, upload-time = "2025-01-20T15:55:35.072Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e4/ea/d836f008d33151c7a1f62caf3d8dd782e4d15f6a43897f64480c2b8de2ad/prompt_toolkit-3.0.50-py3-none-any.whl", hash = "sha256:9b6427eb19e479d98acff65196a307c555eb567989e6d88ebbb1b509d9779198", size = 387816, 
upload-time = "2025-01-20T15:55:29.98Z" }, -] - -[[package]] -name = "pyaml" -version = "25.1.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pyyaml" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/f4/06/04b9c1907c13dc81729a9c6b4f42eab47baab7a8738ed5d2683eac215ad0/pyaml-25.1.0.tar.gz", hash = "sha256:33a93ac49218f57e020b81e280d2706cea554ac5a76445ac79add760d019c709", size = 29469, upload-time = "2025-01-01T14:52:46.684Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/69/c1/ec1930bc6c01754b8baf3c99420f340b920561f0060bccbf81809db354cc/pyaml-25.1.0-py3-none-any.whl", hash = "sha256:f7b40629d2dae88035657c860f539db3525ddd0120a11e0bcb44d47d5968b3bc", size = 26074, upload-time = "2025-01-01T14:52:45.006Z" }, -] - -[[package]] -name = "pydantic" -version = "2.10.6" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "annotated-types" }, - { name = "pydantic-core" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/b7/ae/d5220c5c52b158b1de7ca89fc5edb72f304a70a4c540c84c8844bf4008de/pydantic-2.10.6.tar.gz", hash = "sha256:ca5daa827cce33de7a42be142548b0096bf05a7e7b365aebfa5f8eeec7128236", size = 761681, upload-time = "2025-01-24T01:42:12.693Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f4/3c/8cc1cc84deffa6e25d2d0c688ebb80635dfdbf1dbea3e30c541c8cf4d860/pydantic-2.10.6-py3-none-any.whl", hash = "sha256:427d664bf0b8a2b34ff5dd0f5a18df00591adcee7198fbd71981054cef37b584", size = 431696, upload-time = "2025-01-24T01:42:10.371Z" }, -] - -[[package]] -name = "pydantic-core" -version = "2.27.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/fc/01/f3e5ac5e7c25833db5eb555f7b7ab24cd6f8c322d3a3ad2d67a952dc0abc/pydantic_core-2.27.2.tar.gz", hash = "sha256:eb026e5a4c1fee05726072337ff51d1efb6f59090b7da90d30ea58625b1ffb39", size = 413443, upload-time = "2024-12-18T11:31:54.917Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d6/74/51c8a5482ca447871c93e142d9d4a92ead74de6c8dc5e66733e22c9bba89/pydantic_core-2.27.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:9e0c8cfefa0ef83b4da9588448b6d8d2a2bf1a53c3f1ae5fca39eb3061e2f0b0", size = 1893127, upload-time = "2024-12-18T11:28:30.346Z" }, - { url = "https://files.pythonhosted.org/packages/d3/f3/c97e80721735868313c58b89d2de85fa80fe8dfeeed84dc51598b92a135e/pydantic_core-2.27.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:83097677b8e3bd7eaa6775720ec8e0405f1575015a463285a92bfdfe254529ef", size = 1811340, upload-time = "2024-12-18T11:28:32.521Z" }, - { url = "https://files.pythonhosted.org/packages/9e/91/840ec1375e686dbae1bd80a9e46c26a1e0083e1186abc610efa3d9a36180/pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:172fce187655fece0c90d90a678424b013f8fbb0ca8b036ac266749c09438cb7", size = 1822900, upload-time = "2024-12-18T11:28:34.507Z" }, - { url = "https://files.pythonhosted.org/packages/f6/31/4240bc96025035500c18adc149aa6ffdf1a0062a4b525c932065ceb4d868/pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:519f29f5213271eeeeb3093f662ba2fd512b91c5f188f3bb7b27bc5973816934", size = 1869177, upload-time = "2024-12-18T11:28:36.488Z" }, - { url = 
"https://files.pythonhosted.org/packages/fa/20/02fbaadb7808be578317015c462655c317a77a7c8f0ef274bc016a784c54/pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:05e3a55d124407fffba0dd6b0c0cd056d10e983ceb4e5dbd10dda135c31071d6", size = 2038046, upload-time = "2024-12-18T11:28:39.409Z" }, - { url = "https://files.pythonhosted.org/packages/06/86/7f306b904e6c9eccf0668248b3f272090e49c275bc488a7b88b0823444a4/pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9c3ed807c7b91de05e63930188f19e921d1fe90de6b4f5cd43ee7fcc3525cb8c", size = 2685386, upload-time = "2024-12-18T11:28:41.221Z" }, - { url = "https://files.pythonhosted.org/packages/8d/f0/49129b27c43396581a635d8710dae54a791b17dfc50c70164866bbf865e3/pydantic_core-2.27.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6fb4aadc0b9a0c063206846d603b92030eb6f03069151a625667f982887153e2", size = 1997060, upload-time = "2024-12-18T11:28:44.709Z" }, - { url = "https://files.pythonhosted.org/packages/0d/0f/943b4af7cd416c477fd40b187036c4f89b416a33d3cc0ab7b82708a667aa/pydantic_core-2.27.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:28ccb213807e037460326424ceb8b5245acb88f32f3d2777427476e1b32c48c4", size = 2004870, upload-time = "2024-12-18T11:28:46.839Z" }, - { url = "https://files.pythonhosted.org/packages/35/40/aea70b5b1a63911c53a4c8117c0a828d6790483f858041f47bab0b779f44/pydantic_core-2.27.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:de3cd1899e2c279b140adde9357c4495ed9d47131b4a4eaff9052f23398076b3", size = 1999822, upload-time = "2024-12-18T11:28:48.896Z" }, - { url = "https://files.pythonhosted.org/packages/f2/b3/807b94fd337d58effc5498fd1a7a4d9d59af4133e83e32ae39a96fddec9d/pydantic_core-2.27.2-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:220f892729375e2d736b97d0e51466252ad84c51857d4d15f5e9692f9ef12be4", size = 2130364, upload-time = "2024-12-18T11:28:50.755Z" }, - { url = "https://files.pythonhosted.org/packages/fc/df/791c827cd4ee6efd59248dca9369fb35e80a9484462c33c6649a8d02b565/pydantic_core-2.27.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a0fcd29cd6b4e74fe8ddd2c90330fd8edf2e30cb52acda47f06dd615ae72da57", size = 2158303, upload-time = "2024-12-18T11:28:54.122Z" }, - { url = "https://files.pythonhosted.org/packages/9b/67/4e197c300976af185b7cef4c02203e175fb127e414125916bf1128b639a9/pydantic_core-2.27.2-cp312-cp312-win32.whl", hash = "sha256:1e2cb691ed9834cd6a8be61228471d0a503731abfb42f82458ff27be7b2186fc", size = 1834064, upload-time = "2024-12-18T11:28:56.074Z" }, - { url = "https://files.pythonhosted.org/packages/1f/ea/cd7209a889163b8dcca139fe32b9687dd05249161a3edda62860430457a5/pydantic_core-2.27.2-cp312-cp312-win_amd64.whl", hash = "sha256:cc3f1a99a4f4f9dd1de4fe0312c114e740b5ddead65bb4102884b384c15d8bc9", size = 1989046, upload-time = "2024-12-18T11:28:58.107Z" }, - { url = "https://files.pythonhosted.org/packages/bc/49/c54baab2f4658c26ac633d798dab66b4c3a9bbf47cff5284e9c182f4137a/pydantic_core-2.27.2-cp312-cp312-win_arm64.whl", hash = "sha256:3911ac9284cd8a1792d3cb26a2da18f3ca26c6908cc434a18f730dc0db7bfa3b", size = 1885092, upload-time = "2024-12-18T11:29:01.335Z" }, - { url = "https://files.pythonhosted.org/packages/41/b1/9bc383f48f8002f99104e3acff6cba1231b29ef76cfa45d1506a5cad1f84/pydantic_core-2.27.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:7d14bd329640e63852364c306f4d23eb744e0f8193148d4044dd3dacdaacbd8b", size = 1892709, upload-time = "2024-12-18T11:29:03.193Z" }, - 
{ url = "https://files.pythonhosted.org/packages/10/6c/e62b8657b834f3eb2961b49ec8e301eb99946245e70bf42c8817350cbefc/pydantic_core-2.27.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:82f91663004eb8ed30ff478d77c4d1179b3563df6cdb15c0817cd1cdaf34d154", size = 1811273, upload-time = "2024-12-18T11:29:05.306Z" }, - { url = "https://files.pythonhosted.org/packages/ba/15/52cfe49c8c986e081b863b102d6b859d9defc63446b642ccbbb3742bf371/pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:71b24c7d61131bb83df10cc7e687433609963a944ccf45190cfc21e0887b08c9", size = 1823027, upload-time = "2024-12-18T11:29:07.294Z" }, - { url = "https://files.pythonhosted.org/packages/b1/1c/b6f402cfc18ec0024120602bdbcebc7bdd5b856528c013bd4d13865ca473/pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fa8e459d4954f608fa26116118bb67f56b93b209c39b008277ace29937453dc9", size = 1868888, upload-time = "2024-12-18T11:29:09.249Z" }, - { url = "https://files.pythonhosted.org/packages/bd/7b/8cb75b66ac37bc2975a3b7de99f3c6f355fcc4d89820b61dffa8f1e81677/pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ce8918cbebc8da707ba805b7fd0b382816858728ae7fe19a942080c24e5b7cd1", size = 2037738, upload-time = "2024-12-18T11:29:11.23Z" }, - { url = "https://files.pythonhosted.org/packages/c8/f1/786d8fe78970a06f61df22cba58e365ce304bf9b9f46cc71c8c424e0c334/pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eda3f5c2a021bbc5d976107bb302e0131351c2ba54343f8a496dc8783d3d3a6a", size = 2685138, upload-time = "2024-12-18T11:29:16.396Z" }, - { url = "https://files.pythonhosted.org/packages/a6/74/d12b2cd841d8724dc8ffb13fc5cef86566a53ed358103150209ecd5d1999/pydantic_core-2.27.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bd8086fa684c4775c27f03f062cbb9eaa6e17f064307e86b21b9e0abc9c0f02e", size = 1997025, upload-time = "2024-12-18T11:29:20.25Z" }, - { url = "https://files.pythonhosted.org/packages/a0/6e/940bcd631bc4d9a06c9539b51f070b66e8f370ed0933f392db6ff350d873/pydantic_core-2.27.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8d9b3388db186ba0c099a6d20f0604a44eabdeef1777ddd94786cdae158729e4", size = 2004633, upload-time = "2024-12-18T11:29:23.877Z" }, - { url = "https://files.pythonhosted.org/packages/50/cc/a46b34f1708d82498c227d5d80ce615b2dd502ddcfd8376fc14a36655af1/pydantic_core-2.27.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:7a66efda2387de898c8f38c0cf7f14fca0b51a8ef0b24bfea5849f1b3c95af27", size = 1999404, upload-time = "2024-12-18T11:29:25.872Z" }, - { url = "https://files.pythonhosted.org/packages/ca/2d/c365cfa930ed23bc58c41463bae347d1005537dc8db79e998af8ba28d35e/pydantic_core-2.27.2-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:18a101c168e4e092ab40dbc2503bdc0f62010e95d292b27827871dc85450d7ee", size = 2130130, upload-time = "2024-12-18T11:29:29.252Z" }, - { url = "https://files.pythonhosted.org/packages/f4/d7/eb64d015c350b7cdb371145b54d96c919d4db516817f31cd1c650cae3b21/pydantic_core-2.27.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ba5dd002f88b78a4215ed2f8ddbdf85e8513382820ba15ad5ad8955ce0ca19a1", size = 2157946, upload-time = "2024-12-18T11:29:31.338Z" }, - { url = "https://files.pythonhosted.org/packages/a4/99/bddde3ddde76c03b65dfd5a66ab436c4e58ffc42927d4ff1198ffbf96f5f/pydantic_core-2.27.2-cp313-cp313-win32.whl", hash = 
"sha256:1ebaf1d0481914d004a573394f4be3a7616334be70261007e47c2a6fe7e50130", size = 1834387, upload-time = "2024-12-18T11:29:33.481Z" }, - { url = "https://files.pythonhosted.org/packages/71/47/82b5e846e01b26ac6f1893d3c5f9f3a2eb6ba79be26eef0b759b4fe72946/pydantic_core-2.27.2-cp313-cp313-win_amd64.whl", hash = "sha256:953101387ecf2f5652883208769a79e48db18c6df442568a0b5ccd8c2723abee", size = 1990453, upload-time = "2024-12-18T11:29:35.533Z" }, - { url = "https://files.pythonhosted.org/packages/51/b2/b2b50d5ecf21acf870190ae5d093602d95f66c9c31f9d5de6062eb329ad1/pydantic_core-2.27.2-cp313-cp313-win_arm64.whl", hash = "sha256:ac4dbfd1691affb8f48c2c13241a2e3b60ff23247cbcf981759c768b6633cf8b", size = 1885186, upload-time = "2024-12-18T11:29:37.649Z" }, -] - -[[package]] -name = "pygments" -version = "2.19.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/7c/2d/c3338d48ea6cc0feb8446d8e6937e1408088a72a39937982cc6111d17f84/pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f", size = 4968581, upload-time = "2025-01-06T17:26:30.443Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293, upload-time = "2025-01-06T17:26:25.553Z" }, -] - -[[package]] -name = "pytest" -version = "8.4.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, - { name = "iniconfig" }, - { name = "packaging" }, - { name = "pluggy" }, - { name = "pygments" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/fb/aa/405082ce2749be5398045152251ac69c0f3578c7077efc53431303af97ce/pytest-8.4.0.tar.gz", hash = "sha256:14d920b48472ea0dbf68e45b96cd1ffda4705f33307dcc86c676c1b5104838a6", size = 1515232, upload-time = "2025-06-02T17:36:30.03Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/2f/de/afa024cbe022b1b318a3d224125aa24939e99b4ff6f22e0ba639a2eaee47/pytest-8.4.0-py3-none-any.whl", hash = "sha256:f40f825768ad76c0977cbacdf1fd37c6f7a468e460ea6a0636078f8972d4517e", size = 363797, upload-time = "2025-06-02T17:36:27.859Z" }, -] - -[[package]] -name = "pytest-asyncio" -version = "1.0.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pytest" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/d0/d4/14f53324cb1a6381bef29d698987625d80052bb33932d8e7cbf9b337b17c/pytest_asyncio-1.0.0.tar.gz", hash = "sha256:d15463d13f4456e1ead2594520216b225a16f781e144f8fdf6c5bb4667c48b3f", size = 46960, upload-time = "2025-05-26T04:54:40.484Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/30/05/ce271016e351fddc8399e546f6e23761967ee09c8c568bbfbecb0c150171/pytest_asyncio-1.0.0-py3-none-any.whl", hash = "sha256:4f024da9f1ef945e680dc68610b52550e36590a67fd31bb3b4943979a1f90ef3", size = 15976, upload-time = "2025-05-26T04:54:39.035Z" }, -] - -[[package]] -name = "python-dateutil" -version = "2.9.0.post0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "six" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = 
"2024-03-01T18:36:20.211Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" }, -] - -[[package]] -name = "pytz" -version = "2025.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/5f/57/df1c9157c8d5a05117e455d66fd7cf6dbc46974f832b1058ed4856785d8a/pytz-2025.1.tar.gz", hash = "sha256:c2db42be2a2518b28e65f9207c4d05e6ff547d1efa4086469ef855e4ab70178e", size = 319617, upload-time = "2025-01-31T01:54:48.615Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/eb/38/ac33370d784287baa1c3d538978b5e2ea064d4c1b93ffbd12826c190dd10/pytz-2025.1-py2.py3-none-any.whl", hash = "sha256:89dd22dca55b46eac6eda23b2d72721bf1bdfef212645d81513ef5d03038de57", size = 507930, upload-time = "2025-01-31T01:54:45.634Z" }, -] - -[[package]] -name = "pyyaml" -version = "6.0.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/54/ed/79a089b6be93607fa5cdaedf301d7dfb23af5f25c398d5ead2525b063e17/pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e", size = 130631, upload-time = "2024-08-06T20:33:50.674Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/86/0c/c581167fc46d6d6d7ddcfb8c843a4de25bdd27e4466938109ca68492292c/PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab", size = 183873, upload-time = "2024-08-06T20:32:25.131Z" }, - { url = "https://files.pythonhosted.org/packages/a8/0c/38374f5bb272c051e2a69281d71cba6fdb983413e6758b84482905e29a5d/PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725", size = 173302, upload-time = "2024-08-06T20:32:26.511Z" }, - { url = "https://files.pythonhosted.org/packages/c3/93/9916574aa8c00aa06bbac729972eb1071d002b8e158bd0e83a3b9a20a1f7/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5", size = 739154, upload-time = "2024-08-06T20:32:28.363Z" }, - { url = "https://files.pythonhosted.org/packages/95/0f/b8938f1cbd09739c6da569d172531567dbcc9789e0029aa070856f123984/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425", size = 766223, upload-time = "2024-08-06T20:32:30.058Z" }, - { url = "https://files.pythonhosted.org/packages/b9/2b/614b4752f2e127db5cc206abc23a8c19678e92b23c3db30fc86ab731d3bd/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476", size = 767542, upload-time = "2024-08-06T20:32:31.881Z" }, - { url = "https://files.pythonhosted.org/packages/d4/00/dd137d5bcc7efea1836d6264f049359861cf548469d18da90cd8216cf05f/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48", size = 731164, upload-time = "2024-08-06T20:32:37.083Z" }, - { url = "https://files.pythonhosted.org/packages/c9/1f/4f998c900485e5c0ef43838363ba4a9723ac0ad73a9dc42068b12aaba4e4/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = 
"sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b", size = 756611, upload-time = "2024-08-06T20:32:38.898Z" }, - { url = "https://files.pythonhosted.org/packages/df/d1/f5a275fdb252768b7a11ec63585bc38d0e87c9e05668a139fea92b80634c/PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4", size = 140591, upload-time = "2024-08-06T20:32:40.241Z" }, - { url = "https://files.pythonhosted.org/packages/0c/e8/4f648c598b17c3d06e8753d7d13d57542b30d56e6c2dedf9c331ae56312e/PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8", size = 156338, upload-time = "2024-08-06T20:32:41.93Z" }, - { url = "https://files.pythonhosted.org/packages/ef/e3/3af305b830494fa85d95f6d95ef7fa73f2ee1cc8ef5b495c7c3269fb835f/PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba", size = 181309, upload-time = "2024-08-06T20:32:43.4Z" }, - { url = "https://files.pythonhosted.org/packages/45/9f/3b1c20a0b7a3200524eb0076cc027a970d320bd3a6592873c85c92a08731/PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1", size = 171679, upload-time = "2024-08-06T20:32:44.801Z" }, - { url = "https://files.pythonhosted.org/packages/7c/9a/337322f27005c33bcb656c655fa78325b730324c78620e8328ae28b64d0c/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133", size = 733428, upload-time = "2024-08-06T20:32:46.432Z" }, - { url = "https://files.pythonhosted.org/packages/a3/69/864fbe19e6c18ea3cc196cbe5d392175b4cf3d5d0ac1403ec3f2d237ebb5/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484", size = 763361, upload-time = "2024-08-06T20:32:51.188Z" }, - { url = "https://files.pythonhosted.org/packages/04/24/b7721e4845c2f162d26f50521b825fb061bc0a5afcf9a386840f23ea19fa/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5", size = 759523, upload-time = "2024-08-06T20:32:53.019Z" }, - { url = "https://files.pythonhosted.org/packages/2b/b2/e3234f59ba06559c6ff63c4e10baea10e5e7df868092bf9ab40e5b9c56b6/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc", size = 726660, upload-time = "2024-08-06T20:32:54.708Z" }, - { url = "https://files.pythonhosted.org/packages/fe/0f/25911a9f080464c59fab9027482f822b86bf0608957a5fcc6eaac85aa515/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652", size = 751597, upload-time = "2024-08-06T20:32:56.985Z" }, - { url = "https://files.pythonhosted.org/packages/14/0d/e2c3b43bbce3cf6bd97c840b46088a3031085179e596d4929729d8d68270/PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183", size = 140527, upload-time = "2024-08-06T20:33:03.001Z" }, - { url = "https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", size = 156446, upload-time = 
"2024-08-06T20:33:04.33Z" }, -] - -[[package]] -name = "requests" -version = "2.32.4" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "certifi" }, - { name = "charset-normalizer" }, - { name = "idna" }, - { name = "urllib3" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/e1/0a/929373653770d8a0d7ea76c37de6e41f11eb07559b103b1c02cafb3f7cf8/requests-2.32.4.tar.gz", hash = "sha256:27d0316682c8a29834d3264820024b62a36942083d52caf2f14c0591336d3422", size = 135258, upload-time = "2025-06-09T16:43:07.34Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7c/e4/56027c4a6b4ae70ca9de302488c5ca95ad4a39e190093d6c1a8ace08341b/requests-2.32.4-py3-none-any.whl", hash = "sha256:27babd3cda2a6d50b30443204ee89830707d396671944c998b5975b031ac2b2c", size = 64847, upload-time = "2025-06-09T16:43:05.728Z" }, -] - -[[package]] -name = "respx" -version = "0.22.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "httpx" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/f4/7c/96bd0bc759cf009675ad1ee1f96535edcb11e9666b985717eb8c87192a95/respx-0.22.0.tar.gz", hash = "sha256:3c8924caa2a50bd71aefc07aa812f2466ff489f1848c96e954a5362d17095d91", size = 28439, upload-time = "2024-12-19T22:33:59.374Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8e/67/afbb0978d5399bc9ea200f1d4489a23c9a1dad4eee6376242b8182389c79/respx-0.22.0-py2.py3-none-any.whl", hash = "sha256:631128d4c9aba15e56903fb5f66fb1eff412ce28dd387ca3a81339e52dbd3ad0", size = 25127, upload-time = "2024-12-19T22:33:57.837Z" }, -] - -[[package]] -name = "rich" -version = "13.9.4" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "markdown-it-py" }, - { name = "pygments" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/ab/3a/0316b28d0761c6734d6bc14e770d85506c986c85ffb239e688eeaab2c2bc/rich-13.9.4.tar.gz", hash = "sha256:439594978a49a09530cff7ebc4b5c7103ef57baf48d5ea3184f21d9a2befa098", size = 223149, upload-time = "2024-11-01T16:43:57.873Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/19/71/39c7c0d87f8d4e6c020a393182060eaefeeae6c01dab6a84ec346f2567df/rich-13.9.4-py3-none-any.whl", hash = "sha256:6049d5e6ec054bf2779ab3358186963bac2ea89175919d699e378b99738c2a90", size = 242424, upload-time = "2024-11-01T16:43:55.817Z" }, -] - -[[package]] -name = "ruff" -version = "0.11.13" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ed/da/9c6f995903b4d9474b39da91d2d626659af3ff1eeb43e9ae7c119349dba6/ruff-0.11.13.tar.gz", hash = "sha256:26fa247dc68d1d4e72c179e08889a25ac0c7ba4d78aecfc835d49cbfd60bf514", size = 4282054, upload-time = "2025-06-05T21:00:15.721Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7d/ce/a11d381192966e0b4290842cc8d4fac7dc9214ddf627c11c1afff87da29b/ruff-0.11.13-py3-none-linux_armv6l.whl", hash = "sha256:4bdfbf1240533f40042ec00c9e09a3aade6f8c10b6414cf11b519488d2635d46", size = 10292516, upload-time = "2025-06-05T20:59:32.944Z" }, - { url = "https://files.pythonhosted.org/packages/78/db/87c3b59b0d4e753e40b6a3b4a2642dfd1dcaefbff121ddc64d6c8b47ba00/ruff-0.11.13-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:aef9c9ed1b5ca28bb15c7eac83b8670cf3b20b478195bd49c8d756ba0a36cf48", size = 11106083, upload-time = "2025-06-05T20:59:37.03Z" }, - { url = "https://files.pythonhosted.org/packages/77/79/d8cec175856ff810a19825d09ce700265f905c643c69f45d2b737e4a470a/ruff-0.11.13-py3-none-macosx_11_0_arm64.whl", hash = 
"sha256:53b15a9dfdce029c842e9a5aebc3855e9ab7771395979ff85b7c1dedb53ddc2b", size = 10436024, upload-time = "2025-06-05T20:59:39.741Z" }, - { url = "https://files.pythonhosted.org/packages/8b/5b/f6d94f2980fa1ee854b41568368a2e1252681b9238ab2895e133d303538f/ruff-0.11.13-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ab153241400789138d13f362c43f7edecc0edfffce2afa6a68434000ecd8f69a", size = 10646324, upload-time = "2025-06-05T20:59:42.185Z" }, - { url = "https://files.pythonhosted.org/packages/6c/9c/b4c2acf24ea4426016d511dfdc787f4ce1ceb835f3c5fbdbcb32b1c63bda/ruff-0.11.13-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6c51f93029d54a910d3d24f7dd0bb909e31b6cd989a5e4ac513f4eb41629f0dc", size = 10174416, upload-time = "2025-06-05T20:59:44.319Z" }, - { url = "https://files.pythonhosted.org/packages/f3/10/e2e62f77c65ede8cd032c2ca39c41f48feabedb6e282bfd6073d81bb671d/ruff-0.11.13-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1808b3ed53e1a777c2ef733aca9051dc9bf7c99b26ece15cb59a0320fbdbd629", size = 11724197, upload-time = "2025-06-05T20:59:46.935Z" }, - { url = "https://files.pythonhosted.org/packages/bb/f0/466fe8469b85c561e081d798c45f8a1d21e0b4a5ef795a1d7f1a9a9ec182/ruff-0.11.13-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:d28ce58b5ecf0f43c1b71edffabe6ed7f245d5336b17805803312ec9bc665933", size = 12511615, upload-time = "2025-06-05T20:59:49.534Z" }, - { url = "https://files.pythonhosted.org/packages/17/0e/cefe778b46dbd0cbcb03a839946c8f80a06f7968eb298aa4d1a4293f3448/ruff-0.11.13-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:55e4bc3a77842da33c16d55b32c6cac1ec5fb0fbec9c8c513bdce76c4f922165", size = 12117080, upload-time = "2025-06-05T20:59:51.654Z" }, - { url = "https://files.pythonhosted.org/packages/5d/2c/caaeda564cbe103bed145ea557cb86795b18651b0f6b3ff6a10e84e5a33f/ruff-0.11.13-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:633bf2c6f35678c56ec73189ba6fa19ff1c5e4807a78bf60ef487b9dd272cc71", size = 11326315, upload-time = "2025-06-05T20:59:54.469Z" }, - { url = "https://files.pythonhosted.org/packages/75/f0/782e7d681d660eda8c536962920c41309e6dd4ebcea9a2714ed5127d44bd/ruff-0.11.13-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ffbc82d70424b275b089166310448051afdc6e914fdab90e08df66c43bb5ca9", size = 11555640, upload-time = "2025-06-05T20:59:56.986Z" }, - { url = "https://files.pythonhosted.org/packages/5d/d4/3d580c616316c7f07fb3c99dbecfe01fbaea7b6fd9a82b801e72e5de742a/ruff-0.11.13-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:4a9ddd3ec62a9a89578c85842b836e4ac832d4a2e0bfaad3b02243f930ceafcc", size = 10507364, upload-time = "2025-06-05T20:59:59.154Z" }, - { url = "https://files.pythonhosted.org/packages/5a/dc/195e6f17d7b3ea6b12dc4f3e9de575db7983db187c378d44606e5d503319/ruff-0.11.13-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:d237a496e0778d719efb05058c64d28b757c77824e04ffe8796c7436e26712b7", size = 10141462, upload-time = "2025-06-05T21:00:01.481Z" }, - { url = "https://files.pythonhosted.org/packages/f4/8e/39a094af6967faa57ecdeacb91bedfb232474ff8c3d20f16a5514e6b3534/ruff-0.11.13-py3-none-musllinux_1_2_i686.whl", hash = "sha256:26816a218ca6ef02142343fd24c70f7cd8c5aa6c203bca284407adf675984432", size = 11121028, upload-time = "2025-06-05T21:00:04.06Z" }, - { url = "https://files.pythonhosted.org/packages/5a/c0/b0b508193b0e8a1654ec683ebab18d309861f8bd64e3a2f9648b80d392cb/ruff-0.11.13-py3-none-musllinux_1_2_x86_64.whl", 
hash = "sha256:51c3f95abd9331dc5b87c47ac7f376db5616041173826dfd556cfe3d4977f492", size = 11602992, upload-time = "2025-06-05T21:00:06.249Z" }, - { url = "https://files.pythonhosted.org/packages/7c/91/263e33ab93ab09ca06ce4f8f8547a858cc198072f873ebc9be7466790bae/ruff-0.11.13-py3-none-win32.whl", hash = "sha256:96c27935418e4e8e77a26bb05962817f28b8ef3843a6c6cc49d8783b5507f250", size = 10474944, upload-time = "2025-06-05T21:00:08.459Z" }, - { url = "https://files.pythonhosted.org/packages/46/f4/7c27734ac2073aae8efb0119cae6931b6fb48017adf048fdf85c19337afc/ruff-0.11.13-py3-none-win_amd64.whl", hash = "sha256:29c3189895a8a6a657b7af4e97d330c8a3afd2c9c8f46c81e2fc5a31866517e3", size = 11548669, upload-time = "2025-06-05T21:00:11.147Z" }, - { url = "https://files.pythonhosted.org/packages/ec/bf/b273dd11673fed8a6bd46032c0ea2a04b2ac9bfa9c628756a5856ba113b0/ruff-0.11.13-py3-none-win_arm64.whl", hash = "sha256:b4385285e9179d608ff1d2fb9922062663c658605819a6876d8beef0c30b7f3b", size = 10683928, upload-time = "2025-06-05T21:00:13.758Z" }, -] - -[[package]] -name = "six" -version = "1.17.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031, upload-time = "2024-12-04T17:35:28.174Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, -] - -[[package]] -name = "sniffio" -version = "1.3.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload-time = "2024-02-25T23:20:04.057Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" }, -] - -[[package]] -name = "termcolor" -version = "2.5.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/37/72/88311445fd44c455c7d553e61f95412cf89054308a1aa2434ab835075fc5/termcolor-2.5.0.tar.gz", hash = "sha256:998d8d27da6d48442e8e1f016119076b690d962507531df4890fcd2db2ef8a6f", size = 13057, upload-time = "2024-10-06T19:50:04.115Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7f/be/df630c387a0a054815d60be6a97eb4e8f17385d5d6fe660e1c02750062b4/termcolor-2.5.0-py3-none-any.whl", hash = "sha256:37b17b5fc1e604945c2642c872a3764b5d547a48009871aea3edd3afa180afb8", size = 7755, upload-time = "2024-10-06T19:50:02.097Z" }, -] - -[[package]] -name = "tqdm" -version = "4.67.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737, 
upload-time = "2024-11-24T20:12:22.481Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540, upload-time = "2024-11-24T20:12:19.698Z" }, -] - -[[package]] -name = "typing-extensions" -version = "4.12.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/df/db/f35a00659bc03fec321ba8bce9420de607a1d37f8342eee1863174c69557/typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8", size = 85321, upload-time = "2024-06-07T18:52:15.995Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/26/9f/ad63fc0248c5379346306f8668cda6e2e2e9c95e01216d2b8ffd9ff037d0/typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d", size = 37438, upload-time = "2024-06-07T18:52:13.582Z" }, -] - -[[package]] -name = "tzdata" -version = "2025.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/43/0f/fa4723f22942480be4ca9527bbde8d43f6c3f2fe8412f00e7f5f6746bc8b/tzdata-2025.1.tar.gz", hash = "sha256:24894909e88cdb28bd1636c6887801df64cb485bd593f2fd83ef29075a81d694", size = 194950, upload-time = "2025-01-21T19:49:38.686Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/0f/dd/84f10e23edd882c6f968c21c2434fe67bd4a528967067515feca9e611e5e/tzdata-2025.1-py2.py3-none-any.whl", hash = "sha256:7e127113816800496f027041c570f50bcd464a020098a3b6b199517772303639", size = 346762, upload-time = "2025-01-21T19:49:37.187Z" }, -] - -[[package]] -name = "urllib3" -version = "2.4.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/8a/78/16493d9c386d8e60e442a35feac5e00f0913c0f4b7c217c11e8ec2ff53e0/urllib3-2.4.0.tar.gz", hash = "sha256:414bc6535b787febd7567804cc015fee39daab8ad86268f1310a9250697de466", size = 390672, upload-time = "2025-04-10T15:23:39.232Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/6b/11/cc635220681e93a0183390e26485430ca2c7b5f9d33b15c74c2861cb8091/urllib3-2.4.0-py3-none-any.whl", hash = "sha256:4e16665048960a0900c702d4a66415956a584919c03361cac9f1df5c5dd7e813", size = 128680, upload-time = "2025-04-10T15:23:37.377Z" }, -] - -[[package]] -name = "virtualenv" -version = "20.31.2" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "distlib" }, - { name = "filelock" }, - { name = "platformdirs" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/56/2c/444f465fb2c65f40c3a104fd0c495184c4f2336d65baf398e3c75d72ea94/virtualenv-20.31.2.tar.gz", hash = "sha256:e10c0a9d02835e592521be48b332b6caee6887f332c111aa79a09b9e79efc2af", size = 6076316, upload-time = "2025-05-08T17:58:23.811Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f3/40/b1c265d4b2b62b58576588510fc4d1fe60a86319c8de99fd8e9fec617d2c/virtualenv-20.31.2-py3-none-any.whl", hash = "sha256:36efd0d9650ee985f0cad72065001e66d49a6f24eb44d98980f630686243cf11", size = 6057982, upload-time = "2025-05-08T17:58:21.15Z" }, -] - -[[package]] -name = "wcwidth" -version = "0.2.13" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/6c/63/53559446a878410fc5a5974feb13d31d78d752eb18aeba59c7fef1af7598/wcwidth-0.2.13.tar.gz", hash = 
"sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5", size = 101301, upload-time = "2024-01-06T02:10:57.829Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166, upload-time = "2024-01-06T02:10:55.763Z" }, -] From 9410d73217e5e6ce6d043d0c42a27cd7f740abbc Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Fri, 18 Jul 2025 20:07:34 +0000 Subject: [PATCH 2/4] chore: update SDK settings --- .github/workflows/publish-pypi.yml | 31 +++++++++ .github/workflows/release-doctor.yml | 21 +++++++ .release-please-manifest.json | 3 + .stats.yml | 2 +- CONTRIBUTING.md | 4 +- README.md | 16 ++--- bin/check-release-environment | 21 +++++++ pyproject.toml | 8 +-- release-please-config.json | 66 ++++++++++++++++++++ src/llama_stack_cli/_version.py | 2 +- src/llama_stack_cli/resources/pet.py | 8 +-- src/llama_stack_cli/resources/store/order.py | 8 +-- src/llama_stack_cli/resources/store/store.py | 8 +-- src/llama_stack_cli/resources/user.py | 8 +-- 14 files changed, 174 insertions(+), 32 deletions(-) create mode 100644 .github/workflows/publish-pypi.yml create mode 100644 .github/workflows/release-doctor.yml create mode 100644 .release-please-manifest.json create mode 100644 bin/check-release-environment create mode 100644 release-please-config.json diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml new file mode 100644 index 00000000..99ab9c72 --- /dev/null +++ b/.github/workflows/publish-pypi.yml @@ -0,0 +1,31 @@ +# This workflow is triggered when a GitHub release is created. +# It can also be run manually to re-publish to PyPI in case it failed for some reason. 
+# You can run this workflow by navigating to https://www.github.com/slekkala1/llama-stack-client-python/actions/workflows/publish-pypi.yml +name: Publish PyPI +on: + workflow_dispatch: + + release: + types: [published] + +jobs: + publish: + name: publish + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Install Rye + run: | + curl -sSf https://rye.astral.sh/get | bash + echo "$HOME/.rye/shims" >> $GITHUB_PATH + env: + RYE_VERSION: '0.44.0' + RYE_INSTALL_OPTION: '--yes' + + - name: Publish to PyPI + run: | + bash ./bin/publish-pypi + env: + PYPI_TOKEN: ${{ secrets.LLAMA_STACK_CLI_PYPI_TOKEN || secrets.PYPI_TOKEN }} diff --git a/.github/workflows/release-doctor.yml b/.github/workflows/release-doctor.yml new file mode 100644 index 00000000..4677883d --- /dev/null +++ b/.github/workflows/release-doctor.yml @@ -0,0 +1,21 @@ +name: Release Doctor +on: + pull_request: + branches: + - main + workflow_dispatch: + +jobs: + release_doctor: + name: release doctor + runs-on: ubuntu-latest + if: github.repository == 'slekkala1/llama-stack-client-python' && (github.event_name == 'push' || github.event_name == 'workflow_dispatch' || startsWith(github.head_ref, 'release-please') || github.head_ref == 'next') + + steps: + - uses: actions/checkout@v4 + + - name: Check release environment + run: | + bash ./bin/check-release-environment + env: + PYPI_TOKEN: ${{ secrets.LLAMA_STACK_CLI_PYPI_TOKEN || secrets.PYPI_TOKEN }} diff --git a/.release-please-manifest.json b/.release-please-manifest.json new file mode 100644 index 00000000..1e4fc9c1 --- /dev/null +++ b/.release-please-manifest.json @@ -0,0 +1,3 @@ +{ + ".": "0.2.13" +} \ No newline at end of file diff --git a/.stats.yml b/.stats.yml index 6fcf58f8..f0648e7c 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 19 openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/meta-slekkala1%2Fllama-stack-cli-6937085190e3e5553f943ff22deda900cd8ad4bf5e37278cba7de683b78ae8d2.yml openapi_spec_hash: 85dc5d1e011be6539c240594f06f284b -config_hash: 96cc2b0706a245b6a0a784aa7dbfe779 +config_hash: 5d940416bbc269b1a0e083c0a52392be diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ff4e9454..c0976003 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -62,7 +62,7 @@ If you’d like to use the repository from source, you can either install from g To install via git: ```sh -$ pip install git+ssh://git@github.com/stainless-sdks/llama-stack-cli-python.git +$ pip install git+ssh://git@github.com/slekkala1/llama-stack-client-python.git ``` Alternatively, you can build from source and install the wheel file: @@ -120,7 +120,7 @@ the changes aren't made through the automated pipeline, you may want to make rel ### Publish with a GitHub workflow -You can release to package managers by using [the `Publish PyPI` GitHub action](https://www.github.com/stainless-sdks/llama-stack-cli-python/actions/workflows/publish-pypi.yml). This requires a setup organization or repository secret to be set up. +You can release to package managers by using [the `Publish PyPI` GitHub action](https://www.github.com/slekkala1/llama-stack-client-python/actions/workflows/publish-pypi.yml). This requires a setup organization or repository secret to be set up. ### Publish manually diff --git a/README.md b/README.md index 4f204675..cb707219 100644 --- a/README.md +++ b/README.md @@ -16,12 +16,12 @@ The full API of this library can be found in [api.md](api.md). 
## Installation ```sh -# install from this staging repo -pip install git+ssh://git@github.com/stainless-sdks/llama-stack-cli-python.git +# install from the production repo +pip install git+ssh://git@github.com/slekkala1/llama-stack-client-python.git ``` > [!NOTE] -> Once this package is [published to PyPI](https://www.stainless.com/docs/guides/publish), this will become: `pip install --pre llama_stack_cli` +> Once this package is [published to PyPI](https://www.stainless.com/docs/guides/publish), this will become: `pip install llama_stack_cli` ## Usage @@ -83,8 +83,8 @@ By default, the async client uses `httpx` for HTTP requests. However, for improv You can enable this by installing `aiohttp`: ```sh -# install from this staging repo -pip install 'llama_stack_cli[aiohttp] @ git+ssh://git@github.com/stainless-sdks/llama-stack-cli-python.git' +# install from the production repo +pip install 'llama_stack_cli[aiohttp] @ git+ssh://git@github.com/slekkala1/llama-stack-client-python.git' ``` Then you can enable it by instantiating the client with `http_client=DefaultAioHttpClient()`: @@ -267,9 +267,9 @@ store = response.parse() # get the object that `store.list_inventory()` would h print(store) ``` -These methods return an [`APIResponse`](https://github.com/stainless-sdks/llama-stack-cli-python/tree/main/src/llama_stack_cli/_response.py) object. +These methods return an [`APIResponse`](https://github.com/slekkala1/llama-stack-client-python/tree/main/src/llama_stack_cli/_response.py) object. -The async client returns an [`AsyncAPIResponse`](https://github.com/stainless-sdks/llama-stack-cli-python/tree/main/src/llama_stack_cli/_response.py) with the same structure, the only difference being `await`able methods for reading the response content. +The async client returns an [`AsyncAPIResponse`](https://github.com/slekkala1/llama-stack-client-python/tree/main/src/llama_stack_cli/_response.py) with the same structure, the only difference being `await`able methods for reading the response content. #### `.with_streaming_response` @@ -373,7 +373,7 @@ This package generally follows [SemVer](https://semver.org/spec/v2.0.0.html) con We take backwards-compatibility seriously and work hard to ensure you can rely on a smooth upgrade experience. -We are keen for your feedback; please open an [issue](https://www.github.com/stainless-sdks/llama-stack-cli-python/issues) with questions, bugs, or suggestions. +We are keen for your feedback; please open an [issue](https://www.github.com/slekkala1/llama-stack-client-python/issues) with questions, bugs, or suggestions. ### Determining the installed version diff --git a/bin/check-release-environment b/bin/check-release-environment new file mode 100644 index 00000000..b845b0f4 --- /dev/null +++ b/bin/check-release-environment @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +errors=() + +if [ -z "${PYPI_TOKEN}" ]; then + errors+=("The PYPI_TOKEN secret has not been set. Please set it in either this repository's secrets or your organization secrets.") +fi + +lenErrors=${#errors[@]} + +if [[ lenErrors -gt 0 ]]; then + echo -e "Found the following errors in the release environment:\n" + + for error in "${errors[@]}"; do + echo -e "- $error\n" + done + + exit 1 +fi + +echo "The environment is ready to push releases!" 
diff --git a/pyproject.toml b/pyproject.toml index 4d033ff2..4c51ca55 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "llama_stack_cli" -version = "0.0.1-alpha.0" +version = "0.2.13" description = "The official Python library for the llama-stack-cli API" dynamic = ["readme"] license = "Apache-2.0" @@ -35,8 +35,8 @@ classifiers = [ ] [project.urls] -Homepage = "https://github.com/stainless-sdks/llama-stack-cli-python" -Repository = "https://github.com/stainless-sdks/llama-stack-cli-python" +Homepage = "https://github.com/slekkala1/llama-stack-client-python" +Repository = "https://github.com/slekkala1/llama-stack-client-python" [project.optional-dependencies] aiohttp = ["aiohttp", "httpx_aiohttp>=0.1.8"] @@ -125,7 +125,7 @@ path = "README.md" [[tool.hatch.metadata.hooks.fancy-pypi-readme.substitutions]] # replace relative links with absolute links pattern = '\[(.+?)\]\(((?!https?://)\S+?)\)' -replacement = '[\1](https://github.com/stainless-sdks/llama-stack-cli-python/tree/main/\g<2>)' +replacement = '[\1](https://github.com/slekkala1/llama-stack-client-python/tree/main/\g<2>)' [tool.pytest.ini_options] testpaths = ["tests"] diff --git a/release-please-config.json b/release-please-config.json new file mode 100644 index 00000000..c94f05bc --- /dev/null +++ b/release-please-config.json @@ -0,0 +1,66 @@ +{ + "packages": { + ".": {} + }, + "$schema": "https://raw.githubusercontent.com/stainless-api/release-please/main/schemas/config.json", + "include-v-in-tag": true, + "include-component-in-tag": false, + "versioning": "prerelease", + "prerelease": true, + "bump-minor-pre-major": true, + "bump-patch-for-minor-pre-major": false, + "pull-request-header": "Automated Release PR", + "pull-request-title-pattern": "release: ${version}", + "changelog-sections": [ + { + "type": "feat", + "section": "Features" + }, + { + "type": "fix", + "section": "Bug Fixes" + }, + { + "type": "perf", + "section": "Performance Improvements" + }, + { + "type": "revert", + "section": "Reverts" + }, + { + "type": "chore", + "section": "Chores" + }, + { + "type": "docs", + "section": "Documentation" + }, + { + "type": "style", + "section": "Styles" + }, + { + "type": "refactor", + "section": "Refactors" + }, + { + "type": "test", + "section": "Tests", + "hidden": true + }, + { + "type": "build", + "section": "Build System" + }, + { + "type": "ci", + "section": "Continuous Integration", + "hidden": true + } + ], + "release-type": "python", + "extra-files": [ + "src/llama_stack_cli/_version.py" + ] +} \ No newline at end of file diff --git a/src/llama_stack_cli/_version.py b/src/llama_stack_cli/_version.py index de2b23de..9bf8b038 100644 --- a/src/llama_stack_cli/_version.py +++ b/src/llama_stack_cli/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "llama_stack_cli" -__version__ = "0.0.1-alpha.0" +__version__ = "0.2.13" # x-release-please-version diff --git a/src/llama_stack_cli/resources/pet.py b/src/llama_stack_cli/resources/pet.py index 49d949d5..4daaf2d3 100644 --- a/src/llama_stack_cli/resources/pet.py +++ b/src/llama_stack_cli/resources/pet.py @@ -42,7 +42,7 @@ def with_raw_response(self) -> PetResourceWithRawResponse: This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. 
- For more information, see https://www.github.com/stainless-sdks/llama-stack-cli-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/slekkala1/llama-stack-client-python#accessing-raw-response-data-eg-headers """ return PetResourceWithRawResponse(self) @@ -51,7 +51,7 @@ def with_streaming_response(self) -> PetResourceWithStreamingResponse: """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. - For more information, see https://www.github.com/stainless-sdks/llama-stack-cli-python#with_streaming_response + For more information, see https://www.github.com/slekkala1/llama-stack-client-python#with_streaming_response """ return PetResourceWithStreamingResponse(self) @@ -390,7 +390,7 @@ def with_raw_response(self) -> AsyncPetResourceWithRawResponse: This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. - For more information, see https://www.github.com/stainless-sdks/llama-stack-cli-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/slekkala1/llama-stack-client-python#accessing-raw-response-data-eg-headers """ return AsyncPetResourceWithRawResponse(self) @@ -399,7 +399,7 @@ def with_streaming_response(self) -> AsyncPetResourceWithStreamingResponse: """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. - For more information, see https://www.github.com/stainless-sdks/llama-stack-cli-python#with_streaming_response + For more information, see https://www.github.com/slekkala1/llama-stack-client-python#with_streaming_response """ return AsyncPetResourceWithStreamingResponse(self) diff --git a/src/llama_stack_cli/resources/store/order.py b/src/llama_stack_cli/resources/store/order.py index 4d9489c3..5c46eb42 100644 --- a/src/llama_stack_cli/resources/store/order.py +++ b/src/llama_stack_cli/resources/store/order.py @@ -32,7 +32,7 @@ def with_raw_response(self) -> OrderResourceWithRawResponse: This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. - For more information, see https://www.github.com/stainless-sdks/llama-stack-cli-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/slekkala1/llama-stack-client-python#accessing-raw-response-data-eg-headers """ return OrderResourceWithRawResponse(self) @@ -41,7 +41,7 @@ def with_streaming_response(self) -> OrderResourceWithStreamingResponse: """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. - For more information, see https://www.github.com/stainless-sdks/llama-stack-cli-python#with_streaming_response + For more information, see https://www.github.com/slekkala1/llama-stack-client-python#with_streaming_response """ return OrderResourceWithStreamingResponse(self) @@ -169,7 +169,7 @@ def with_raw_response(self) -> AsyncOrderResourceWithRawResponse: This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. 
- For more information, see https://www.github.com/stainless-sdks/llama-stack-cli-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/slekkala1/llama-stack-client-python#accessing-raw-response-data-eg-headers """ return AsyncOrderResourceWithRawResponse(self) @@ -178,7 +178,7 @@ def with_streaming_response(self) -> AsyncOrderResourceWithStreamingResponse: """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. - For more information, see https://www.github.com/stainless-sdks/llama-stack-cli-python#with_streaming_response + For more information, see https://www.github.com/slekkala1/llama-stack-client-python#with_streaming_response """ return AsyncOrderResourceWithStreamingResponse(self) diff --git a/src/llama_stack_cli/resources/store/store.py b/src/llama_stack_cli/resources/store/store.py index 1494e998..c552c695 100644 --- a/src/llama_stack_cli/resources/store/store.py +++ b/src/llama_stack_cli/resources/store/store.py @@ -38,7 +38,7 @@ def with_raw_response(self) -> StoreResourceWithRawResponse: This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. - For more information, see https://www.github.com/stainless-sdks/llama-stack-cli-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/slekkala1/llama-stack-client-python#accessing-raw-response-data-eg-headers """ return StoreResourceWithRawResponse(self) @@ -47,7 +47,7 @@ def with_streaming_response(self) -> StoreResourceWithStreamingResponse: """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. - For more information, see https://www.github.com/stainless-sdks/llama-stack-cli-python#with_streaming_response + For more information, see https://www.github.com/slekkala1/llama-stack-client-python#with_streaming_response """ return StoreResourceWithStreamingResponse(self) @@ -82,7 +82,7 @@ def with_raw_response(self) -> AsyncStoreResourceWithRawResponse: This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. - For more information, see https://www.github.com/stainless-sdks/llama-stack-cli-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/slekkala1/llama-stack-client-python#accessing-raw-response-data-eg-headers """ return AsyncStoreResourceWithRawResponse(self) @@ -91,7 +91,7 @@ def with_streaming_response(self) -> AsyncStoreResourceWithStreamingResponse: """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. - For more information, see https://www.github.com/stainless-sdks/llama-stack-cli-python#with_streaming_response + For more information, see https://www.github.com/slekkala1/llama-stack-client-python#with_streaming_response """ return AsyncStoreResourceWithStreamingResponse(self) diff --git a/src/llama_stack_cli/resources/user.py b/src/llama_stack_cli/resources/user.py index e9cf3e53..886485d2 100644 --- a/src/llama_stack_cli/resources/user.py +++ b/src/llama_stack_cli/resources/user.py @@ -31,7 +31,7 @@ def with_raw_response(self) -> UserResourceWithRawResponse: This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. 
- For more information, see https://www.github.com/stainless-sdks/llama-stack-cli-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/slekkala1/llama-stack-client-python#accessing-raw-response-data-eg-headers """ return UserResourceWithRawResponse(self) @@ -40,7 +40,7 @@ def with_streaming_response(self) -> UserResourceWithStreamingResponse: """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. - For more information, see https://www.github.com/stainless-sdks/llama-stack-cli-python#with_streaming_response + For more information, see https://www.github.com/slekkala1/llama-stack-client-python#with_streaming_response """ return UserResourceWithStreamingResponse(self) @@ -327,7 +327,7 @@ def with_raw_response(self) -> AsyncUserResourceWithRawResponse: This property can be used as a prefix for any HTTP method call to return the raw response object instead of the parsed content. - For more information, see https://www.github.com/stainless-sdks/llama-stack-cli-python#accessing-raw-response-data-eg-headers + For more information, see https://www.github.com/slekkala1/llama-stack-client-python#accessing-raw-response-data-eg-headers """ return AsyncUserResourceWithRawResponse(self) @@ -336,7 +336,7 @@ def with_streaming_response(self) -> AsyncUserResourceWithStreamingResponse: """ An alternative to `.with_raw_response` that doesn't eagerly read the response body. - For more information, see https://www.github.com/stainless-sdks/llama-stack-cli-python#with_streaming_response + For more information, see https://www.github.com/slekkala1/llama-stack-client-python#with_streaming_response """ return AsyncUserResourceWithStreamingResponse(self) From 7878113536606734388e035960ddda19d7904da0 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Fri, 18 Jul 2025 20:08:29 +0000 Subject: [PATCH 3/4] chore: update SDK settings --- .stats.yml | 2 +- README.md | 11 ++++------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/.stats.yml b/.stats.yml index f0648e7c..fc28151c 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,4 +1,4 @@ configured_endpoints: 19 openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/meta-slekkala1%2Fllama-stack-cli-6937085190e3e5553f943ff22deda900cd8ad4bf5e37278cba7de683b78ae8d2.yml openapi_spec_hash: 85dc5d1e011be6539c240594f06f284b -config_hash: 5d940416bbc269b1a0e083c0a52392be +config_hash: 709fc792a3021f7f92ca66767536f8d7 diff --git a/README.md b/README.md index cb707219..8030998c 100644 --- a/README.md +++ b/README.md @@ -16,13 +16,10 @@ The full API of this library can be found in [api.md](api.md). ## Installation ```sh -# install from the production repo -pip install git+ssh://git@github.com/slekkala1/llama-stack-client-python.git +# install from PyPI +pip install llama_stack_cli ``` -> [!NOTE] -> Once this package is [published to PyPI](https://www.stainless.com/docs/guides/publish), this will become: `pip install llama_stack_cli` - ## Usage The full API of this library can be found in [api.md](api.md). @@ -83,8 +80,8 @@ By default, the async client uses `httpx` for HTTP requests. 
However, for improv You can enable this by installing `aiohttp`: ```sh -# install from the production repo -pip install 'llama_stack_cli[aiohttp] @ git+ssh://git@github.com/slekkala1/llama-stack-client-python.git' +# install from PyPI +pip install llama_stack_cli[aiohttp] ``` Then you can enable it by instantiating the client with `http_client=DefaultAioHttpClient()`: From f1220360bc200fc52351e8255060d7abf47ba797 Mon Sep 17 00:00:00 2001 From: "stainless-app[bot]" <142633134+stainless-app[bot]@users.noreply.github.com> Date: Fri, 18 Jul 2025 20:10:40 +0000 Subject: [PATCH 4/4] release: 1.0.0 --- .release-please-manifest.json | 2 +- CHANGELOG.md | 91 +++++++++++++++++++++++++++++++++ pyproject.toml | 2 +- src/llama_stack_cli/_version.py | 2 +- 4 files changed, 94 insertions(+), 3 deletions(-) create mode 100644 CHANGELOG.md diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 1e4fc9c1..fea34540 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "0.2.13" + ".": "1.0.0" } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..5ea2db49 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,91 @@ +# Changelog + +## 1.0.0 (2025-07-18) + +Full Changelog: [v0.2.13...v1.0.0](https://github.com/slekkala1/llama-stack-client-python/compare/v0.2.13...v1.0.0) + +### Features + +* `llama-stack-client providers inspect PROVIDER_ID` ([#181](https://github.com/slekkala1/llama-stack-client-python/issues/181)) ([6d18aae](https://github.com/slekkala1/llama-stack-client-python/commit/6d18aae31ce739b1a37a72b880aa8a60f890df72)) +* add client-side utility for getting OAuth tokens simply ([#230](https://github.com/slekkala1/llama-stack-client-python/issues/230)) ([91156dc](https://github.com/slekkala1/llama-stack-client-python/commit/91156dca28567352c5f6be75d55327ef2b49ff19)) +* add client.chat.completions.create() and client.completions.create() ([#226](https://github.com/slekkala1/llama-stack-client-python/issues/226)) ([ee0e65e](https://github.com/slekkala1/llama-stack-client-python/commit/ee0e65e89dba13431cc3b9abdbebaa9525a5fbfb)) +* Add llama-stack-client datasets unregister command ([#222](https://github.com/slekkala1/llama-stack-client-python/issues/222)) ([38cd91c](https://github.com/slekkala1/llama-stack-client-python/commit/38cd91c9e396f2be0bec1ee96a19771582ba6f17)) +* add support for chat sessions ([#167](https://github.com/slekkala1/llama-stack-client-python/issues/167)) ([ce3b30f](https://github.com/slekkala1/llama-stack-client-python/commit/ce3b30f83eb122cc200c441ddad5e173e02e5adb)) +* add type hints to event logger util ([#140](https://github.com/slekkala1/llama-stack-client-python/issues/140)) ([26f3c33](https://github.com/slekkala1/llama-stack-client-python/commit/26f3c33cd0f81b809afa514b9a8ca63fa64643ca)) +* add updated batch inference types ([#220](https://github.com/slekkala1/llama-stack-client-python/issues/220)) ([ddb93ca](https://github.com/slekkala1/llama-stack-client-python/commit/ddb93ca206d97c82c51a0efed5985a7396fcdf3c)) +* add weighted_average aggregation function support ([#208](https://github.com/slekkala1/llama-stack-client-python/issues/208)) ([b62ac6c](https://github.com/slekkala1/llama-stack-client-python/commit/b62ac6cf2f2f20e248cbbce6684cef50f150cac0)) +* **agent:** support multiple tool calls ([#192](https://github.com/slekkala1/llama-stack-client-python/issues/192)) 
([43ea2f6](https://github.com/slekkala1/llama-stack-client-python/commit/43ea2f6d741b26181db1d7ba0912c17a9ed1ca74)) +* **agent:** support plain function as client_tool ([#187](https://github.com/slekkala1/llama-stack-client-python/issues/187)) ([2ec8044](https://github.com/slekkala1/llama-stack-client-python/commit/2ec8044356b5d6285948ae22da007899f6148408)) +* async agent wrapper ([#169](https://github.com/slekkala1/llama-stack-client-python/issues/169)) ([fc9907c](https://github.com/slekkala1/llama-stack-client-python/commit/fc9907c781dc406756c20d8a1829343eac0c31c0)) +* autogen llama-stack-client CLI reference doc ([#190](https://github.com/slekkala1/llama-stack-client-python/issues/190)) ([e7b19a5](https://github.com/slekkala1/llama-stack-client-python/commit/e7b19a505cc06c28846e85bb5b8524632bdef4d6)) +* client.responses.create() and client.responses.retrieve() ([#227](https://github.com/slekkala1/llama-stack-client-python/issues/227)) ([fba5102](https://github.com/slekkala1/llama-stack-client-python/commit/fba5102d03f85627025f4589216651d135841d5a)) +* datasets api updates ([#203](https://github.com/slekkala1/llama-stack-client-python/issues/203)) ([b664564](https://github.com/slekkala1/llama-stack-client-python/commit/b664564fe1c4771a7872286d0c2ac96c47816939)) +* enable_persist: sync updates from stainless branch: yanxi0830/dev ([#145](https://github.com/slekkala1/llama-stack-client-python/issues/145)) ([59a02f0](https://github.com/slekkala1/llama-stack-client-python/commit/59a02f071b14cb6627c929c4d396a3d996219c78)) +* new Agent API ([#178](https://github.com/slekkala1/llama-stack-client-python/issues/178)) ([c2f73b1](https://github.com/slekkala1/llama-stack-client-python/commit/c2f73b11301c6c4a87e58ded9055fd49b1626b47)) +* support client tool output metadata ([#180](https://github.com/slekkala1/llama-stack-client-python/issues/180)) ([8e4fd56](https://github.com/slekkala1/llama-stack-client-python/commit/8e4fd56a318a2806e81679877d703f6270fbcbfe)) +* Sync updates from stainless branch: ehhuang/dev ([#149](https://github.com/slekkala1/llama-stack-client-python/issues/149)) ([367da69](https://github.com/slekkala1/llama-stack-client-python/commit/367da690dabee8a34039499f8e151cc8f97ca91b)) +* unify max infer iters with server/client tools ([#173](https://github.com/slekkala1/llama-stack-client-python/issues/173)) ([548f2de](https://github.com/slekkala1/llama-stack-client-python/commit/548f2dee5019b7510d17025f11adbf61431f505e)) +* update react with new agent api ([#189](https://github.com/slekkala1/llama-stack-client-python/issues/189)) ([ac9d1e2](https://github.com/slekkala1/llama-stack-client-python/commit/ac9d1e2166c88d2445fbbf08e30886fcec6048df)) + + +### Bug Fixes + +* `llama-stack-client provider inspect` should use retrieve ([#202](https://github.com/slekkala1/llama-stack-client-python/issues/202)) ([e33b5bf](https://github.com/slekkala1/llama-stack-client-python/commit/e33b5bfbc89c93031434720cf7265f9bc83f2a39)) +* accept extra_headers in agent.create_turn and pass them faithfully ([#228](https://github.com/slekkala1/llama-stack-client-python/issues/228)) ([e72d9e8](https://github.com/slekkala1/llama-stack-client-python/commit/e72d9e8eb590facd693938a93a7a782e45d15b6d)) +* added uv.lock ([546e0df](https://github.com/slekkala1/llama-stack-client-python/commit/546e0df348b648651da94989053c52f4cc43cdc4)) +* **agent:** better error handling ([#207](https://github.com/slekkala1/llama-stack-client-python/issues/207)) 
([5746f91](https://github.com/slekkala1/llama-stack-client-python/commit/5746f918351f9021700f0a90edf6b78e74d58c82)) +* **agent:** initialize toolgroups/client_tools ([#186](https://github.com/slekkala1/llama-stack-client-python/issues/186)) ([458e207](https://github.com/slekkala1/llama-stack-client-python/commit/458e20702b5aa8f435ac5ce114fee9252b751d25)) +* broken .retrieve call using `identifier=` ([#135](https://github.com/slekkala1/llama-stack-client-python/issues/135)) ([626805a](https://github.com/slekkala1/llama-stack-client-python/commit/626805a74a19011d742a60187b1119aead153a94)) +* bump to 0.2.1 ([edb6173](https://github.com/slekkala1/llama-stack-client-python/commit/edb6173ec1f0da131e097a993d6f177a3655930d)) +* bump version ([b6d45b8](https://github.com/slekkala1/llama-stack-client-python/commit/b6d45b862ca846bed635d64816dc7de9d9433e61)) +* bump version in another place ([7253433](https://github.com/slekkala1/llama-stack-client-python/commit/7253433f6d7a41fe0812d26e4ce7183f922f2869)) +* **cli:** align cli toolgroups register to the new arguments ([#231](https://github.com/slekkala1/llama-stack-client-python/issues/231)) ([a87b6f7](https://github.com/slekkala1/llama-stack-client-python/commit/a87b6f7b3fd07262bfbd4321652e51b901c75df5)) +* correct toolgroups_id parameter name on unregister call ([#235](https://github.com/slekkala1/llama-stack-client-python/issues/235)) ([1be7904](https://github.com/slekkala1/llama-stack-client-python/commit/1be7904133630127c0a98ba4aed1241eee548c81)) +* fix duplicate model get help text ([#188](https://github.com/slekkala1/llama-stack-client-python/issues/188)) ([4bab07a](https://github.com/slekkala1/llama-stack-client-python/commit/4bab07a683adee9a476ce926fe809dafe3cc27f0)) +* llama-stack-client providers list ([#134](https://github.com/slekkala1/llama-stack-client-python/issues/134)) ([930138a](https://github.com/slekkala1/llama-stack-client-python/commit/930138a9013ee9157d14ee0606b24c5677bf4387)) +* react agent ([#200](https://github.com/slekkala1/llama-stack-client-python/issues/200)) ([b779979](https://github.com/slekkala1/llama-stack-client-python/commit/b779979c40c638e835e5190e5877f57430c89d97)) +* React Agent for non-llama models ([#174](https://github.com/slekkala1/llama-stack-client-python/issues/174)) ([ee5dd2b](https://github.com/slekkala1/llama-stack-client-python/commit/ee5dd2b662ffdeb78b324dddd6884a4d0f1fd901)) +* React agent should be able to work with provided config ([#146](https://github.com/slekkala1/llama-stack-client-python/issues/146)) ([08ab5df](https://github.com/slekkala1/llama-stack-client-python/commit/08ab5df583bb74dea9104950c190f6101eb19c95)) +* react agent with custom tool parser n_iters ([#184](https://github.com/slekkala1/llama-stack-client-python/issues/184)) ([aaff961](https://github.com/slekkala1/llama-stack-client-python/commit/aaff9618601f1cded040e57e0d8067699e595208)) +* remove the alpha suffix in run_benchmark.py ([#179](https://github.com/slekkala1/llama-stack-client-python/issues/179)) ([638f7f2](https://github.com/slekkala1/llama-stack-client-python/commit/638f7f29513cdb87b9bf0cf7bc269d2c576d37ba)) +* update CONTRIBUTING.md to point to uv instead of rye ([3fbe0cd](https://github.com/slekkala1/llama-stack-client-python/commit/3fbe0cdd6a8e935732ddc513b0a6af01623a6999)) +* update uv lock ([cc072c8](https://github.com/slekkala1/llama-stack-client-python/commit/cc072c81b59c26f21eaba6ee0a7d56fc61c0317a)) +* validate endpoint url ([#196](https://github.com/slekkala1/llama-stack-client-python/issues/196)) 
([6fa8095](https://github.com/slekkala1/llama-stack-client-python/commit/6fa8095428804a9cc348b403468cad64e4eeb38b)) + + +### Chores + +* api sync, deprecate allow_resume_turn + rename task_config->benchmark_config (Sync updates from stainless branch: yanxi0830/dev) ([#176](https://github.com/slekkala1/llama-stack-client-python/issues/176)) ([96749af](https://github.com/slekkala1/llama-stack-client-python/commit/96749af83891d47be1f8f46588be567db685cf12)) +* AsyncAgent should use ToolResponse instead of ToolResponseMessage ([#197](https://github.com/slekkala1/llama-stack-client-python/issues/197)) ([6191aa5](https://github.com/slekkala1/llama-stack-client-python/commit/6191aa5cc38c4ef9be27452e04867b6ce8a703e2)) +* deprecate eval task (Sync updates from stainless branch: main) ([#150](https://github.com/slekkala1/llama-stack-client-python/issues/150)) ([39b1248](https://github.com/slekkala1/llama-stack-client-python/commit/39b1248e3e1b0634e96db6bb4eac7d689e3a5a19)) +* remove litellm type conversion ([#193](https://github.com/slekkala1/llama-stack-client-python/issues/193)) ([ab3f844](https://github.com/slekkala1/llama-stack-client-python/commit/ab3f844a8a7a8dc68723ed36120914fd01a18af2)) +* sync repo ([e515481](https://github.com/slekkala1/llama-stack-client-python/commit/e515481f103480285df700fb5dcf204810e05508)) +* Sync updates from stainless branch: ehhuang/dev ([#182](https://github.com/slekkala1/llama-stack-client-python/issues/182)) ([e33aa4a](https://github.com/slekkala1/llama-stack-client-python/commit/e33aa4a682fda23d708438a976dfe4dd5443a320)) +* Sync updates from stainless branch: ehhuang/dev ([#199](https://github.com/slekkala1/llama-stack-client-python/issues/199)) ([fa73d7d](https://github.com/slekkala1/llama-stack-client-python/commit/fa73d7ddb72682d47464eca6b1476044e140a560)) +* Sync updates from stainless branch: main ([#201](https://github.com/slekkala1/llama-stack-client-python/issues/201)) ([f063f2d](https://github.com/slekkala1/llama-stack-client-python/commit/f063f2d6126d2bd1f9a8dcf854a32ae7cd4be607)) +* update SDK settings ([7878113](https://github.com/slekkala1/llama-stack-client-python/commit/7878113536606734388e035960ddda19d7904da0)) +* update SDK settings ([9410d73](https://github.com/slekkala1/llama-stack-client-python/commit/9410d73217e5e6ce6d043d0c42a27cd7f740abbc)) +* use rich to format logs ([#177](https://github.com/slekkala1/llama-stack-client-python/issues/177)) ([303054b](https://github.com/slekkala1/llama-stack-client-python/commit/303054b6a64e47dbdf7de93458433b71bb1ff59c)) + + +### Refactors + +* update react_agent to use tool_config ([#139](https://github.com/slekkala1/llama-stack-client-python/issues/139)) ([b5dce10](https://github.com/slekkala1/llama-stack-client-python/commit/b5dce10f0a621f8f8a0f893dba4d2acebd7e438b)) + + +### Build System + +* Bump version to 0.1.19 ([ccd52f8](https://github.com/slekkala1/llama-stack-client-python/commit/ccd52f8bb298ecfd3ec06ae2d50ccaeebbfb3973)) +* Bump version to 0.1.8 ([0144e85](https://github.com/slekkala1/llama-stack-client-python/commit/0144e857c83afc807122b32f3f53775e87c027ac)) +* Bump version to 0.1.9 ([7e00b78](https://github.com/slekkala1/llama-stack-client-python/commit/7e00b784ee859aa04aa11955e3888e5167331dfe)) +* Bump version to 0.2.10 ([05e41a6](https://github.com/slekkala1/llama-stack-client-python/commit/05e41a6eb12053b850a3abc56bb35e3121042be2)) +* Bump version to 0.2.11 ([d2e7537](https://github.com/slekkala1/llama-stack-client-python/commit/d2e753751519cb9f0e09d255e875f60449ab30aa)) +* Bump version 
to 0.2.12 ([e3d812e](https://github.com/slekkala1/llama-stack-client-python/commit/e3d812ee3a85949e31e448e68c03534225b4ed07)) +* Bump version to 0.2.13 ([b6c6c5e](https://github.com/slekkala1/llama-stack-client-python/commit/b6c6c5ed7940bb625665d50f88ff7ea9d734e100)) +* Bump version to 0.2.2 ([47f8fd5](https://github.com/slekkala1/llama-stack-client-python/commit/47f8fd568634c9e2f7cd7d86f92f7c43cfc448cd)) +* Bump version to 0.2.4 ([7e6f5fc](https://github.com/slekkala1/llama-stack-client-python/commit/7e6f5fce18f23b807e52ac173251687c3b58979b)) +* Bump version to 0.2.5 ([62bd127](https://github.com/slekkala1/llama-stack-client-python/commit/62bd12799d8a4a0261d200d1c869e2be98c38770)) +* Bump version to 0.2.6 ([3dd707f](https://github.com/slekkala1/llama-stack-client-python/commit/3dd707fb84ba2ce56151cec9fb30918c651ccdd9)) +* Bump version to 0.2.7 ([e39ba88](https://github.com/slekkala1/llama-stack-client-python/commit/e39ba882f9d1f635f5e7398f623d7ceeae1b446f)) +* Bump version to 0.2.8 ([645d219](https://github.com/slekkala1/llama-stack-client-python/commit/645d2195c5af1c6f903cb93c293319d8f94c36cc)) +* Bump version to 0.2.9 ([d360557](https://github.com/slekkala1/llama-stack-client-python/commit/d36055741dd5c152c629dc28ec3b88b2c78f5336)) diff --git a/pyproject.toml b/pyproject.toml index 4c51ca55..ded711df 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "llama_stack_cli" -version = "0.2.13" +version = "1.0.0" description = "The official Python library for the llama-stack-cli API" dynamic = ["readme"] license = "Apache-2.0" diff --git a/src/llama_stack_cli/_version.py b/src/llama_stack_cli/_version.py index 9bf8b038..aea5563e 100644 --- a/src/llama_stack_cli/_version.py +++ b/src/llama_stack_cli/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "llama_stack_cli" -__version__ = "0.2.13" # x-release-please-version +__version__ = "1.0.0" # x-release-please-version
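For anyone verifying the release locally, here is a minimal sketch of how the bumped version can be checked at runtime. It assumes the package is installed under the distribution name `llama_stack_cli` declared in `pyproject.toml`, and that the top-level package re-exports `__version__` from `_version.py` (typical for Stainless-generated SDKs, but not confirmed by this patch series):

```python
# Quick check that an installed build matches the 1.0.0 release above.
# Assumption: the distribution/package name is `llama_stack_cli` as in pyproject.toml.
from importlib.metadata import version

import llama_stack_cli  # assumes __version__ is re-exported from _version.py

print(version("llama_stack_cli"))   # version recorded in the installed package metadata
print(llama_stack_cli.__version__)  # module-level string set in src/llama_stack_cli/_version.py
```

Both values should read `1.0.0` once the release-please bump in this series has been published; if they disagree, the environment is likely still holding the earlier `0.2.13` install.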