From 8bdafa997a30877e8e6119804ad069c7cd6afb44 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Thu, 20 Feb 2025 14:30:05 -0800 Subject: [PATCH] Sync updates from stainless branch: main --- src/llama_stack_client/_client.py | 9 -- src/llama_stack_client/resources/__init__.py | 14 --- src/llama_stack_client/resources/eval/eval.py | 32 +++---- src/llama_stack_client/types/__init__.py | 2 - .../types/benchmark_config_param.py | 4 +- tests/api_resources/test_eval.py | 88 +++++-------------- 6 files changed, 41 insertions(+), 108 deletions(-) diff --git a/src/llama_stack_client/_client.py b/src/llama_stack_client/_client.py index 82353ebd..bb5bb755 100644 --- a/src/llama_stack_client/_client.py +++ b/src/llama_stack_client/_client.py @@ -40,7 +40,6 @@ telemetry, vector_io, benchmarks, - eval_tasks, toolgroups, vector_dbs, batch_inference, @@ -94,7 +93,6 @@ class LlamaStackClient(SyncAPIClient): datasetio: datasetio.DatasetioResource scoring: scoring.ScoringResource scoring_functions: scoring_functions.ScoringFunctionsResource - eval_tasks: eval_tasks.EvalTasksResource benchmarks: benchmarks.BenchmarksResource with_raw_response: LlamaStackClientWithRawResponse with_streaming_response: LlamaStackClientWithStreamedResponse @@ -177,7 +175,6 @@ def __init__( self.datasetio = datasetio.DatasetioResource(self) self.scoring = scoring.ScoringResource(self) self.scoring_functions = scoring_functions.ScoringFunctionsResource(self) - self.eval_tasks = eval_tasks.EvalTasksResource(self) self.benchmarks = benchmarks.BenchmarksResource(self) self.with_raw_response = LlamaStackClientWithRawResponse(self) self.with_streaming_response = LlamaStackClientWithStreamedResponse(self) @@ -312,7 +309,6 @@ class AsyncLlamaStackClient(AsyncAPIClient): datasetio: datasetio.AsyncDatasetioResource scoring: scoring.AsyncScoringResource scoring_functions: scoring_functions.AsyncScoringFunctionsResource - eval_tasks: eval_tasks.AsyncEvalTasksResource benchmarks: benchmarks.AsyncBenchmarksResource with_raw_response: AsyncLlamaStackClientWithRawResponse with_streaming_response: AsyncLlamaStackClientWithStreamedResponse @@ -395,7 +391,6 @@ def __init__( self.datasetio = datasetio.AsyncDatasetioResource(self) self.scoring = scoring.AsyncScoringResource(self) self.scoring_functions = scoring_functions.AsyncScoringFunctionsResource(self) - self.eval_tasks = eval_tasks.AsyncEvalTasksResource(self) self.benchmarks = benchmarks.AsyncBenchmarksResource(self) self.with_raw_response = AsyncLlamaStackClientWithRawResponse(self) self.with_streaming_response = AsyncLlamaStackClientWithStreamedResponse(self) @@ -533,7 +528,6 @@ def __init__(self, client: LlamaStackClient) -> None: self.datasetio = datasetio.DatasetioResourceWithRawResponse(client.datasetio) self.scoring = scoring.ScoringResourceWithRawResponse(client.scoring) self.scoring_functions = scoring_functions.ScoringFunctionsResourceWithRawResponse(client.scoring_functions) - self.eval_tasks = eval_tasks.EvalTasksResourceWithRawResponse(client.eval_tasks) self.benchmarks = benchmarks.BenchmarksResourceWithRawResponse(client.benchmarks) @@ -565,7 +559,6 @@ def __init__(self, client: AsyncLlamaStackClient) -> None: self.scoring_functions = scoring_functions.AsyncScoringFunctionsResourceWithRawResponse( client.scoring_functions ) - self.eval_tasks = eval_tasks.AsyncEvalTasksResourceWithRawResponse(client.eval_tasks) self.benchmarks = benchmarks.AsyncBenchmarksResourceWithRawResponse(client.benchmarks) @@ -597,7 +590,6 @@ def __init__(self, client: LlamaStackClient) -> None: 
self.scoring_functions = scoring_functions.ScoringFunctionsResourceWithStreamingResponse( client.scoring_functions ) - self.eval_tasks = eval_tasks.EvalTasksResourceWithStreamingResponse(client.eval_tasks) self.benchmarks = benchmarks.BenchmarksResourceWithStreamingResponse(client.benchmarks) @@ -631,7 +623,6 @@ def __init__(self, client: AsyncLlamaStackClient) -> None: self.scoring_functions = scoring_functions.AsyncScoringFunctionsResourceWithStreamingResponse( client.scoring_functions ) - self.eval_tasks = eval_tasks.AsyncEvalTasksResourceWithStreamingResponse(client.eval_tasks) self.benchmarks = benchmarks.AsyncBenchmarksResourceWithStreamingResponse(client.benchmarks) diff --git a/src/llama_stack_client/resources/__init__.py b/src/llama_stack_client/resources/__init__.py index b5e449c9..449fb4a1 100644 --- a/src/llama_stack_client/resources/__init__.py +++ b/src/llama_stack_client/resources/__init__.py @@ -128,14 +128,6 @@ BenchmarksResourceWithStreamingResponse, AsyncBenchmarksResourceWithStreamingResponse, ) -from .eval_tasks import ( - EvalTasksResource, - AsyncEvalTasksResource, - EvalTasksResourceWithRawResponse, - AsyncEvalTasksResourceWithRawResponse, - EvalTasksResourceWithStreamingResponse, - AsyncEvalTasksResourceWithStreamingResponse, -) from .toolgroups import ( ToolgroupsResource, AsyncToolgroupsResource, @@ -326,12 +318,6 @@ "AsyncScoringFunctionsResourceWithRawResponse", "ScoringFunctionsResourceWithStreamingResponse", "AsyncScoringFunctionsResourceWithStreamingResponse", - "EvalTasksResource", - "AsyncEvalTasksResource", - "EvalTasksResourceWithRawResponse", - "AsyncEvalTasksResourceWithRawResponse", - "EvalTasksResourceWithStreamingResponse", - "AsyncEvalTasksResourceWithStreamingResponse", "BenchmarksResource", "AsyncBenchmarksResource", "BenchmarksResourceWithRawResponse", diff --git a/src/llama_stack_client/resources/eval/eval.py b/src/llama_stack_client/resources/eval/eval.py index 053d2398..6ea1669c 100644 --- a/src/llama_stack_client/resources/eval/eval.py +++ b/src/llama_stack_client/resources/eval/eval.py @@ -67,7 +67,7 @@ def with_streaming_response(self) -> EvalResourceWithStreamingResponse: def evaluate_rows( self, - task_id: str, + benchmark_id: str, *, input_rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]], scoring_functions: List[str], @@ -89,10 +89,10 @@ def evaluate_rows( timeout: Override the client-level default timeout for this request, in seconds """ - if not task_id: - raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}") + if not benchmark_id: + raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") return self._post( - f"/v1/eval/tasks/{task_id}/evaluations", + f"/v1/eval/benchmarks/{benchmark_id}/evaluations", body=maybe_transform( { "input_rows": input_rows, @@ -151,7 +151,7 @@ def evaluate_rows_alpha( def run_eval( self, - task_id: str, + benchmark_id: str, *, task_config: BenchmarkConfigParam, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
@@ -171,10 +171,10 @@ def run_eval( timeout: Override the client-level default timeout for this request, in seconds """ - if not task_id: - raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}") + if not benchmark_id: + raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") return self._post( - f"/v1/eval/tasks/{task_id}/jobs", + f"/v1/eval/benchmarks/{benchmark_id}/jobs", body=maybe_transform({"task_config": task_config}, eval_run_eval_params.EvalRunEvalParams), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout @@ -242,7 +242,7 @@ def with_streaming_response(self) -> AsyncEvalResourceWithStreamingResponse: async def evaluate_rows( self, - task_id: str, + benchmark_id: str, *, input_rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]], scoring_functions: List[str], @@ -264,10 +264,10 @@ async def evaluate_rows( timeout: Override the client-level default timeout for this request, in seconds """ - if not task_id: - raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}") + if not benchmark_id: + raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") return await self._post( - f"/v1/eval/tasks/{task_id}/evaluations", + f"/v1/eval/benchmarks/{benchmark_id}/evaluations", body=await async_maybe_transform( { "input_rows": input_rows, @@ -326,7 +326,7 @@ async def evaluate_rows_alpha( async def run_eval( self, - task_id: str, + benchmark_id: str, *, task_config: BenchmarkConfigParam, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -346,10 +346,10 @@ async def run_eval( timeout: Override the client-level default timeout for this request, in seconds """ - if not task_id: - raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}") + if not benchmark_id: + raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}") return await self._post( - f"/v1/eval/tasks/{task_id}/jobs", + f"/v1/eval/benchmarks/{benchmark_id}/jobs", body=await async_maybe_transform({"task_config": task_config}, eval_run_eval_params.EvalRunEvalParams), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout diff --git a/src/llama_stack_client/types/__init__.py b/src/llama_stack_client/types/__init__.py index bc94eb13..ed400c28 100644 --- a/src/llama_stack_client/types/__init__.py +++ b/src/llama_stack_client/types/__init__.py @@ -86,7 +86,6 @@ from .vector_io_query_params import VectorIoQueryParams as VectorIoQueryParams from .benchmark_list_response import BenchmarkListResponse as BenchmarkListResponse from .dataset_register_params import DatasetRegisterParams as DatasetRegisterParams -from .eval_task_list_response import EvalTaskListResponse as EvalTaskListResponse from .list_providers_response import ListProvidersResponse as ListProvidersResponse from .scoring_fn_params_param import ScoringFnParamsParam as ScoringFnParamsParam from .toolgroup_list_response import ToolgroupListResponse as ToolgroupListResponse @@ -98,7 +97,6 @@ from .benchmark_register_params import BenchmarkRegisterParams as BenchmarkRegisterParams from .dataset_retrieve_response import DatasetRetrieveResponse as DatasetRetrieveResponse from .eval_evaluate_rows_params import EvalEvaluateRowsParams as 
EvalEvaluateRowsParams -from .eval_task_register_params import EvalTaskRegisterParams as EvalTaskRegisterParams from .list_tool_groups_response import ListToolGroupsResponse as ListToolGroupsResponse from .toolgroup_register_params import ToolgroupRegisterParams as ToolgroupRegisterParams from .vector_db_register_params import VectorDBRegisterParams as VectorDBRegisterParams diff --git a/src/llama_stack_client/types/benchmark_config_param.py b/src/llama_stack_client/types/benchmark_config_param.py index 48090c5f..f958833a 100644 --- a/src/llama_stack_client/types/benchmark_config_param.py +++ b/src/llama_stack_client/types/benchmark_config_param.py @@ -3,7 +3,7 @@ from __future__ import annotations from typing import Dict -from typing_extensions import Literal, Required, TypedDict +from typing_extensions import Required, TypedDict from .eval_candidate_param import EvalCandidateParam from .scoring_fn_params_param import ScoringFnParamsParam @@ -16,6 +16,4 @@ class BenchmarkConfigParam(TypedDict, total=False): scoring_params: Required[Dict[str, ScoringFnParamsParam]] - type: Required[Literal["benchmark"]] - num_examples: int diff --git a/tests/api_resources/test_eval.py b/tests/api_resources/test_eval.py index de5d0cac..cf120885 100644 --- a/tests/api_resources/test_eval.py +++ b/tests/api_resources/test_eval.py @@ -23,7 +23,7 @@ class TestEval: @parametrize def test_method_evaluate_rows(self, client: LlamaStackClient) -> None: eval = client.eval.evaluate_rows( - task_id="task_id", + benchmark_id="benchmark_id", input_rows=[{"foo": True}], scoring_functions=["string"], task_config={ @@ -38,7 +38,6 @@ def test_method_evaluate_rows(self, client: LlamaStackClient) -> None: "type": "llm_as_judge", } }, - "type": "benchmark", }, ) assert_matches_type(EvaluateResponse, eval, path=["response"]) @@ -46,7 +45,7 @@ def test_method_evaluate_rows(self, client: LlamaStackClient) -> None: @parametrize def test_method_evaluate_rows_with_all_params(self, client: LlamaStackClient) -> None: eval = client.eval.evaluate_rows( - task_id="task_id", + benchmark_id="benchmark_id", input_rows=[{"foo": True}], scoring_functions=["string"], task_config={ @@ -72,7 +71,6 @@ def test_method_evaluate_rows_with_all_params(self, client: LlamaStackClient) -> "prompt_template": "prompt_template", } }, - "type": "benchmark", "num_examples": 0, }, ) @@ -81,7 +79,7 @@ def test_method_evaluate_rows_with_all_params(self, client: LlamaStackClient) -> @parametrize def test_raw_response_evaluate_rows(self, client: LlamaStackClient) -> None: response = client.eval.with_raw_response.evaluate_rows( - task_id="task_id", + benchmark_id="benchmark_id", input_rows=[{"foo": True}], scoring_functions=["string"], task_config={ @@ -96,7 +94,6 @@ def test_raw_response_evaluate_rows(self, client: LlamaStackClient) -> None: "type": "llm_as_judge", } }, - "type": "benchmark", }, ) @@ -108,7 +105,7 @@ def test_raw_response_evaluate_rows(self, client: LlamaStackClient) -> None: @parametrize def test_streaming_response_evaluate_rows(self, client: LlamaStackClient) -> None: with client.eval.with_streaming_response.evaluate_rows( - task_id="task_id", + benchmark_id="benchmark_id", input_rows=[{"foo": True}], scoring_functions=["string"], task_config={ @@ -123,7 +120,6 @@ def test_streaming_response_evaluate_rows(self, client: LlamaStackClient) -> Non "type": "llm_as_judge", } }, - "type": "benchmark", }, ) as response: assert not response.is_closed @@ -136,9 +132,9 @@ def test_streaming_response_evaluate_rows(self, client: LlamaStackClient) -> Non 
@parametrize def test_path_params_evaluate_rows(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `task_id` but received ''"): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): client.eval.with_raw_response.evaluate_rows( - task_id="", + benchmark_id="", input_rows=[{"foo": True}], scoring_functions=["string"], task_config={ @@ -153,7 +149,6 @@ def test_path_params_evaluate_rows(self, client: LlamaStackClient) -> None: "type": "llm_as_judge", } }, - "type": "benchmark", }, ) @@ -175,7 +170,6 @@ def test_method_evaluate_rows_alpha(self, client: LlamaStackClient) -> None: "type": "llm_as_judge", } }, - "type": "benchmark", }, ) assert_matches_type(EvaluateResponse, eval, path=["response"]) @@ -209,7 +203,6 @@ def test_method_evaluate_rows_alpha_with_all_params(self, client: LlamaStackClie "prompt_template": "prompt_template", } }, - "type": "benchmark", "num_examples": 0, }, ) @@ -233,7 +226,6 @@ def test_raw_response_evaluate_rows_alpha(self, client: LlamaStackClient) -> Non "type": "llm_as_judge", } }, - "type": "benchmark", }, ) @@ -260,7 +252,6 @@ def test_streaming_response_evaluate_rows_alpha(self, client: LlamaStackClient) "type": "llm_as_judge", } }, - "type": "benchmark", }, ) as response: assert not response.is_closed @@ -290,14 +281,13 @@ def test_path_params_evaluate_rows_alpha(self, client: LlamaStackClient) -> None "type": "llm_as_judge", } }, - "type": "benchmark", }, ) @parametrize def test_method_run_eval(self, client: LlamaStackClient) -> None: eval = client.eval.run_eval( - task_id="task_id", + benchmark_id="benchmark_id", task_config={ "eval_candidate": { "model": "model", @@ -310,7 +300,6 @@ def test_method_run_eval(self, client: LlamaStackClient) -> None: "type": "llm_as_judge", } }, - "type": "benchmark", }, ) assert_matches_type(Job, eval, path=["response"]) @@ -318,7 +307,7 @@ def test_method_run_eval(self, client: LlamaStackClient) -> None: @parametrize def test_method_run_eval_with_all_params(self, client: LlamaStackClient) -> None: eval = client.eval.run_eval( - task_id="task_id", + benchmark_id="benchmark_id", task_config={ "eval_candidate": { "model": "model", @@ -342,7 +331,6 @@ def test_method_run_eval_with_all_params(self, client: LlamaStackClient) -> None "prompt_template": "prompt_template", } }, - "type": "benchmark", "num_examples": 0, }, ) @@ -351,7 +339,7 @@ def test_method_run_eval_with_all_params(self, client: LlamaStackClient) -> None @parametrize def test_raw_response_run_eval(self, client: LlamaStackClient) -> None: response = client.eval.with_raw_response.run_eval( - task_id="task_id", + benchmark_id="benchmark_id", task_config={ "eval_candidate": { "model": "model", @@ -364,7 +352,6 @@ def test_raw_response_run_eval(self, client: LlamaStackClient) -> None: "type": "llm_as_judge", } }, - "type": "benchmark", }, ) @@ -376,7 +363,7 @@ def test_raw_response_run_eval(self, client: LlamaStackClient) -> None: @parametrize def test_streaming_response_run_eval(self, client: LlamaStackClient) -> None: with client.eval.with_streaming_response.run_eval( - task_id="task_id", + benchmark_id="benchmark_id", task_config={ "eval_candidate": { "model": "model", @@ -389,7 +376,6 @@ def test_streaming_response_run_eval(self, client: LlamaStackClient) -> None: "type": "llm_as_judge", } }, - "type": "benchmark", }, ) as response: assert not response.is_closed @@ -402,9 +388,9 @@ def test_streaming_response_run_eval(self, client: LlamaStackClient) -> 
None: @parametrize def test_path_params_run_eval(self, client: LlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `task_id` but received ''"): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): client.eval.with_raw_response.run_eval( - task_id="", + benchmark_id="", task_config={ "eval_candidate": { "model": "model", @@ -417,7 +403,6 @@ def test_path_params_run_eval(self, client: LlamaStackClient) -> None: "type": "llm_as_judge", } }, - "type": "benchmark", }, ) @@ -437,7 +422,6 @@ def test_method_run_eval_alpha(self, client: LlamaStackClient) -> None: "type": "llm_as_judge", } }, - "type": "benchmark", }, ) assert_matches_type(Job, eval, path=["response"]) @@ -469,7 +453,6 @@ def test_method_run_eval_alpha_with_all_params(self, client: LlamaStackClient) - "prompt_template": "prompt_template", } }, - "type": "benchmark", "num_examples": 0, }, ) @@ -491,7 +474,6 @@ def test_raw_response_run_eval_alpha(self, client: LlamaStackClient) -> None: "type": "llm_as_judge", } }, - "type": "benchmark", }, ) @@ -516,7 +498,6 @@ def test_streaming_response_run_eval_alpha(self, client: LlamaStackClient) -> No "type": "llm_as_judge", } }, - "type": "benchmark", }, ) as response: assert not response.is_closed @@ -544,7 +525,6 @@ def test_path_params_run_eval_alpha(self, client: LlamaStackClient) -> None: "type": "llm_as_judge", } }, - "type": "benchmark", }, ) @@ -555,7 +535,7 @@ class TestAsyncEval: @parametrize async def test_method_evaluate_rows(self, async_client: AsyncLlamaStackClient) -> None: eval = await async_client.eval.evaluate_rows( - task_id="task_id", + benchmark_id="benchmark_id", input_rows=[{"foo": True}], scoring_functions=["string"], task_config={ @@ -570,7 +550,6 @@ async def test_method_evaluate_rows(self, async_client: AsyncLlamaStackClient) - "type": "llm_as_judge", } }, - "type": "benchmark", }, ) assert_matches_type(EvaluateResponse, eval, path=["response"]) @@ -578,7 +557,7 @@ async def test_method_evaluate_rows(self, async_client: AsyncLlamaStackClient) - @parametrize async def test_method_evaluate_rows_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: eval = await async_client.eval.evaluate_rows( - task_id="task_id", + benchmark_id="benchmark_id", input_rows=[{"foo": True}], scoring_functions=["string"], task_config={ @@ -604,7 +583,6 @@ async def test_method_evaluate_rows_with_all_params(self, async_client: AsyncLla "prompt_template": "prompt_template", } }, - "type": "benchmark", "num_examples": 0, }, ) @@ -613,7 +591,7 @@ async def test_method_evaluate_rows_with_all_params(self, async_client: AsyncLla @parametrize async def test_raw_response_evaluate_rows(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.eval.with_raw_response.evaluate_rows( - task_id="task_id", + benchmark_id="benchmark_id", input_rows=[{"foo": True}], scoring_functions=["string"], task_config={ @@ -628,7 +606,6 @@ async def test_raw_response_evaluate_rows(self, async_client: AsyncLlamaStackCli "type": "llm_as_judge", } }, - "type": "benchmark", }, ) @@ -640,7 +617,7 @@ async def test_raw_response_evaluate_rows(self, async_client: AsyncLlamaStackCli @parametrize async def test_streaming_response_evaluate_rows(self, async_client: AsyncLlamaStackClient) -> None: async with async_client.eval.with_streaming_response.evaluate_rows( - task_id="task_id", + benchmark_id="benchmark_id", input_rows=[{"foo": True}], scoring_functions=["string"], task_config={ @@ 
-655,7 +632,6 @@ async def test_streaming_response_evaluate_rows(self, async_client: AsyncLlamaSt "type": "llm_as_judge", } }, - "type": "benchmark", }, ) as response: assert not response.is_closed @@ -668,9 +644,9 @@ async def test_streaming_response_evaluate_rows(self, async_client: AsyncLlamaSt @parametrize async def test_path_params_evaluate_rows(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `task_id` but received ''"): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): await async_client.eval.with_raw_response.evaluate_rows( - task_id="", + benchmark_id="", input_rows=[{"foo": True}], scoring_functions=["string"], task_config={ @@ -685,7 +661,6 @@ async def test_path_params_evaluate_rows(self, async_client: AsyncLlamaStackClie "type": "llm_as_judge", } }, - "type": "benchmark", }, ) @@ -707,7 +682,6 @@ async def test_method_evaluate_rows_alpha(self, async_client: AsyncLlamaStackCli "type": "llm_as_judge", } }, - "type": "benchmark", }, ) assert_matches_type(EvaluateResponse, eval, path=["response"]) @@ -741,7 +715,6 @@ async def test_method_evaluate_rows_alpha_with_all_params(self, async_client: As "prompt_template": "prompt_template", } }, - "type": "benchmark", "num_examples": 0, }, ) @@ -765,7 +738,6 @@ async def test_raw_response_evaluate_rows_alpha(self, async_client: AsyncLlamaSt "type": "llm_as_judge", } }, - "type": "benchmark", }, ) @@ -792,7 +764,6 @@ async def test_streaming_response_evaluate_rows_alpha(self, async_client: AsyncL "type": "llm_as_judge", } }, - "type": "benchmark", }, ) as response: assert not response.is_closed @@ -822,14 +793,13 @@ async def test_path_params_evaluate_rows_alpha(self, async_client: AsyncLlamaSta "type": "llm_as_judge", } }, - "type": "benchmark", }, ) @parametrize async def test_method_run_eval(self, async_client: AsyncLlamaStackClient) -> None: eval = await async_client.eval.run_eval( - task_id="task_id", + benchmark_id="benchmark_id", task_config={ "eval_candidate": { "model": "model", @@ -842,7 +812,6 @@ async def test_method_run_eval(self, async_client: AsyncLlamaStackClient) -> Non "type": "llm_as_judge", } }, - "type": "benchmark", }, ) assert_matches_type(Job, eval, path=["response"]) @@ -850,7 +819,7 @@ async def test_method_run_eval(self, async_client: AsyncLlamaStackClient) -> Non @parametrize async def test_method_run_eval_with_all_params(self, async_client: AsyncLlamaStackClient) -> None: eval = await async_client.eval.run_eval( - task_id="task_id", + benchmark_id="benchmark_id", task_config={ "eval_candidate": { "model": "model", @@ -874,7 +843,6 @@ async def test_method_run_eval_with_all_params(self, async_client: AsyncLlamaSta "prompt_template": "prompt_template", } }, - "type": "benchmark", "num_examples": 0, }, ) @@ -883,7 +851,7 @@ async def test_method_run_eval_with_all_params(self, async_client: AsyncLlamaSta @parametrize async def test_raw_response_run_eval(self, async_client: AsyncLlamaStackClient) -> None: response = await async_client.eval.with_raw_response.run_eval( - task_id="task_id", + benchmark_id="benchmark_id", task_config={ "eval_candidate": { "model": "model", @@ -896,7 +864,6 @@ async def test_raw_response_run_eval(self, async_client: AsyncLlamaStackClient) "type": "llm_as_judge", } }, - "type": "benchmark", }, ) @@ -908,7 +875,7 @@ async def test_raw_response_run_eval(self, async_client: AsyncLlamaStackClient) @parametrize async def test_streaming_response_run_eval(self, 
async_client: AsyncLlamaStackClient) -> None: async with async_client.eval.with_streaming_response.run_eval( - task_id="task_id", + benchmark_id="benchmark_id", task_config={ "eval_candidate": { "model": "model", @@ -921,7 +888,6 @@ async def test_streaming_response_run_eval(self, async_client: AsyncLlamaStackCl "type": "llm_as_judge", } }, - "type": "benchmark", }, ) as response: assert not response.is_closed @@ -934,9 +900,9 @@ async def test_streaming_response_run_eval(self, async_client: AsyncLlamaStackCl @parametrize async def test_path_params_run_eval(self, async_client: AsyncLlamaStackClient) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `task_id` but received ''"): + with pytest.raises(ValueError, match=r"Expected a non-empty value for `benchmark_id` but received ''"): await async_client.eval.with_raw_response.run_eval( - task_id="", + benchmark_id="", task_config={ "eval_candidate": { "model": "model", @@ -949,7 +915,6 @@ async def test_path_params_run_eval(self, async_client: AsyncLlamaStackClient) - "type": "llm_as_judge", } }, - "type": "benchmark", }, ) @@ -969,7 +934,6 @@ async def test_method_run_eval_alpha(self, async_client: AsyncLlamaStackClient) "type": "llm_as_judge", } }, - "type": "benchmark", }, ) assert_matches_type(Job, eval, path=["response"]) @@ -1001,7 +965,6 @@ async def test_method_run_eval_alpha_with_all_params(self, async_client: AsyncLl "prompt_template": "prompt_template", } }, - "type": "benchmark", "num_examples": 0, }, ) @@ -1023,7 +986,6 @@ async def test_raw_response_run_eval_alpha(self, async_client: AsyncLlamaStackCl "type": "llm_as_judge", } }, - "type": "benchmark", }, ) @@ -1048,7 +1010,6 @@ async def test_streaming_response_run_eval_alpha(self, async_client: AsyncLlamaS "type": "llm_as_judge", } }, - "type": "benchmark", }, ) as response: assert not response.is_closed @@ -1076,6 +1037,5 @@ async def test_path_params_run_eval_alpha(self, async_client: AsyncLlamaStackCli "type": "llm_as_judge", } }, - "type": "benchmark", }, )
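
Usage sketch (a minimal example, not part of the generated patch): after this change, `eval.run_eval` and `eval.evaluate_rows` take `benchmark_id` instead of `task_id`, post to `/v1/eval/benchmarks/{benchmark_id}/...`, and `task_config` no longer carries the `"type": "benchmark"` discriminator. The call shapes below mirror tests/api_resources/test_eval.py in this patch; the base URL and the benchmark, model, judge, and scoring-function identifiers are placeholders, not values taken from the diff.

from llama_stack_client import LlamaStackClient

# Placeholder endpoint; point this at a running Llama Stack distribution.
client = LlamaStackClient(base_url="http://localhost:8321")

# BenchmarkConfigParam after this patch: no "type": "benchmark" key, num_examples stays optional.
task_config = {
    "eval_candidate": {
        "type": "model",
        "model": "my-model",  # placeholder model id
        "sampling_params": {"strategy": {"type": "greedy"}},
    },
    "scoring_params": {
        "my-scoring-fn": {  # placeholder scoring function id
            "type": "llm_as_judge",
            "judge_model": "my-judge-model",  # placeholder judge model id
            "judge_score_regexes": ["Answer: (.*)"],  # placeholder regex
        }
    },
    "num_examples": 10,
}

# Start an eval job for an already-registered benchmark
# (POST /v1/eval/benchmarks/{benchmark_id}/jobs).
job = client.eval.run_eval(
    benchmark_id="my-benchmark",  # placeholder; must be a registered benchmark id
    task_config=task_config,
)
print(job)

# Evaluate explicit rows against the same benchmark
# (POST /v1/eval/benchmarks/{benchmark_id}/evaluations).
result = client.eval.evaluate_rows(
    benchmark_id="my-benchmark",
    input_rows=[{"input_query": "2 + 2 = ?", "expected_answer": "4"}],  # placeholder rows
    scoring_functions=["my-scoring-fn"],
    task_config=task_config,
)
print(result)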