
Commit 39b1248

chore: deprecate eval task (Sync updates from stainless branch: main) (#150)
# What does this PR do?

- See llamastack/llama-stack#1186

## Test Plan

- Test in llamastack/llama-stack#1186
1 parent f5e1078 commit 39b1248
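For readers skimming the diff, the rename this commit applies, summarized from the changes below; the benchmark ID and `cfg` variable are placeholders, not values from this repo:

```python
# Before this commit (removed/renamed below):
#   client.eval_tasks.list()
#   client.eval.run_eval(task_id="my-benchmark", task_config=cfg)       # POST /v1/eval/tasks/{task_id}/jobs
#
# After this commit:
#   client.benchmarks.list()
#   client.eval.run_eval(benchmark_id="my-benchmark", task_config=cfg)  # POST /v1/eval/benchmarks/{benchmark_id}/jobs
```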

File tree

6 files changed: +41 -108 lines changed


src/llama_stack_client/_client.py

Lines changed: 0 additions & 9 deletions
@@ -40,7 +40,6 @@
     telemetry,
     vector_io,
     benchmarks,
-    eval_tasks,
     toolgroups,
     vector_dbs,
     batch_inference,
@@ -94,7 +93,6 @@ class LlamaStackClient(SyncAPIClient):
     datasetio: datasetio.DatasetioResource
     scoring: scoring.ScoringResource
     scoring_functions: scoring_functions.ScoringFunctionsResource
-    eval_tasks: eval_tasks.EvalTasksResource
     benchmarks: benchmarks.BenchmarksResource
     with_raw_response: LlamaStackClientWithRawResponse
     with_streaming_response: LlamaStackClientWithStreamedResponse
@@ -177,7 +175,6 @@ def __init__(
         self.datasetio = datasetio.DatasetioResource(self)
         self.scoring = scoring.ScoringResource(self)
         self.scoring_functions = scoring_functions.ScoringFunctionsResource(self)
-        self.eval_tasks = eval_tasks.EvalTasksResource(self)
         self.benchmarks = benchmarks.BenchmarksResource(self)
         self.with_raw_response = LlamaStackClientWithRawResponse(self)
         self.with_streaming_response = LlamaStackClientWithStreamedResponse(self)
@@ -312,7 +309,6 @@ class AsyncLlamaStackClient(AsyncAPIClient):
     datasetio: datasetio.AsyncDatasetioResource
     scoring: scoring.AsyncScoringResource
     scoring_functions: scoring_functions.AsyncScoringFunctionsResource
-    eval_tasks: eval_tasks.AsyncEvalTasksResource
     benchmarks: benchmarks.AsyncBenchmarksResource
     with_raw_response: AsyncLlamaStackClientWithRawResponse
     with_streaming_response: AsyncLlamaStackClientWithStreamedResponse
@@ -395,7 +391,6 @@ def __init__(
         self.datasetio = datasetio.AsyncDatasetioResource(self)
         self.scoring = scoring.AsyncScoringResource(self)
         self.scoring_functions = scoring_functions.AsyncScoringFunctionsResource(self)
-        self.eval_tasks = eval_tasks.AsyncEvalTasksResource(self)
         self.benchmarks = benchmarks.AsyncBenchmarksResource(self)
         self.with_raw_response = AsyncLlamaStackClientWithRawResponse(self)
         self.with_streaming_response = AsyncLlamaStackClientWithStreamedResponse(self)
@@ -533,7 +528,6 @@ def __init__(self, client: LlamaStackClient) -> None:
         self.datasetio = datasetio.DatasetioResourceWithRawResponse(client.datasetio)
         self.scoring = scoring.ScoringResourceWithRawResponse(client.scoring)
         self.scoring_functions = scoring_functions.ScoringFunctionsResourceWithRawResponse(client.scoring_functions)
-        self.eval_tasks = eval_tasks.EvalTasksResourceWithRawResponse(client.eval_tasks)
         self.benchmarks = benchmarks.BenchmarksResourceWithRawResponse(client.benchmarks)

@@ -565,7 +559,6 @@ def __init__(self, client: AsyncLlamaStackClient) -> None:
         self.scoring_functions = scoring_functions.AsyncScoringFunctionsResourceWithRawResponse(
             client.scoring_functions
         )
-        self.eval_tasks = eval_tasks.AsyncEvalTasksResourceWithRawResponse(client.eval_tasks)
         self.benchmarks = benchmarks.AsyncBenchmarksResourceWithRawResponse(client.benchmarks)

@@ -597,7 +590,6 @@ def __init__(self, client: LlamaStackClient) -> None:
         self.scoring_functions = scoring_functions.ScoringFunctionsResourceWithStreamingResponse(
             client.scoring_functions
         )
-        self.eval_tasks = eval_tasks.EvalTasksResourceWithStreamingResponse(client.eval_tasks)
         self.benchmarks = benchmarks.BenchmarksResourceWithStreamingResponse(client.benchmarks)

@@ -631,7 +623,6 @@ def __init__(self, client: AsyncLlamaStackClient) -> None:
         self.scoring_functions = scoring_functions.AsyncScoringFunctionsResourceWithStreamingResponse(
             client.scoring_functions
         )
-        self.eval_tasks = eval_tasks.AsyncEvalTasksResourceWithStreamingResponse(client.eval_tasks)
         self.benchmarks = benchmarks.AsyncBenchmarksResourceWithStreamingResponse(client.benchmarks)

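Net effect of the `_client.py` change for SDK users: the `eval_tasks` attribute is gone from both `LlamaStackClient` and `AsyncLlamaStackClient`, and `benchmarks` is the surviving resource. A minimal sketch, where the `base_url` is a placeholder for your own Llama Stack deployment:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

# Benchmarks remain a first-class resource on the client.
benchmarks = client.benchmarks.list()
print(benchmarks)

# The removed attribute now fails at runtime:
# client.eval_tasks.list()  # AttributeError after this commit
```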
src/llama_stack_client/resources/__init__.py

Lines changed: 0 additions & 14 deletions
@@ -128,14 +128,6 @@
     BenchmarksResourceWithStreamingResponse,
     AsyncBenchmarksResourceWithStreamingResponse,
 )
-from .eval_tasks import (
-    EvalTasksResource,
-    AsyncEvalTasksResource,
-    EvalTasksResourceWithRawResponse,
-    AsyncEvalTasksResourceWithRawResponse,
-    EvalTasksResourceWithStreamingResponse,
-    AsyncEvalTasksResourceWithStreamingResponse,
-)
 from .toolgroups import (
     ToolgroupsResource,
     AsyncToolgroupsResource,
@@ -326,12 +318,6 @@
     "AsyncScoringFunctionsResourceWithRawResponse",
     "ScoringFunctionsResourceWithStreamingResponse",
     "AsyncScoringFunctionsResourceWithStreamingResponse",
-    "EvalTasksResource",
-    "AsyncEvalTasksResource",
-    "EvalTasksResourceWithRawResponse",
-    "AsyncEvalTasksResourceWithRawResponse",
-    "EvalTasksResourceWithStreamingResponse",
-    "AsyncEvalTasksResourceWithStreamingResponse",
     "BenchmarksResource",
     "AsyncBenchmarksResource",
     "BenchmarksResourceWithRawResponse",

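As a consequence of the `resources/__init__.py` edit, the `EvalTasks*` classes can no longer be imported from the resources package; the `Benchmarks*` names still listed in `__all__` above are the ones to reach for. A quick illustration:

```python
# Still exported after this commit (kept in __all__):
from llama_stack_client.resources import BenchmarksResource

# Removed by this commit; this import now raises ImportError:
# from llama_stack_client.resources import EvalTasksResource
```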
src/llama_stack_client/resources/eval/eval.py

Lines changed: 16 additions & 16 deletions
@@ -67,7 +67,7 @@ def with_streaming_response(self) -> EvalResourceWithStreamingResponse:

     def evaluate_rows(
         self,
-        task_id: str,
+        benchmark_id: str,
         *,
         input_rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]],
         scoring_functions: List[str],
@@ -89,10 +89,10 @@ def evaluate_rows(

           timeout: Override the client-level default timeout for this request, in seconds
         """
-        if not task_id:
-            raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
+        if not benchmark_id:
+            raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
         return self._post(
-            f"/v1/eval/tasks/{task_id}/evaluations",
+            f"/v1/eval/benchmarks/{benchmark_id}/evaluations",
             body=maybe_transform(
                 {
                     "input_rows": input_rows,
@@ -151,7 +151,7 @@ def evaluate_rows_alpha(

     def run_eval(
         self,
-        task_id: str,
+        benchmark_id: str,
         *,
         task_config: BenchmarkConfigParam,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -171,10 +171,10 @@ def run_eval(

           timeout: Override the client-level default timeout for this request, in seconds
         """
-        if not task_id:
-            raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
+        if not benchmark_id:
+            raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
         return self._post(
-            f"/v1/eval/tasks/{task_id}/jobs",
+            f"/v1/eval/benchmarks/{benchmark_id}/jobs",
             body=maybe_transform({"task_config": task_config}, eval_run_eval_params.EvalRunEvalParams),
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -242,7 +242,7 @@ def with_streaming_response(self) -> AsyncEvalResourceWithStreamingResponse:

     async def evaluate_rows(
         self,
-        task_id: str,
+        benchmark_id: str,
         *,
         input_rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]],
         scoring_functions: List[str],
@@ -264,10 +264,10 @@ async def evaluate_rows(

           timeout: Override the client-level default timeout for this request, in seconds
         """
-        if not task_id:
-            raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
+        if not benchmark_id:
+            raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
         return await self._post(
-            f"/v1/eval/tasks/{task_id}/evaluations",
+            f"/v1/eval/benchmarks/{benchmark_id}/evaluations",
             body=await async_maybe_transform(
                 {
                     "input_rows": input_rows,
@@ -326,7 +326,7 @@ async def evaluate_rows_alpha(

     async def run_eval(
         self,
-        task_id: str,
+        benchmark_id: str,
         *,
         task_config: BenchmarkConfigParam,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -346,10 +346,10 @@ async def run_eval(

           timeout: Override the client-level default timeout for this request, in seconds
         """
-        if not task_id:
-            raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
+        if not benchmark_id:
+            raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
         return await self._post(
-            f"/v1/eval/tasks/{task_id}/jobs",
+            f"/v1/eval/benchmarks/{benchmark_id}/jobs",
             body=await async_maybe_transform({"task_config": task_config}, eval_run_eval_params.EvalRunEvalParams),
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout

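The caller-visible change in `eval.py` is the first parameter and the request path: `task_id` and `/v1/eval/tasks/...` become `benchmark_id` and `/v1/eval/benchmarks/...`. A hedged sketch of the renamed call; the benchmark ID, model name, and candidate contents are illustrative placeholders, not values taken from this diff:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder URL

job = client.eval.run_eval(
    benchmark_id="my-benchmark",  # was `task_id=` before this commit
    task_config={
        # Illustrative candidate; the real EvalCandidateParam shape depends on your setup.
        "eval_candidate": {
            "type": "model",
            "model": "my-model",
            "sampling_params": {"strategy": {"type": "greedy"}},
        },
        "scoring_params": {},  # Dict[str, ScoringFnParamsParam]
    },
)
print(job)
```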
src/llama_stack_client/types/__init__.py

Lines changed: 0 additions & 2 deletions
@@ -86,7 +86,6 @@
 from .vector_io_query_params import VectorIoQueryParams as VectorIoQueryParams
 from .benchmark_list_response import BenchmarkListResponse as BenchmarkListResponse
 from .dataset_register_params import DatasetRegisterParams as DatasetRegisterParams
-from .eval_task_list_response import EvalTaskListResponse as EvalTaskListResponse
 from .list_providers_response import ListProvidersResponse as ListProvidersResponse
 from .scoring_fn_params_param import ScoringFnParamsParam as ScoringFnParamsParam
 from .toolgroup_list_response import ToolgroupListResponse as ToolgroupListResponse
@@ -98,7 +97,6 @@
 from .benchmark_register_params import BenchmarkRegisterParams as BenchmarkRegisterParams
 from .dataset_retrieve_response import DatasetRetrieveResponse as DatasetRetrieveResponse
 from .eval_evaluate_rows_params import EvalEvaluateRowsParams as EvalEvaluateRowsParams
-from .eval_task_register_params import EvalTaskRegisterParams as EvalTaskRegisterParams
 from .list_tool_groups_response import ListToolGroupsResponse as ListToolGroupsResponse
 from .toolgroup_register_params import ToolgroupRegisterParams as ToolgroupRegisterParams
 from .vector_db_register_params import VectorDBRegisterParams as VectorDBRegisterParams

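Mirroring the resource removal, the eval-task types are dropped from `types/__init__.py`, while the benchmark types shown as unchanged context above remain importable:

```python
# Kept (unchanged context in the diff above):
from llama_stack_client.types import BenchmarkListResponse, BenchmarkRegisterParams

# Removed by this commit; these imports now raise ImportError:
# from llama_stack_client.types import EvalTaskListResponse, EvalTaskRegisterParams
```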
src/llama_stack_client/types/benchmark_config_param.py

Lines changed: 1 addition & 3 deletions
@@ -3,7 +3,7 @@
 from __future__ import annotations

 from typing import Dict
-from typing_extensions import Literal, Required, TypedDict
+from typing_extensions import Required, TypedDict

 from .eval_candidate_param import EvalCandidateParam
 from .scoring_fn_params_param import ScoringFnParamsParam
@@ -16,6 +16,4 @@ class BenchmarkConfigParam(TypedDict, total=False):

     scoring_params: Required[Dict[str, ScoringFnParamsParam]]

-    type: Required[Literal["benchmark"]]
-
     num_examples: int

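With `type: Required[Literal["benchmark"]]` gone, a `BenchmarkConfigParam` is now just the candidate, the scoring params, and the optional `num_examples`; the `"type": "benchmark"` discriminator is no longer part of the dict. A minimal sketch, with the candidate contents being illustrative rather than taken from this diff:

```python
from llama_stack_client.types.benchmark_config_param import BenchmarkConfigParam

config: BenchmarkConfigParam = {
    "eval_candidate": {"type": "model", "model": "my-model"},  # illustrative EvalCandidateParam
    "scoring_params": {},  # Dict[str, ScoringFnParamsParam]
    "num_examples": 5,     # optional
    # "type": "benchmark",  # removed from the TypedDict by this commit
}
```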