src/llama_stack_client/_client.py (9 changes: 0 additions & 9 deletions)

@@ -40,7 +40,6 @@
     telemetry,
     vector_io,
     benchmarks,
-    eval_tasks,
     toolgroups,
     vector_dbs,
     batch_inference,
@@ -94,7 +93,6 @@ class LlamaStackClient(SyncAPIClient):
     datasetio: datasetio.DatasetioResource
     scoring: scoring.ScoringResource
     scoring_functions: scoring_functions.ScoringFunctionsResource
-    eval_tasks: eval_tasks.EvalTasksResource
     benchmarks: benchmarks.BenchmarksResource
     with_raw_response: LlamaStackClientWithRawResponse
     with_streaming_response: LlamaStackClientWithStreamedResponse
@@ -177,7 +175,6 @@ def __init__(
         self.datasetio = datasetio.DatasetioResource(self)
         self.scoring = scoring.ScoringResource(self)
         self.scoring_functions = scoring_functions.ScoringFunctionsResource(self)
-        self.eval_tasks = eval_tasks.EvalTasksResource(self)
         self.benchmarks = benchmarks.BenchmarksResource(self)
         self.with_raw_response = LlamaStackClientWithRawResponse(self)
         self.with_streaming_response = LlamaStackClientWithStreamedResponse(self)
@@ -312,7 +309,6 @@ class AsyncLlamaStackClient(AsyncAPIClient):
     datasetio: datasetio.AsyncDatasetioResource
     scoring: scoring.AsyncScoringResource
     scoring_functions: scoring_functions.AsyncScoringFunctionsResource
-    eval_tasks: eval_tasks.AsyncEvalTasksResource
     benchmarks: benchmarks.AsyncBenchmarksResource
     with_raw_response: AsyncLlamaStackClientWithRawResponse
     with_streaming_response: AsyncLlamaStackClientWithStreamedResponse
@@ -395,7 +391,6 @@ def __init__(
         self.datasetio = datasetio.AsyncDatasetioResource(self)
         self.scoring = scoring.AsyncScoringResource(self)
         self.scoring_functions = scoring_functions.AsyncScoringFunctionsResource(self)
-        self.eval_tasks = eval_tasks.AsyncEvalTasksResource(self)
         self.benchmarks = benchmarks.AsyncBenchmarksResource(self)
         self.with_raw_response = AsyncLlamaStackClientWithRawResponse(self)
         self.with_streaming_response = AsyncLlamaStackClientWithStreamedResponse(self)
@@ -533,7 +528,6 @@ def __init__(self, client: LlamaStackClient) -> None:
         self.datasetio = datasetio.DatasetioResourceWithRawResponse(client.datasetio)
         self.scoring = scoring.ScoringResourceWithRawResponse(client.scoring)
         self.scoring_functions = scoring_functions.ScoringFunctionsResourceWithRawResponse(client.scoring_functions)
-        self.eval_tasks = eval_tasks.EvalTasksResourceWithRawResponse(client.eval_tasks)
         self.benchmarks = benchmarks.BenchmarksResourceWithRawResponse(client.benchmarks)


@@ -565,7 +559,6 @@ def __init__(self, client: AsyncLlamaStackClient) -> None:
         self.scoring_functions = scoring_functions.AsyncScoringFunctionsResourceWithRawResponse(
             client.scoring_functions
         )
-        self.eval_tasks = eval_tasks.AsyncEvalTasksResourceWithRawResponse(client.eval_tasks)
         self.benchmarks = benchmarks.AsyncBenchmarksResourceWithRawResponse(client.benchmarks)


@@ -597,7 +590,6 @@ def __init__(self, client: LlamaStackClient) -> None:
         self.scoring_functions = scoring_functions.ScoringFunctionsResourceWithStreamingResponse(
             client.scoring_functions
         )
-        self.eval_tasks = eval_tasks.EvalTasksResourceWithStreamingResponse(client.eval_tasks)
         self.benchmarks = benchmarks.BenchmarksResourceWithStreamingResponse(client.benchmarks)


@@ -631,7 +623,6 @@ def __init__(self, client: AsyncLlamaStackClient) -> None:
         self.scoring_functions = scoring_functions.AsyncScoringFunctionsResourceWithStreamingResponse(
             client.scoring_functions
         )
-        self.eval_tasks = eval_tasks.AsyncEvalTasksResourceWithStreamingResponse(client.eval_tasks)
         self.benchmarks = benchmarks.AsyncBenchmarksResourceWithStreamingResponse(client.benchmarks)
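With the eval_tasks wiring removed from both the sync and async clients, `client.eval_tasks` is no longer an attribute. A minimal migration sketch follows, assuming callers move to the `benchmarks` resource; the base URL, the `list()` call, and the `identifier` attribute are assumptions, not values shown in this diff.

```python
# Minimal migration sketch, assuming the benchmarks resource replaces eval_tasks.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder server URL

# Before this change: client.eval_tasks.list()
for benchmark in client.benchmarks.list():
    print(benchmark.identifier)  # attribute name assumed
```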
src/llama_stack_client/resources/__init__.py (14 changes: 0 additions & 14 deletions)

@@ -128,14 +128,6 @@
     BenchmarksResourceWithStreamingResponse,
     AsyncBenchmarksResourceWithStreamingResponse,
 )
-from .eval_tasks import (
-    EvalTasksResource,
-    AsyncEvalTasksResource,
-    EvalTasksResourceWithRawResponse,
-    AsyncEvalTasksResourceWithRawResponse,
-    EvalTasksResourceWithStreamingResponse,
-    AsyncEvalTasksResourceWithStreamingResponse,
-)
 from .toolgroups import (
     ToolgroupsResource,
     AsyncToolgroupsResource,
@@ -326,12 +318,6 @@
     "AsyncScoringFunctionsResourceWithRawResponse",
     "ScoringFunctionsResourceWithStreamingResponse",
     "AsyncScoringFunctionsResourceWithStreamingResponse",
-    "EvalTasksResource",
-    "AsyncEvalTasksResource",
-    "EvalTasksResourceWithRawResponse",
-    "AsyncEvalTasksResourceWithRawResponse",
-    "EvalTasksResourceWithStreamingResponse",
-    "AsyncEvalTasksResourceWithStreamingResponse",
     "BenchmarksResource",
     "AsyncBenchmarksResource",
     "BenchmarksResourceWithRawResponse",
src/llama_stack_client/resources/eval/eval.py (32 changes: 16 additions & 16 deletions)

@@ -67,7 +67,7 @@ def with_streaming_response(self) -> EvalResourceWithStreamingResponse:

     def evaluate_rows(
         self,
-        task_id: str,
+        benchmark_id: str,
         *,
         input_rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]],
         scoring_functions: List[str],
@@ -89,10 +89,10 @@ def evaluate_rows(

           timeout: Override the client-level default timeout for this request, in seconds
         """
-        if not task_id:
-            raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
+        if not benchmark_id:
+            raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
         return self._post(
-            f"/v1/eval/tasks/{task_id}/evaluations",
+            f"/v1/eval/benchmarks/{benchmark_id}/evaluations",
             body=maybe_transform(
                 {
                     "input_rows": input_rows,
@@ -151,7 +151,7 @@ def evaluate_rows_alpha(

     def run_eval(
         self,
-        task_id: str,
+        benchmark_id: str,
         *,
         task_config: BenchmarkConfigParam,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -171,10 +171,10 @@ def run_eval(

           timeout: Override the client-level default timeout for this request, in seconds
         """
-        if not task_id:
-            raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
+        if not benchmark_id:
+            raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
         return self._post(
-            f"/v1/eval/tasks/{task_id}/jobs",
+            f"/v1/eval/benchmarks/{benchmark_id}/jobs",
             body=maybe_transform({"task_config": task_config}, eval_run_eval_params.EvalRunEvalParams),
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -242,7 +242,7 @@ def with_streaming_response(self) -> AsyncEvalResourceWithStreamingResponse:

     async def evaluate_rows(
         self,
-        task_id: str,
+        benchmark_id: str,
         *,
         input_rows: Iterable[Dict[str, Union[bool, float, str, Iterable[object], object, None]]],
         scoring_functions: List[str],
@@ -264,10 +264,10 @@ async def evaluate_rows(

           timeout: Override the client-level default timeout for this request, in seconds
         """
-        if not task_id:
-            raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
+        if not benchmark_id:
+            raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
         return await self._post(
-            f"/v1/eval/tasks/{task_id}/evaluations",
+            f"/v1/eval/benchmarks/{benchmark_id}/evaluations",
             body=await async_maybe_transform(
                 {
                     "input_rows": input_rows,
@@ -326,7 +326,7 @@ async def evaluate_rows_alpha(

     async def run_eval(
         self,
-        task_id: str,
+        benchmark_id: str,
         *,
         task_config: BenchmarkConfigParam,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -346,10 +346,10 @@ async def run_eval(

           timeout: Override the client-level default timeout for this request, in seconds
         """
-        if not task_id:
-            raise ValueError(f"Expected a non-empty value for `task_id` but received {task_id!r}")
+        if not benchmark_id:
+            raise ValueError(f"Expected a non-empty value for `benchmark_id` but received {benchmark_id!r}")
         return await self._post(
-            f"/v1/eval/tasks/{task_id}/jobs",
+            f"/v1/eval/benchmarks/{benchmark_id}/jobs",
             body=await async_maybe_transform({"task_config": task_config}, eval_run_eval_params.EvalRunEvalParams),
             options=make_request_options(
                 extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
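After this change, `eval.evaluate_rows` and `eval.run_eval` take `benchmark_id` as the path parameter and post to `/v1/eval/benchmarks/{benchmark_id}/...` rather than `/v1/eval/tasks/{task_id}/...`. A usage sketch of the renamed call is below; the server URL, the benchmark id, and the eval-candidate shape inside `task_config` are illustrative assumptions, not values taken from this PR.

```python
# Sketch of calling run_eval after the rename; ids and candidate shape are assumptions.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder server URL

benchmark_config = {
    "eval_candidate": {
        "type": "model",  # assumed candidate shape
        "model": "meta-llama/Llama-3.1-8B-Instruct",  # placeholder model id
        "sampling_params": {"strategy": {"type": "greedy"}},  # assumed sampling shape
    },
    "scoring_params": {},
}

# Old: run_eval(task_id=...)      -> POST /v1/eval/tasks/{task_id}/jobs
# New: run_eval(benchmark_id=...) -> POST /v1/eval/benchmarks/{benchmark_id}/jobs
job = client.eval.run_eval(
    benchmark_id="meta-reference::mmlu",  # placeholder benchmark id
    task_config=benchmark_config,
)
print(job)
```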
src/llama_stack_client/types/__init__.py (2 changes: 0 additions & 2 deletions)

@@ -86,7 +86,6 @@
 from .vector_io_query_params import VectorIoQueryParams as VectorIoQueryParams
 from .benchmark_list_response import BenchmarkListResponse as BenchmarkListResponse
 from .dataset_register_params import DatasetRegisterParams as DatasetRegisterParams
-from .eval_task_list_response import EvalTaskListResponse as EvalTaskListResponse
 from .list_providers_response import ListProvidersResponse as ListProvidersResponse
 from .scoring_fn_params_param import ScoringFnParamsParam as ScoringFnParamsParam
 from .toolgroup_list_response import ToolgroupListResponse as ToolgroupListResponse
@@ -98,7 +97,6 @@
 from .benchmark_register_params import BenchmarkRegisterParams as BenchmarkRegisterParams
 from .dataset_retrieve_response import DatasetRetrieveResponse as DatasetRetrieveResponse
 from .eval_evaluate_rows_params import EvalEvaluateRowsParams as EvalEvaluateRowsParams
-from .eval_task_register_params import EvalTaskRegisterParams as EvalTaskRegisterParams
 from .list_tool_groups_response import ListToolGroupsResponse as ListToolGroupsResponse
 from .toolgroup_register_params import ToolgroupRegisterParams as ToolgroupRegisterParams
 from .vector_db_register_params import VectorDBRegisterParams as VectorDBRegisterParams
src/llama_stack_client/types/benchmark_config_param.py (4 changes: 1 addition & 3 deletions)

@@ -3,7 +3,7 @@
 from __future__ import annotations

 from typing import Dict
-from typing_extensions import Literal, Required, TypedDict
+from typing_extensions import Required, TypedDict

 from .eval_candidate_param import EvalCandidateParam
 from .scoring_fn_params_param import ScoringFnParamsParam
@@ -16,6 +16,4 @@ class BenchmarkConfigParam(TypedDict, total=False):

     scoring_params: Required[Dict[str, ScoringFnParamsParam]]

-    type: Required[Literal["benchmark"]]
-
     num_examples: int
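With the `type: Required[Literal["benchmark"]]` key dropped from `BenchmarkConfigParam`, a config dict now carries only `eval_candidate`, `scoring_params`, and the optional `num_examples`. A sketch of constructing one after this change follows; the candidate contents are placeholders, and reading `num_examples` as a cap on evaluated rows is an assumption.

```python
# Sketch of a BenchmarkConfigParam after this change (no "type" key at the top level).
from llama_stack_client.types.benchmark_config_param import BenchmarkConfigParam

config: BenchmarkConfigParam = {
    "eval_candidate": {
        "type": "model",  # candidate discriminator, distinct from the removed config-level key
        "model": "meta-llama/Llama-3.1-8B-Instruct",  # placeholder model id
        "sampling_params": {"strategy": {"type": "greedy"}},  # assumed sampling shape
    },
    "scoring_params": {},  # per-scoring-function parameters; empty here
    "num_examples": 10,    # optional; assumed to cap how many rows are evaluated
}
```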