From e1b30fc9a584f74d6901ee71ceceda03f7d0e683 Mon Sep 17 00:00:00 2001 From: newokaerinasai Date: Fri, 26 Sep 2025 13:23:39 +0100 Subject: [PATCH 1/5] add external --- src/together/resources/evaluation.py | 6 ++++++ src/together/types/evaluation.py | 2 ++ 2 files changed, 8 insertions(+) diff --git a/src/together/resources/evaluation.py b/src/together/resources/evaluation.py index 1b631b9..f61858b 100644 --- a/src/together/resources/evaluation.py +++ b/src/together/resources/evaluation.py @@ -32,6 +32,7 @@ def create( judge_system_template: str, input_data_file_path: str, judge_external_api_token: Optional[str] = None, + judge_external_base_url: Optional[List[str]] = None, # Classify-specific parameters labels: Optional[List[str]] = None, pass_labels: Optional[List[str]] = None, @@ -55,6 +56,7 @@ def create( judge_system_template: System template for the judge input_data_file_path: Path to input data file judge_external_api_token: Optional external API token for the judge model + judge_external_base_url: Optional external base URL for the judge model labels: List of classification labels (required for classify) pass_labels: List of labels considered as passing (required for classify) min_score: Minimum score value (required for score) @@ -82,6 +84,7 @@ def create( model_source=judge_model_source, system_template=judge_system_template, external_api_token=judge_external_api_token, + external_base_url=judge_external_base_url, ) parameters: Union[ClassifyParameters, ScoreParameters, CompareParameters] # Build parameters based on type @@ -423,6 +426,7 @@ async def create( judge_system_template: str, input_data_file_path: str, judge_external_api_token: Optional[str] = None, + judge_external_base_url: Optional[List[str]] = None, # Classify-specific parameters labels: Optional[List[str]] = None, pass_labels: Optional[List[str]] = None, @@ -446,6 +450,7 @@ async def create( judge_system_template: System template for the judge input_data_file_path: Path to input data file 
judge_external_api_token: Optional external API token for the judge model + judge_external_base_url: Optional external base URL for the judge model labels: List of classification labels (required for classify) pass_labels: List of labels considered as passing (required for classify) min_score: Minimum score value (required for score) @@ -473,6 +478,7 @@ async def create( model_source=judge_model_source, system_template=judge_system_template, external_api_token=judge_external_api_token, + external_base_url=judge_external_base_url, ) parameters: Union[ClassifyParameters, ScoreParameters, CompareParameters] # Build parameters based on type diff --git a/src/together/types/evaluation.py b/src/together/types/evaluation.py index 3375912..e3837d6 100644 --- a/src/together/types/evaluation.py +++ b/src/together/types/evaluation.py @@ -27,6 +27,7 @@ class JudgeModelConfig(BaseModel): model_source: Literal["serverless", "dedicated", "external"] system_template: str external_api_token: Optional[str] = None + external_base_url: Optional[List[str]] = None class ModelRequest(BaseModel): @@ -37,6 +38,7 @@ class ModelRequest(BaseModel): system_template: str input_template: str external_api_token: Optional[str] = None + external_base_url: Optional[List[str]] = None class ClassifyParameters(BaseModel): From 563b44c8448e39d2fa1123bcb6447e0544c88169 Mon Sep 17 00:00:00 2001 From: newokaerinasai Date: Fri, 26 Sep 2025 13:33:35 +0100 Subject: [PATCH 2/5] Update evaluation.py --- src/together/cli/api/evaluation.py | 34 ++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/together/cli/api/evaluation.py b/src/together/cli/api/evaluation.py index ff8200b..1526760 100644 --- a/src/together/cli/api/evaluation.py +++ b/src/together/cli/api/evaluation.py @@ -41,6 +41,12 @@ def evaluation(ctx: click.Context) -> None: required=False, help="Optional external API token for the judge model.", ) +@click.option( + "--judge-external-base-url", + type=str, + required=False, + 
help="Optional external base URL for the judge model.", +) @click.option( "--judge-system-template", type=str, @@ -74,6 +80,11 @@ def evaluation(ctx: click.Context) -> None: type=str, help="Optional external API token for the model to evaluate.", ) +@click.option( + "--model-to-evaluate-external-base-url", + type=str, + help="Optional external base URL for the model to evaluate.", +) @click.option( "--model-to-evaluate-max-tokens", type=int, @@ -140,6 +151,11 @@ def evaluation(ctx: click.Context) -> None: type=str, help="Optional external API token for model A.", ) +@click.option( + "--model-a-external-base-url", + type=str, + help="Optional external base URL for model A.", +) @click.option( "--model-a-max-tokens", type=int, @@ -181,6 +197,11 @@ def evaluation(ctx: click.Context) -> None: type=str, help="Optional external API token for model B.", ) +@click.option( + "--model-b-external-base-url", + type=str, + help="Optional external base URL for model B.", +) @click.option( "--model-b-max-tokens", type=int, @@ -208,11 +229,13 @@ def create( judge_model_source: str, judge_system_template: str, judge_external_api_token: Optional[str], + judge_external_base_url: Optional[str], input_data_file_path: str, model_field: Optional[str], model_to_evaluate: Optional[str], model_to_evaluate_source: Optional[str], model_to_evaluate_external_api_token: Optional[str], + model_to_evaluate_external_base_url: Optional[str], model_to_evaluate_max_tokens: Optional[int], model_to_evaluate_temperature: Optional[float], model_to_evaluate_system_template: Optional[str], @@ -226,6 +249,7 @@ def create( model_a: Optional[str], model_a_source: Optional[str], model_a_external_api_token: Optional[str], + model_a_external_base_url: Optional[str], model_a_max_tokens: Optional[int], model_a_temperature: Optional[float], model_a_system_template: Optional[str], @@ -234,6 +258,7 @@ def create( model_b: Optional[str], model_b_source: Optional[str], model_b_external_api_token: Optional[str], + 
model_b_external_base_url: Optional[str], model_b_max_tokens: Optional[int], model_b_temperature: Optional[float], model_b_system_template: Optional[str], @@ -285,6 +310,10 @@ def create( model_to_evaluate_final["external_api_token"] = ( model_to_evaluate_external_api_token ) + if model_to_evaluate_external_base_url: + model_to_evaluate_final["external_base_url"] = ( + model_to_evaluate_external_base_url + ) # Build model-a configuration model_a_final: Union[Dict[str, Any], None, str] = None @@ -318,6 +347,8 @@ def create( } if model_a_external_api_token: model_a_final["external_api_token"] = model_a_external_api_token + if model_a_external_base_url: + model_a_final["external_base_url"] = model_a_external_base_url # Build model-b configuration model_b_final: Union[Dict[str, Any], None, str] = None @@ -351,6 +382,8 @@ def create( } if model_b_external_api_token: model_b_final["external_api_token"] = model_b_external_api_token + if model_b_external_base_url: + model_b_final["external_base_url"] = model_b_external_base_url try: response = client.evaluation.create( @@ -359,6 +392,7 @@ def create( judge_model_source=judge_model_source, judge_system_template=judge_system_template, judge_external_api_token=judge_external_api_token, + judge_external_base_url=judge_external_base_url, input_data_file_path=input_data_file_path, model_to_evaluate=model_to_evaluate_final, labels=labels_list, From b0c49b524d013abea590551ad00221dbcd66e53e Mon Sep 17 00:00:00 2001 From: newokaerinasai Date: Fri, 26 Sep 2025 13:36:04 +0100 Subject: [PATCH 3/5] fix code error --- src/together/resources/evaluation.py | 4 ++-- src/together/types/evaluation.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/together/resources/evaluation.py b/src/together/resources/evaluation.py index f61858b..2f2a1f1 100644 --- a/src/together/resources/evaluation.py +++ b/src/together/resources/evaluation.py @@ -32,7 +32,7 @@ def create( judge_system_template: str, input_data_file_path: str, 
judge_external_api_token: Optional[str] = None, - judge_external_base_url: Optional[List[str]] = None, + judge_external_base_url: Optional[str] = None, # Classify-specific parameters labels: Optional[List[str]] = None, pass_labels: Optional[List[str]] = None, @@ -426,7 +426,7 @@ async def create( judge_system_template: str, input_data_file_path: str, judge_external_api_token: Optional[str] = None, - judge_external_base_url: Optional[List[str]] = None, + judge_external_base_url: Optional[str] = None, # Classify-specific parameters labels: Optional[List[str]] = None, pass_labels: Optional[List[str]] = None, diff --git a/src/together/types/evaluation.py b/src/together/types/evaluation.py index e3837d6..ca7ed9b 100644 --- a/src/together/types/evaluation.py +++ b/src/together/types/evaluation.py @@ -27,7 +27,7 @@ class JudgeModelConfig(BaseModel): model_source: Literal["serverless", "dedicated", "external"] system_template: str external_api_token: Optional[str] = None - external_base_url: Optional[List[str]] = None + external_base_url: Optional[str] = None class ModelRequest(BaseModel): @@ -38,7 +38,7 @@ class ModelRequest(BaseModel): system_template: str input_template: str external_api_token: Optional[str] = None - external_base_url: Optional[List[str]] = None + external_base_url: Optional[str] = None class ClassifyParameters(BaseModel): From b4cdad89a279440064297759fa09d21e2bee82d3 Mon Sep 17 00:00:00 2001 From: newokaerinasai Date: Wed, 15 Oct 2025 13:35:42 +0100 Subject: [PATCH 4/5] add dependency --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index ab6681a..8aed038 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,6 +67,7 @@ optional = true pytest = ">=7.4.2,<9.0.0" pytest-watch = "^4.2.0" pytest-mock = "^3.14.0" +pytest-asyncio = "^0.24.0" tox = "^4.14.1" [tool.poetry.group.examples] From aac807834e9dda8704665d0e91ce80b25885a9da Mon Sep 17 00:00:00 2001 From: newokaerinasai Date: Wed, 15 Oct 2025 
13:42:03 +0100 Subject: [PATCH 5/5] add dep to poetry lock --- poetry.lock | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/poetry.lock b/poetry.lock index 08f5633..78745bb 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.2.0 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" @@ -1782,6 +1782,25 @@ tomli = {version = ">=1", markers = "python_version < \"3.11\""} [package.extras] dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests", "setuptools", "xmlschema"] +[[package]] +name = "pytest-asyncio" +version = "0.24.0" +description = "Pytest support for asyncio" +optional = false +python-versions = ">=3.8" +groups = ["tests"] +files = [ + {file = "pytest_asyncio-0.24.0-py3-none-any.whl", hash = "sha256:a811296ed596b69bf0b6f3dc40f83bcaf341b155a269052d82efa2b25ac7037b"}, + {file = "pytest_asyncio-0.24.0.tar.gz", hash = "sha256:d081d828e576d85f875399194281e92bf8a68d60d72d1a2faf2feddb6c46b276"}, +] + +[package.dependencies] +pytest = ">=8.2,<9" + +[package.extras] +docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1.0)"] +testing = ["coverage (>=6.2)", "hypothesis (>=5.7.1)"] + [[package]] name = "pytest-mock" version = "3.15.1" @@ -2812,4 +2831,4 @@ pyarrow = ["pyarrow"] [metadata] lock-version = "2.1" python-versions = "^3.10" -content-hash = "65ae5544a73b730d48fd3c6b9d1d364fc4662e4b65f0995acfe4f6f4f440d8db" +content-hash = "394883338926938b8fca8e0feefa887251f11117b69f6ab902f0409686e26641"