diff --git a/poetry.lock b/poetry.lock index 08f5633..78745bb 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.2.0 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" @@ -1782,6 +1782,25 @@ tomli = {version = ">=1", markers = "python_version < \"3.11\""} [package.extras] dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "requests", "setuptools", "xmlschema"] +[[package]] +name = "pytest-asyncio" +version = "0.24.0" +description = "Pytest support for asyncio" +optional = false +python-versions = ">=3.8" +groups = ["tests"] +files = [ + {file = "pytest_asyncio-0.24.0-py3-none-any.whl", hash = "sha256:a811296ed596b69bf0b6f3dc40f83bcaf341b155a269052d82efa2b25ac7037b"}, + {file = "pytest_asyncio-0.24.0.tar.gz", hash = "sha256:d081d828e576d85f875399194281e92bf8a68d60d72d1a2faf2feddb6c46b276"}, +] + +[package.dependencies] +pytest = ">=8.2,<9" + +[package.extras] +docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1.0)"] +testing = ["coverage (>=6.2)", "hypothesis (>=5.7.1)"] + [[package]] name = "pytest-mock" version = "3.15.1" @@ -2812,4 +2831,4 @@ pyarrow = ["pyarrow"] [metadata] lock-version = "2.1" python-versions = "^3.10" -content-hash = "65ae5544a73b730d48fd3c6b9d1d364fc4662e4b65f0995acfe4f6f4f440d8db" +content-hash = "394883338926938b8fca8e0feefa887251f11117b69f6ab902f0409686e26641" diff --git a/pyproject.toml b/pyproject.toml index 16fe6bb..be16f9b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,6 +67,7 @@ optional = true pytest = ">=7.4.2,<9.0.0" pytest-watch = "^4.2.0" pytest-mock = "^3.14.0" +pytest-asyncio = "^0.24.0" tox = "^4.14.1" [tool.poetry.group.examples] diff --git a/src/together/cli/api/evaluation.py b/src/together/cli/api/evaluation.py index ff8200b..1526760 100644 --- a/src/together/cli/api/evaluation.py +++ b/src/together/cli/api/evaluation.py @@ 
-41,6 +41,12 @@ def evaluation(ctx: click.Context) -> None: required=False, help="Optional external API token for the judge model.", ) +@click.option( + "--judge-external-base-url", + type=str, + required=False, + help="Optional external base URLs for the judge model.", +) @click.option( "--judge-system-template", type=str, @@ -74,6 +80,11 @@ def evaluation(ctx: click.Context) -> None: type=str, help="Optional external API token for the model to evaluate.", ) +@click.option( + "--model-to-evaluate-external-base-url", + type=str, + help="Optional external base URL for the model to evaluate.", +) @click.option( "--model-to-evaluate-max-tokens", type=int, @@ -140,6 +151,11 @@ def evaluation(ctx: click.Context) -> None: type=str, help="Optional external API token for model A.", ) +@click.option( + "--model-a-external-base-url", + type=str, + help="Optional external base URL for model A.", +) @click.option( "--model-a-max-tokens", type=int, @@ -181,6 +197,11 @@ def evaluation(ctx: click.Context) -> None: type=str, help="Optional external API token for model B.", ) +@click.option( + "--model-b-external-base-url", + type=str, + help="Optional external base URL for model B.", +) @click.option( "--model-b-max-tokens", type=int, @@ -208,11 +229,13 @@ def create( judge_model_source: str, judge_system_template: str, judge_external_api_token: Optional[str], + judge_external_base_url: Optional[str], input_data_file_path: str, model_field: Optional[str], model_to_evaluate: Optional[str], model_to_evaluate_source: Optional[str], model_to_evaluate_external_api_token: Optional[str], + model_to_evaluate_external_base_url: Optional[str], model_to_evaluate_max_tokens: Optional[int], model_to_evaluate_temperature: Optional[float], model_to_evaluate_system_template: Optional[str], @@ -226,6 +249,7 @@ def create( model_a: Optional[str], model_a_source: Optional[str], model_a_external_api_token: Optional[str], + model_a_external_base_url: Optional[str], model_a_max_tokens: Optional[int], 
model_a_temperature: Optional[float], model_a_system_template: Optional[str], @@ -234,6 +258,7 @@ def create( model_b: Optional[str], model_b_source: Optional[str], model_b_external_api_token: Optional[str], + model_b_external_base_url: Optional[str], model_b_max_tokens: Optional[int], model_b_temperature: Optional[float], model_b_system_template: Optional[str], @@ -285,6 +310,10 @@ def create( model_to_evaluate_final["external_api_token"] = ( model_to_evaluate_external_api_token ) + if model_to_evaluate_external_base_url: + model_to_evaluate_final["external_base_url"] = ( + model_to_evaluate_external_base_url + ) # Build model-a configuration model_a_final: Union[Dict[str, Any], None, str] = None @@ -318,6 +347,8 @@ def create( } if model_a_external_api_token: model_a_final["external_api_token"] = model_a_external_api_token + if model_a_external_base_url: + model_a_final["external_base_url"] = model_a_external_base_url # Build model-b configuration model_b_final: Union[Dict[str, Any], None, str] = None @@ -351,6 +382,8 @@ def create( } if model_b_external_api_token: model_b_final["external_api_token"] = model_b_external_api_token + if model_b_external_base_url: + model_b_final["external_base_url"] = model_b_external_base_url try: response = client.evaluation.create( @@ -359,6 +392,7 @@ def create( judge_model_source=judge_model_source, judge_system_template=judge_system_template, judge_external_api_token=judge_external_api_token, + judge_external_base_url=judge_external_base_url, input_data_file_path=input_data_file_path, model_to_evaluate=model_to_evaluate_final, labels=labels_list, diff --git a/src/together/resources/evaluation.py b/src/together/resources/evaluation.py index 1b631b9..2f2a1f1 100644 --- a/src/together/resources/evaluation.py +++ b/src/together/resources/evaluation.py @@ -32,6 +32,7 @@ def create( judge_system_template: str, input_data_file_path: str, judge_external_api_token: Optional[str] = None, + judge_external_base_url: Optional[str] = None, 
# Classify-specific parameters labels: Optional[List[str]] = None, pass_labels: Optional[List[str]] = None, @@ -55,6 +56,7 @@ def create( judge_system_template: System template for the judge input_data_file_path: Path to input data file judge_external_api_token: Optional external API token for the judge model + judge_external_base_url: Optional external base URL for the judge model labels: List of classification labels (required for classify) pass_labels: List of labels considered as passing (required for classify) min_score: Minimum score value (required for score) @@ -82,6 +84,7 @@ def create( model_source=judge_model_source, system_template=judge_system_template, external_api_token=judge_external_api_token, + external_base_url=judge_external_base_url, ) parameters: Union[ClassifyParameters, ScoreParameters, CompareParameters] # Build parameters based on type @@ -423,6 +426,7 @@ async def create( judge_system_template: str, input_data_file_path: str, judge_external_api_token: Optional[str] = None, + judge_external_base_url: Optional[str] = None, # Classify-specific parameters labels: Optional[List[str]] = None, pass_labels: Optional[List[str]] = None, @@ -446,6 +450,7 @@ async def create( judge_system_template: System template for the judge input_data_file_path: Path to input data file judge_external_api_token: Optional external API token for the judge model + judge_external_base_url: Optional external base URL for the judge model labels: List of classification labels (required for classify) pass_labels: List of labels considered as passing (required for classify) min_score: Minimum score value (required for score) @@ -473,6 +478,7 @@ async def create( model_source=judge_model_source, system_template=judge_system_template, external_api_token=judge_external_api_token, + external_base_url=judge_external_base_url, ) parameters: Union[ClassifyParameters, ScoreParameters, CompareParameters] # Build parameters based on type diff --git
a/src/together/types/evaluation.py b/src/together/types/evaluation.py index 3375912..ca7ed9b 100644 --- a/src/together/types/evaluation.py +++ b/src/together/types/evaluation.py @@ -27,6 +27,7 @@ class JudgeModelConfig(BaseModel): model_source: Literal["serverless", "dedicated", "external"] system_template: str external_api_token: Optional[str] = None + external_base_url: Optional[str] = None class ModelRequest(BaseModel): @@ -37,6 +38,7 @@ class ModelRequest(BaseModel): system_template: str input_template: str external_api_token: Optional[str] = None + external_base_url: Optional[str] = None class ClassifyParameters(BaseModel):