diff --git a/pyproject.toml b/pyproject.toml index 90307a48..d5cfcc73 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -102,6 +102,9 @@ dev = [ "ipykernel>=6.30.1", "pip>=25.2", ] +hint = [ + "sentence-transformers>=5.0.0", +] [project.scripts] diff --git a/src/agentlab/agents/tool_use_agent/tool_use_agent.py b/src/agentlab/agents/tool_use_agent/tool_use_agent.py index 4e79c5b0..375c829e 100644 --- a/src/agentlab/agents/tool_use_agent/tool_use_agent.py +++ b/src/agentlab/agents/tool_use_agent/tool_use_agent.py @@ -1,13 +1,20 @@ import fnmatch import json +import logging +import os +import random +import time from abc import ABC, abstractmethod +from collections import defaultdict from copy import copy from dataclasses import asdict, dataclass, field from pathlib import Path -from typing import Any +from typing import Any, Literal import bgym +import numpy as np import pandas as pd +import requests from bgym import Benchmark as BgymBenchmark from browsergym.core.observation import extract_screenshot from browsergym.utils.obs import ( @@ -34,6 +41,8 @@ ) from agentlab.llm.tracking import cost_tracker_decorator +logger = logging.getLogger(__name__) + @dataclass class Block(ABC): @@ -176,7 +185,6 @@ class Obs(Block): def apply( self, llm, discussion: StructuredDiscussion, obs: dict, last_llm_output: LLMOutput ) -> dict: - obs_msg = llm.msg.user() tool_calls = last_llm_output.tool_calls if self.use_last_error: @@ -298,22 +306,52 @@ def apply_init(self, llm, discussion: StructuredDiscussion) -> dict: class TaskHint(Block): use_task_hint: bool = True hint_db_rel_path: str = "hint_db.csv" + hint_retrieval_mode: Literal["direct", "llm", "emb"] = "direct" + top_n: int = 4 # Number of top hints to return when using embedding retrieval + embedder_model: str = "Qwen/Qwen3-Embedding-0.6B" # Model for embedding hints + embedder_server: str = "http://localhost:5000" + llm_prompt: str = """We're choosing hints to help solve the following task:\n{goal}.\n +You need to choose the most relevant hints topic from the following list:\n\nHint topics:\n{topics}\n +Choose hint topic for the task and return only its number, e.g. 1. If you don't know the answer, return -1.""" def _init(self): """Initialize the block.""" - hint_db_path = Path(__file__).parent / self.hint_db_rel_path + if Path(self.hint_db_rel_path).is_absolute(): + hint_db_path = Path(self.hint_db_rel_path) + else: + hint_db_path = Path(__file__).parent / self.hint_db_rel_path self.hint_db = pd.read_csv(hint_db_path, header=0, index_col=None, dtype=str) + if self.hint_retrieval_mode == "emb": + self.encode_hints() + + def oai_embed(self, text: str): + response = self._oai_emb.create(input=text, model="text-embedding-3-small") + return response.data[0].embedding + + def encode_hints(self): + self.uniq_hints = self.hint_db.drop_duplicates(subset=["hint"], keep="first") + logger.info( + f"Encoding {len(self.uniq_hints)} unique hints with semantic keys using {self.embedder_model} model." + ) + hints = self.uniq_hints["hint"].tolist() + semantic_keys = self.uniq_hints["semantic_keys"].tolist() + lines = [f"{k}: {h}" for h, k in zip(hints, semantic_keys)] + emb_path = f"{self.hint_db_rel_path}.embs.npy" + assert os.path.exists(emb_path), f"Embedding file not found: {emb_path}" + logger.info(f"Loading hint embeddings from: {emb_path}") + emb_dict = np.load(emb_path, allow_pickle=True).item() + self.hint_embeddings = np.array([emb_dict[k] for k in lines]) + logger.info(f"Loaded hint embeddings shape: {self.hint_embeddings.shape}") def apply(self, llm, discussion: StructuredDiscussion, task_name: str) -> dict: if not self.use_task_hint: - return + return {} - task_hints = self.hint_db[ - self.hint_db["task_name"].apply(lambda x: fnmatch.fnmatch(x, task_name)) - ] + goal = "\n".join([c.get("text", "") for c in discussion.groups[0].messages[1].content]) + task_hints = self.choose_hints(llm, task_name, goal) hints = [] - for hint in task_hints["hint"]: + for hint in task_hints: hint = hint.strip() if hint: hints.append(f"- {hint}") @@ -327,6 +365,94 @@ def apply(self, llm, discussion: StructuredDiscussion, task_name: str) -> dict: discussion.append(msg) + def choose_hints(self, llm, task_name: str, goal: str) -> list[str]: + """Choose hints based on the task name.""" + if self.hint_retrieval_mode == "llm": + return self.choose_hints_llm(llm, goal) + elif self.hint_retrieval_mode == "direct": + return self.choose_hints_direct(task_name) + elif self.hint_retrieval_mode == "emb": + return self.choose_hints_emb(goal) + else: + raise ValueError(f"Unknown hint retrieval mode: {self.hint_retrieval_mode}") + + def choose_hints_llm(self, llm, goal: str) -> list[str]: + """Choose hints using LLM to filter the hints.""" + topic_to_hints = defaultdict(list) + for i, row in self.hint_db.iterrows(): + topic_to_hints[row["semantic_keys"]].append(i) + hint_topics = list(topic_to_hints.keys()) + topics = "\n".join([f"{i}. {h}" for i, h in enumerate(hint_topics)]) + prompt = self.llm_prompt.format(goal=goal, topics=topics) + response = llm(APIPayload(messages=[llm.msg.user().add_text(prompt)])) + try: + hint_topic_idx = json.loads(response.think) + if hint_topic_idx < 0 or hint_topic_idx >= len(hint_topics): + logger.error(f"Wrong LLM hint id response: {response.think}, no hints") + return [] + hint_topic = hint_topics[hint_topic_idx] + hint_indices = topic_to_hints[hint_topic] + df = self.hint_db.iloc[hint_indices].copy() + df = df.drop_duplicates(subset=["hint"], keep="first") # leave only unique hints + hints = df["hint"].tolist() + logger.debug(f"LLM hint topic {hint_topic_idx}, chosen hints: {df['hint'].tolist()}") + except json.JSONDecodeError: + logger.error(f"Failed to parse LLM hint id response: {response.think}, no hints") + hints = [] + return hints + + def choose_hints_emb(self, goal: str) -> list[str]: + """Choose hints using embeddings to filter the hints.""" + goal_embeddings = self._encode([goal], prompt="task description") + similarities = self._similarity(goal_embeddings.tolist(), self.hint_embeddings.tolist()) + top_indices = similarities.argsort()[0][-self.top_n :].tolist() + logger.info(f"Top hint indices based on embedding similarity: {top_indices}") + hints = self.uniq_hints.iloc[top_indices] + logger.info(f"Embedding-based hints chosen: {hints}") + return hints["hint"].tolist() + + def _encode(self, texts: list[str], prompt: str = "", timeout: int = 10, max_retries: int = 5): + """Call the encode API endpoint with timeout and retries""" + for attempt in range(max_retries): + try: + response = requests.post( + f"{self.embedder_server}/encode", + json={"texts": texts, "prompt": prompt}, + timeout=timeout, + ) + embs = response.json()["embeddings"] + return np.asarray(embs) + except (requests.exceptions.RequestException, requests.exceptions.Timeout) as e: + if attempt == max_retries - 1: + raise e + time.sleep(random.uniform(1, timeout)) + continue + + def _similarity( + self, texts1: list[str], texts2: list[str], timeout: int = 2, max_retries: int = 5 + ): + """Call the similarity API endpoint with timeout and retries""" + for attempt in range(max_retries): + try: + response = requests.post( + f"{self.embedder_server}/similarity", + json={"texts1": texts1, "texts2": texts2}, + timeout=timeout, + ) + similarities = response.json()["similarities"] + return np.asarray(similarities) + except (requests.exceptions.RequestException, requests.exceptions.Timeout) as e: + if attempt == max_retries - 1: + raise e + time.sleep(random.uniform(1, timeout)) + continue + + def choose_hints_direct(self, task_name: str) -> list[str]: + hints = self.hint_db[ + self.hint_db["task_name"].apply(lambda x: fnmatch.fnmatch(x, task_name)) + ] + return hints["hint"].tolist() + @dataclass class PromptConfig: @@ -386,7 +512,8 @@ def __init__( self.model_args = model_args self.config = config self.action_set: bgym.AbstractActionSet = action_set or bgym.HighLevelActionSet( - self.config.action_subsets, multiaction=self.config.multiaction # type: ignore + self.config.action_subsets, + multiaction=self.config.multiaction, # type: ignore ) self.tools = self.action_set.to_tool_description(api=model_args.api) @@ -510,6 +637,15 @@ def get_action(self, obs: Any) -> float: vision_support=True, ) +GPT_4_1_CC_API = OpenAIChatModelArgs( + model_name="gpt-4.1", + max_total_tokens=200_000, + max_input_tokens=200_000, + max_new_tokens=2_000, + temperature=0.1, + vision_support=True, +) + GPT_5_mini = OpenAIChatModelArgs( model_name="gpt-5-mini-2025-08-07", max_total_tokens=400_000, @@ -548,7 +684,7 @@ def get_action(self, obs: Any) -> float: vision_support=True, ) -CLAUDE_MODEL_CONFIG = ClaudeResponseModelArgs( +CLAUDE_SONNET_37 = ClaudeResponseModelArgs( model_name="claude-3-7-sonnet-20250219", max_total_tokens=200_000, max_input_tokens=200_000, @@ -557,6 +693,15 @@ def get_action(self, obs: Any) -> float: vision_support=True, ) +CLAUDE_SONNET_4 = ClaudeResponseModelArgs( + model_name="claude-sonnet-4-20250514", + max_total_tokens=200_000, + max_input_tokens=200_000, + max_new_tokens=2_000, + temperature=0.1, + vision_support=True, +) + O3_RESPONSE_MODEL = OpenAIResponseModelArgs( model_name="o3-2025-04-16", max_total_tokens=200_000, @@ -574,6 +719,25 @@ def get_action(self, obs: Any) -> float: vision_support=True, ) +GPT_5 = OpenAIChatModelArgs( + model_name="gpt-5", + max_total_tokens=200_000, + max_input_tokens=200_000, + max_new_tokens=8_000, + temperature=None, + vision_support=True, +) + + +GPT_5_MINI = OpenAIChatModelArgs( + model_name="gpt-5-mini-2025-08-07", + max_total_tokens=200_000, + max_input_tokens=200_000, + max_new_tokens=2_000, + temperature=1.0, + vision_support=True, +) + GPT4_1_OPENROUTER_MODEL = OpenRouterModelArgs( model_name="openai/gpt-4.1", max_total_tokens=200_000, @@ -600,12 +764,12 @@ def get_action(self, obs: Any) -> float: keep_last_n_obs=None, multiaction=False, # whether to use multi-action or not # action_subsets=("bid",), - action_subsets=("coord"), + action_subsets=("coord",), # action_subsets=("coord", "bid"), ) AGENT_CONFIG = ToolUseAgentArgs( - model_args=CLAUDE_MODEL_CONFIG, + model_args=CLAUDE_SONNET_37, config=DEFAULT_PROMPT_CONFIG, ) @@ -633,7 +797,7 @@ def get_action(self, obs: Any) -> float: ) OSWORLD_CLAUDE = ToolUseAgentArgs( - model_args=CLAUDE_MODEL_CONFIG, + model_args=CLAUDE_SONNET_37, config=PromptConfig( tag_screenshot=True, goal=Goal(goal_as_system_msg=True), diff --git a/src/agentlab/llm/chat_api.py b/src/agentlab/llm/chat_api.py index 93219891..dc9667b5 100644 --- a/src/agentlab/llm/chat_api.py +++ b/src/agentlab/llm/chat_api.py @@ -359,7 +359,7 @@ def __init__( min_retry_wait_time=min_retry_wait_time, api_key_env_var="OPENAI_API_KEY", client_class=OpenAI, - pricing_func=tracking.get_pricing_openai, + pricing_func=partial(tracking.get_pricing_litellm, model_name=model_name), log_probs=log_probs, ) diff --git a/src/agentlab/llm/tracking.py b/src/agentlab/llm/tracking.py index c3dc700e..fb96a2d1 100644 --- a/src/agentlab/llm/tracking.py +++ b/src/agentlab/llm/tracking.py @@ -178,9 +178,9 @@ def __call__(self, *args, **kwargs): # 'self' here calls ._call_api() method of the subclass response = self._call_api(*args, **kwargs) usage = dict(getattr(response, "usage", {})) - if "prompt_tokens_details" in usage: + if "prompt_tokens_details" in usage and usage["prompt_tokens_details"]: usage["cached_tokens"] = usage["prompt_tokens_details"].cached_tokens - if "input_tokens_details" in usage: + if "input_tokens_details" in usage and usage["input_tokens_details"]: usage["cached_tokens"] = usage["input_tokens_details"].cached_tokens usage = {f"usage_{k}": v for k, v in usage.items() if isinstance(v, (int, float))} usage |= {"n_api_calls": 1} @@ -338,12 +338,16 @@ def get_effective_cost_from_openai_api(self, response) -> float: if api_type == "chatcompletion": total_input_tokens = usage.prompt_tokens # (cache read tokens + new input tokens) output_tokens = usage.completion_tokens - cached_input_tokens = usage.prompt_tokens_details.cached_tokens + cached_input_tokens = ( + usage.prompt_tokens_details.cached_tokens if usage.prompt_tokens_details else 0 + ) new_input_tokens = total_input_tokens - cached_input_tokens elif api_type == "response": total_input_tokens = usage.input_tokens # (cache read tokens + new input tokens) output_tokens = usage.output_tokens - cached_input_tokens = usage.input_tokens_details.cached_tokens + cached_input_tokens = ( + usage.input_tokens_details.cached_tokens if usage.input_tokens_details else 0 + ) new_input_tokens = total_input_tokens - cached_input_tokens else: logging.warning(f"Unsupported API type: {api_type}. Defaulting cost to 0.0.") diff --git a/uv.lock b/uv.lock index 691500b5..85bc8bd3 100644 --- a/uv.lock +++ b/uv.lock @@ -72,6 +72,9 @@ dev = [ { name = "ipykernel" }, { name = "pip" }, ] +hint = [ + { name = "sentence-transformers" }, +] [package.metadata] requires-dist = [ @@ -118,6 +121,7 @@ dev = [ { name = "ipykernel", specifier = ">=6.30.1" }, { name = "pip", specifier = ">=25.2" }, ] +hint = [{ name = "sentence-transformers", specifier = ">=5.0.0" }] [[package]] name = "aiofiles" @@ -5179,6 +5183,47 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/10/cc/75e9f17e3670b5ed93c32456fda823333c6279b144cd93e2c03aa06aa472/scikit_image-0.25.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:330d061bd107d12f8d68f1d611ae27b3b813b8cdb0300a71d07b1379178dd4cd", size = 13862801 }, ] +[[package]] +name = "scikit-learn" +version = "1.7.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "joblib" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.3.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "scipy", version = "1.16.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "threadpoolctl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/41/84/5f4af978fff619706b8961accac84780a6d298d82a8873446f72edb4ead0/scikit_learn-1.7.1.tar.gz", hash = "sha256:24b3f1e976a4665aa74ee0fcaac2b8fccc6ae77c8e07ab25da3ba6d3292b9802", size = 7190445 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/74/88/0dd5be14ef19f2d80a77780be35a33aa94e8a3b3223d80bee8892a7832b4/scikit_learn-1.7.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:406204dd4004f0517f0b23cf4b28c6245cbd51ab1b6b78153bc784def214946d", size = 9338868 }, + { url = "https://files.pythonhosted.org/packages/fd/52/3056b6adb1ac58a0bc335fc2ed2fcf599974d908855e8cb0ca55f797593c/scikit_learn-1.7.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:16af2e44164f05d04337fd1fc3ae7c4ea61fd9b0d527e22665346336920fe0e1", size = 8655943 }, + { url = "https://files.pythonhosted.org/packages/fb/a4/e488acdece6d413f370a9589a7193dac79cd486b2e418d3276d6ea0b9305/scikit_learn-1.7.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2f2e78e56a40c7587dea9a28dc4a49500fa2ead366869418c66f0fd75b80885c", size = 9652056 }, + { url = "https://files.pythonhosted.org/packages/18/41/bceacec1285b94eb9e4659b24db46c23346d7e22cf258d63419eb5dec6f7/scikit_learn-1.7.1-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b62b76ad408a821475b43b7bb90a9b1c9a4d8d125d505c2df0539f06d6e631b1", size = 9473691 }, + { url = "https://files.pythonhosted.org/packages/12/7b/e1ae4b7e1dd85c4ca2694ff9cc4a9690970fd6150d81b975e6c5c6f8ee7c/scikit_learn-1.7.1-cp310-cp310-win_amd64.whl", hash = "sha256:9963b065677a4ce295e8ccdee80a1dd62b37249e667095039adcd5bce6e90deb", size = 8900873 }, + { url = "https://files.pythonhosted.org/packages/b4/bd/a23177930abd81b96daffa30ef9c54ddbf544d3226b8788ce4c3ef1067b4/scikit_learn-1.7.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:90c8494ea23e24c0fb371afc474618c1019dc152ce4a10e4607e62196113851b", size = 9334838 }, + { url = "https://files.pythonhosted.org/packages/8d/a1/d3a7628630a711e2ac0d1a482910da174b629f44e7dd8cfcd6924a4ef81a/scikit_learn-1.7.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:bb870c0daf3bf3be145ec51df8ac84720d9972170786601039f024bf6d61a518", size = 8651241 }, + { url = "https://files.pythonhosted.org/packages/26/92/85ec172418f39474c1cd0221d611345d4f433fc4ee2fc68e01f524ccc4e4/scikit_learn-1.7.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:40daccd1b5623f39e8943ab39735cadf0bdce80e67cdca2adcb5426e987320a8", size = 9718677 }, + { url = "https://files.pythonhosted.org/packages/df/ce/abdb1dcbb1d2b66168ec43b23ee0cee356b4cc4100ddee3943934ebf1480/scikit_learn-1.7.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:30d1f413cfc0aa5a99132a554f1d80517563c34a9d3e7c118fde2d273c6fe0f7", size = 9511189 }, + { url = "https://files.pythonhosted.org/packages/b2/3b/47b5eaee01ef2b5a80ba3f7f6ecf79587cb458690857d4777bfd77371c6f/scikit_learn-1.7.1-cp311-cp311-win_amd64.whl", hash = "sha256:c711d652829a1805a95d7fe96654604a8f16eab5a9e9ad87b3e60173415cb650", size = 8914794 }, + { url = "https://files.pythonhosted.org/packages/cb/16/57f176585b35ed865f51b04117947fe20f130f78940c6477b6d66279c9c2/scikit_learn-1.7.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:3cee419b49b5bbae8796ecd690f97aa412ef1674410c23fc3257c6b8b85b8087", size = 9260431 }, + { url = "https://files.pythonhosted.org/packages/67/4e/899317092f5efcab0e9bc929e3391341cec8fb0e816c4789686770024580/scikit_learn-1.7.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:2fd8b8d35817b0d9ebf0b576f7d5ffbbabdb55536b0655a8aaae629d7ffd2e1f", size = 8637191 }, + { url = "https://files.pythonhosted.org/packages/f3/1b/998312db6d361ded1dd56b457ada371a8d8d77ca2195a7d18fd8a1736f21/scikit_learn-1.7.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:588410fa19a96a69763202f1d6b7b91d5d7a5d73be36e189bc6396bfb355bd87", size = 9486346 }, + { url = "https://files.pythonhosted.org/packages/ad/09/a2aa0b4e644e5c4ede7006748f24e72863ba2ae71897fecfd832afea01b4/scikit_learn-1.7.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e3142f0abe1ad1d1c31a2ae987621e41f6b578144a911ff4ac94781a583adad7", size = 9290988 }, + { url = "https://files.pythonhosted.org/packages/15/fa/c61a787e35f05f17fc10523f567677ec4eeee5f95aa4798dbbbcd9625617/scikit_learn-1.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:3ddd9092c1bd469acab337d87930067c87eac6bd544f8d5027430983f1e1ae88", size = 8735568 }, + { url = "https://files.pythonhosted.org/packages/52/f8/e0533303f318a0f37b88300d21f79b6ac067188d4824f1047a37214ab718/scikit_learn-1.7.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b7839687fa46d02e01035ad775982f2470be2668e13ddd151f0f55a5bf123bae", size = 9213143 }, + { url = "https://files.pythonhosted.org/packages/71/f3/f1df377d1bdfc3e3e2adc9c119c238b182293e6740df4cbeac6de2cc3e23/scikit_learn-1.7.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:a10f276639195a96c86aa572ee0698ad64ee939a7b042060b98bd1930c261d10", size = 8591977 }, + { url = "https://files.pythonhosted.org/packages/99/72/c86a4cd867816350fe8dee13f30222340b9cd6b96173955819a5561810c5/scikit_learn-1.7.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:13679981fdaebc10cc4c13c43344416a86fcbc61449cb3e6517e1df9d12c8309", size = 9436142 }, + { url = "https://files.pythonhosted.org/packages/e8/66/277967b29bd297538dc7a6ecfb1a7dce751beabd0d7f7a2233be7a4f7832/scikit_learn-1.7.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4f1262883c6a63f067a980a8cdd2d2e7f2513dddcef6a9eaada6416a7a7cbe43", size = 9282996 }, + { url = "https://files.pythonhosted.org/packages/e2/47/9291cfa1db1dae9880420d1e07dbc7e8dd4a7cdbc42eaba22512e6bde958/scikit_learn-1.7.1-cp313-cp313-win_amd64.whl", hash = "sha256:ca6d31fb10e04d50bfd2b50d66744729dbb512d4efd0223b864e2fdbfc4cee11", size = 8707418 }, + { url = "https://files.pythonhosted.org/packages/61/95/45726819beccdaa34d3362ea9b2ff9f2b5d3b8bf721bd632675870308ceb/scikit_learn-1.7.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:781674d096303cfe3d351ae6963ff7c958db61cde3421cd490e3a5a58f2a94ae", size = 9561466 }, + { url = "https://files.pythonhosted.org/packages/ee/1c/6f4b3344805de783d20a51eb24d4c9ad4b11a7f75c1801e6ec6d777361fd/scikit_learn-1.7.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:10679f7f125fe7ecd5fad37dd1aa2daae7e3ad8df7f3eefa08901b8254b3e12c", size = 9040467 }, + { url = "https://files.pythonhosted.org/packages/6f/80/abe18fe471af9f1d181904203d62697998b27d9b62124cd281d740ded2f9/scikit_learn-1.7.1-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1f812729e38c8cb37f760dce71a9b83ccfb04f59b3dca7c6079dcdc60544fa9e", size = 9532052 }, + { url = "https://files.pythonhosted.org/packages/14/82/b21aa1e0c4cee7e74864d3a5a721ab8fcae5ca55033cb6263dca297ed35b/scikit_learn-1.7.1-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:88e1a20131cf741b84b89567e1717f27a2ced228e0f29103426102bc2e3b8ef7", size = 9361575 }, + { url = "https://files.pythonhosted.org/packages/f2/20/f4777fcd5627dc6695fa6b92179d0edb7a3ac1b91bcd9a1c7f64fa7ade23/scikit_learn-1.7.1-cp313-cp313t-win_amd64.whl", hash = "sha256:b1bd1d919210b6a10b7554b717c9000b5485aa95a1d0f177ae0d7ee8ec750da5", size = 9277310 }, +] + [[package]] name = "scipy" version = "1.15.3" @@ -5329,6 +5374,26 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6a/23/8146aad7d88f4fcb3a6218f41a60f6c2d4e3a72de72da1825dc7c8f7877c/semantic_version-2.10.0-py2.py3-none-any.whl", hash = "sha256:de78a3b8e0feda74cabc54aab2da702113e33ac9d9eb9d2389bcf1f58b7d9177", size = 15552 }, ] +[[package]] +name = "sentence-transformers" +version = "5.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "huggingface-hub" }, + { name = "pillow" }, + { name = "scikit-learn" }, + { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "scipy", version = "1.16.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "torch" }, + { name = "tqdm" }, + { name = "transformers" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/46/b8/1b99379b730bc403d8e9ddc2db56f8ac9ce743734b44a1dbeebb900490d4/sentence_transformers-5.1.0.tar.gz", hash = "sha256:70c7630697cc1c64ffca328d6e8688430ebd134b3c2df03dc07cb3a016b04739", size = 370745 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6d/70/2b5b76e98191ec3b8b0d1dde52d00ddcc3806799149a9ce987b0d2d31015/sentence_transformers-5.1.0-py3-none-any.whl", hash = "sha256:fc803929f6a3ce82e2b2c06e0efed7a36de535c633d5ce55efac0b710ea5643e", size = 483377 }, +] + [[package]] name = "shellingham" version = "1.5.4" @@ -5542,6 +5607,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a6/a5/c0b6468d3824fe3fde30dbb5e1f687b291608f9473681bbf7dabbf5a87d7/text_unidecode-1.3-py2.py3-none-any.whl", hash = "sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8", size = 78154 }, ] +[[package]] +name = "threadpoolctl" +version = "3.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b7/4d/08c89e34946fce2aec4fbb45c9016efd5f4d7f24af8e5d93296e935631d8/threadpoolctl-3.6.0.tar.gz", hash = "sha256:8ab8b4aa3491d812b623328249fab5302a68d2d71745c8a4c719a2fcaba9f44e", size = 21274 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638 }, +] + [[package]] name = "tifffile" version = "2025.5.10"