diff --git a/src/fi/opt/base/base_optimizer.py b/src/fi/opt/base/base_optimizer.py
index b4a4e9a..3aeea50 100644
--- a/src/fi/opt/base/base_optimizer.py
+++ b/src/fi/opt/base/base_optimizer.py
@@ -30,9 +30,16 @@ def optimize(
             data_mapper: The user-provided data mapper.
             dataset: The dataset to use for evaluation.
             metric: The metric function to use for evaluation.
-            **kwargs: Additional, optimizer-specific arguments.
+            **kwargs: Additional, optimizer-specific arguments. Common optional
+                arguments include:
+                - early_stopping (EarlyStoppingConfig): Configuration for early
+                  stopping criteria. Supports patience-based stopping, score
+                  thresholds, minimum improvement deltas, and cost budgets.
+                  When configured, optimization may terminate before reaching
+                  the maximum number of iterations.

         Returns:
-            An OptimizationResult object with the best generator and results.
+            An OptimizationResult object with the best generator, iteration
+            history, final score, and early stopping metadata (if applicable).
         """
         pass

diff --git a/src/fi/opt/optimizers/bayesian_search.py b/src/fi/opt/optimizers/bayesian_search.py
index 4f8b295..2ed00d2 100644
--- a/src/fi/opt/optimizers/bayesian_search.py
+++ b/src/fi/opt/optimizers/bayesian_search.py
@@ -10,6 +10,7 @@
 from ..datamappers import BasicDataMapper
 from ..generators.litellm import LiteLLMGenerator
 from ..base.evaluator import Evaluator
+from ..utils.early_stopping import EarlyStoppingConfig, EarlyStoppingChecker


 TEACHER_SYSTEM_PROMPT = (
@@ -154,10 +155,17 @@ def optimize(
         self,
         evaluator: Evaluator,
         data_mapper: BasicDataMapper,
         dataset: List[Dict[str, Any]],
         initial_prompts: List[str],
+        early_stopping: Optional[EarlyStoppingConfig] = None,
         **kwargs: Any,
     ) -> OptimizationResult:
         logging.info("--- Starting Bayesian Search Optimization ---")

+        # Initialize early stopping checker
+        checker = None
+        if early_stopping and early_stopping.is_enabled():
+            checker = EarlyStoppingChecker(early_stopping)
+            logging.info(f"Early stopping enabled: {early_stopping}")
+
         if not initial_prompts:
             raise ValueError("Initial prompts list cannot be empty.")

@@ -228,6 +236,16 @@ def objective(trial: optuna.Trial) -> float:
             logging.info(
                 f"Trial {trial.number}: Score={avg_score:.4f}, Num Examples={len(selected_indices)}"
             )
+
+            # Check early stopping
+            if checker:
+                eval_size = len(self._select_eval_subset(dataset))
+                if checker.should_stop(avg_score, eval_size):
+                    logging.info(
+                        f"Early stopping triggered: {checker.get_state()['stop_reason']}"
+                    )
+                    trial.study.stop()
+
             return avg_score

         study = optuna.create_study(
@@ -238,17 +256,30 @@ def objective(trial: optuna.Trial) -> float:
             study_name=self.study_name,
             load_if_exists=bool(self.storage and self.study_name),
         )
-        study.optimize(objective, n_trials=self.n_trials)
+
+        try:
+            study.optimize(objective, n_trials=self.n_trials)
+        except Exception as e:
+            logging.info(f"Optimization stopped: {e}")

         best_prompt = study.best_trial.user_attrs.get("prompt", initial_prompt)
         best_generator = LiteLLMGenerator(self.inference_model_name, best_prompt)

+        # Build result with early stopping metadata
         return OptimizationResult(
             best_generator=best_generator,
             history=history,
             final_score=float(study.best_value)
             if study.best_value is not None
             else 0.0,
+            early_stopped=checker.get_state()["stopped"] if checker else False,
+            stop_reason=checker.get_state()["stop_reason"] if checker else None,
+            total_iterations=len(history),
+            total_evaluations=(
+                checker.get_state()["total_evaluations"]
+                if checker
+                else sum(len(h.individual_results) for h in history)
+            ),
         )

     def _score_prompt(
diff --git a/src/fi/opt/optimizers/gepa.py b/src/fi/opt/optimizers/gepa.py
index 5526858..54a5874 100644
--- a/src/fi/opt/optimizers/gepa.py
+++ b/src/fi/opt/optimizers/gepa.py
@@ -16,6 +16,7 @@
 from ..base.evaluator import Evaluator
 from ..generators.litellm import LiteLLMGenerator
 from ..types import OptimizationResult, IterationHistory
+from ..utils.early_stopping import EarlyStoppingConfig, EarlyStoppingChecker

 logger = logging.getLogger(__name__)

@@ -32,11 +33,13 @@ def __init__(
         evaluator: Evaluator,
         data_mapper: BasicDataMapper,
         history_list: List[IterationHistory],
+        early_stopping_checker: Optional[EarlyStoppingChecker] = None,
     ):
         self.generator_model = generator_model
         self.evaluator = evaluator
         self.data_mapper = data_mapper
         self.history_list = history_list
+        self.early_stopping_checker = early_stopping_checker
         logger.info(f"Initialized with generator_model: {generator_model}")

     def evaluate(
@@ -100,6 +103,17 @@ def evaluate(
                 )
             )

+        # Check early stopping
+        if self.early_stopping_checker:
+            if self.early_stopping_checker.should_stop(avg_score, len(batch)):
+                logger.info(
+                    f"Early stopping triggered: "
+                    f"{self.early_stopping_checker.get_state()['stop_reason']}"
+                )
+                raise StopIteration(
+                    self.early_stopping_checker.get_state()["stop_reason"]
+                )
+
         trajectories = []
         if capture_traces:
             logger.info(f"Capturing traces.")
@@ -189,6 +203,7 @@ def optimize(
         dataset: List[Dict[str, Any]],
         initial_prompts: List[str],
         max_metric_calls: Optional[int] = 150,
+        early_stopping: Optional[EarlyStoppingConfig] = None,
     ) -> OptimizationResult:
         opt_start_time = time.time()
         logger.info("--- Starting GEPA Prompt Optimization ---")
@@ -196,6 +211,12 @@ def optimize(
         logger.info(f"Initial prompts: {initial_prompts}")
         logger.info(f"Max metric calls: {max_metric_calls}")

+        # Initialize early stopping checker
+        checker = None
+        if early_stopping and early_stopping.is_enabled():
+            checker = EarlyStoppingChecker(early_stopping)
+            logger.info(f"Early stopping enabled: {early_stopping}")
+
         if not initial_prompts:
             raise ValueError("Initial prompts list cannot be empty for GEPAOptimizer.")
         history: List[IterationHistory] = []
@@ -206,6 +227,7 @@ def optimize(
             evaluator=evaluator,
             data_mapper=data_mapper,
             history_list=history,
+            early_stopping_checker=checker,
         )

         # 2. Prepare the inputs for gepa.optimize
@@ -215,38 +237,81 @@ def optimize(
         # 3. Call the external GEPA library's optimize function
         logger.info("Calling gepa.optimize...")
         gepa_start_time = time.time()
-        gepa_result = gepa.optimize(
-            seed_candidate=seed_candidate,
-            trainset=dataset,
-            valset=dataset,
-            adapter=adapter,
-            reflection_lm=self.reflection_model,
-            max_metric_calls=max_metric_calls,
-            display_progress_bar=True,
-        )
-        gepa_end_time = time.time()
-        logger.info(
-            f"gepa.optimize finished in {gepa_end_time - gepa_start_time:.2f}s."
-        )
-        logger.info(
-            f"GEPA result best score: {gepa_result.val_aggregate_scores[gepa_result.best_idx]}"
-        )
-        logger.info(f"GEPA best candidate: {gepa_result.best_candidate}")
-        logger.info(f"Captured {len(history)} iterations in history.")
-        # 4. Translate GEPA's result back into our framework's standard format
-        logger.info("Translating GEPA result to OptimizationResult...")
+        try:
+            gepa_result = gepa.optimize(
+                seed_candidate=seed_candidate,
+                trainset=dataset,
+                valset=dataset,
+                adapter=adapter,
+                reflection_lm=self.reflection_model,
+                max_metric_calls=max_metric_calls,
+                display_progress_bar=True,
+            )
+            gepa_end_time = time.time()
+            logger.info(
+                f"gepa.optimize finished in {gepa_end_time - gepa_start_time:.2f}s."
+            )
+            logger.info(
+                f"GEPA result best score: {gepa_result.val_aggregate_scores[gepa_result.best_idx]}"
+            )
+            logger.info(f"GEPA best candidate: {gepa_result.best_candidate}")

-        final_best_generator = LiteLLMGenerator(
-            model=self.generator_model,
-            prompt_template=gepa_result.best_candidate.get("prompt", ""),
-        )
+            logger.info(f"Captured {len(history)} iterations in history.")
+            # 4. Translate GEPA's result back into our framework's standard format
+            logger.info("Translating GEPA result to OptimizationResult...")

-        result = OptimizationResult(
-            best_generator=final_best_generator,
-            history=history,
-            final_score=gepa_result.val_aggregate_scores[gepa_result.best_idx],
-        )
+            final_best_generator = LiteLLMGenerator(
+                model=self.generator_model,
+                prompt_template=gepa_result.best_candidate.get("prompt", ""),
+            )
+
+            # Build result with early stopping metadata
+            result = OptimizationResult(
+                best_generator=final_best_generator,
+                history=history,
+                final_score=gepa_result.val_aggregate_scores[gepa_result.best_idx],
+                early_stopped=False,
+                stop_reason=None,
+                total_iterations=len(history),
+                total_evaluations=(
+                    checker.get_state()["total_evaluations"]
+                    if checker
+                    else sum(len(h.individual_results) for h in history)
+                ),
+            )
+
+        except StopIteration as e:
+            gepa_end_time = time.time()
+            logger.info(
+                f"GEPA stopped early after {gepa_end_time - gepa_start_time:.2f}s: {e}"
+            )
+
+            # Use best from history
+            if not history:
+                raise RuntimeError(
+                    "Early stopping triggered before any evaluations completed"
+                )
+
+            best_history = max(history, key=lambda h: h.average_score)
+            final_best_generator = LiteLLMGenerator(
+                model=self.generator_model,
+                prompt_template=best_history.prompt,
+            )
+
+            result = OptimizationResult(
+                best_generator=final_best_generator,
+                history=history,
+                final_score=best_history.average_score,
+                early_stopped=True,
+                stop_reason=str(e),
+                total_iterations=len(history),
+                total_evaluations=(
+                    checker.get_state()["total_evaluations"]
+                    if checker
+                    else sum(len(h.individual_results) for h in history)
+                ),
+            )

         opt_end_time = time.time()
         logger.info(
diff --git a/src/fi/opt/optimizers/metaprompt.py b/src/fi/opt/optimizers/metaprompt.py
index d111314..3b63bed 100644
--- a/src/fi/opt/optimizers/metaprompt.py
+++ b/src/fi/opt/optimizers/metaprompt.py
@@ -10,6 +10,7 @@
 from ..base.evaluator import Evaluator
 from ..generators.litellm import LiteLLMGenerator
 from ..types import IterationHistory, OptimizationResult
+from ..utils.early_stopping import EarlyStoppingConfig, EarlyStoppingChecker
 import logging

 logger = logging.getLogger(__name__)
@@ -89,9 +90,16 @@ def optimize(
         task_description: str = "I want to improve my prompt.",
         num_rounds: Optional[int] = 5,
         eval_subset_size: Optional[int] = 40,
+        early_stopping: Optional[EarlyStoppingConfig] = None,
     ) -> OptimizationResult:
         logger.info("--- Starting Meta-Prompt Optimization ---")

+        # Initialize early stopping checker
+        checker = None
+        if early_stopping and early_stopping.is_enabled():
+            checker = EarlyStoppingChecker(early_stopping)
+            logger.info(f"Early stopping enabled: {early_stopping}")
+
         if not initial_prompts:
             raise ValueError("Initial prompts list cannot be empty.")

@@ -125,6 +133,15 @@ def optimize(
                 best_prompt = current_prompt
                 logger.info(f"New best score found: {best_score:.4f}")

+            # Check early stopping
+            if checker:
+                num_evals = len(eval_subset)
+                if checker.should_stop(current_score, num_evals):
+                    logger.info(
+                        f"Early stopping triggered: {checker.get_state()['stop_reason']}"
+                    )
+                    break
+
             # 2. Use the teacher model to generate a new, improved prompt
             annotated_results_str = self._format_results(iteration_history, eval_subset)

@@ -157,10 +174,20 @@ def optimize(
         )

         final_best_generator = LiteLLMGenerator(self.teacher.model_name, best_prompt)
+
+        # Build result with early stopping metadata
         return OptimizationResult(
             best_generator=final_best_generator,
             history=history,
             final_score=best_score,
+            early_stopped=checker.get_state()["stopped"] if checker else False,
+            stop_reason=checker.get_state()["stop_reason"] if checker else None,
+            total_iterations=len(history),
+            total_evaluations=(
+                checker.get_state()["total_evaluations"]
+                if checker
+                else sum(len(h.individual_results) for h in history)
+            ),
         )

     def _score_prompt(
diff --git a/src/fi/opt/optimizers/promptwizard.py b/src/fi/opt/optimizers/promptwizard.py
index 1e81986..85a7cc1 100644
--- a/src/fi/opt/optimizers/promptwizard.py
+++ b/src/fi/opt/optimizers/promptwizard.py
@@ -2,7 +2,7 @@
 import logging
 import random
 import re
-from typing import Any, Dict, List, Set
+from typing import Any, Dict, List, Set, Optional

 logger = logging.getLogger(__name__)

@@ -14,6 +14,7 @@
 from ..base.evaluator import Evaluator
 from ..generators.litellm import LiteLLMGenerator
 from ..types import IterationHistory, OptimizationResult
+from ..utils.early_stopping import EarlyStoppingConfig, EarlyStoppingChecker

 MUTATE_PROMPT = """
 You are an expert in prompt engineering. You will be given a task description and different styles known as meta prompts. Your task is to generate {num_variations} diverse variations of the following instruction by adaptively mixing meta prompt while keeping similar semantic meaning.
@@ -91,6 +92,7 @@ def optimize(
         dataset: List[Dict[str, Any]],
         initial_prompts: List[str],
         task_description: str = "No task description given.",
+        early_stopping: Optional[EarlyStoppingConfig] = None,
         **kwargs: Any,
     ) -> OptimizationResult:
         eval_subset_size = kwargs.get("eval_subset_size", 25)
@@ -100,6 +102,12 @@ def optimize(
         logger.debug(f"Dataset size: {len(dataset)}")
         logger.debug(f"Evaluation subset size: {eval_subset_size}")

+        # Initialize early stopping checker
+        checker = None
+        if early_stopping and early_stopping.is_enabled():
+            checker = EarlyStoppingChecker(early_stopping)
+            logger.info(f"Early stopping enabled: {early_stopping}")
+
         if not initial_prompts:
             raise ValueError("Initial prompts list cannot be empty.")

@@ -141,6 +149,16 @@ def optimize(
                 score = sorted_by_score[idx].average_score
                 logger.debug(f"  - Prompt (Score: {score:.4f}): '{p[:100]}...'")

+            # Check early stopping
+            if checker:
+                best_round_score = sorted_by_score[0].average_score
+                num_evals = len(candidate_pool) * len(eval_subset)
+                if checker.should_stop(best_round_score, num_evals):
+                    logger.info(
+                        f"Early stopping triggered: {checker.get_state()['stop_reason']}"
+                    )
+                    break
+
             # 3. Critique and Refine
             logger.info("Step 3: Critiquing and refining top prompts...")
             refined_prompts = set()
@@ -195,10 +213,20 @@ def optimize(
         logger.info(f"Final best prompt (Score: {best_score:.4f}): '{best_prompt}'")

         final_best_generator = LiteLLMGenerator(self.teacher.model_name, best_prompt)
+
+        # Build result with early stopping metadata
         return OptimizationResult(
             best_generator=final_best_generator,
             history=history,
             final_score=best_score,
+            early_stopped=checker.get_state()["stopped"] if checker else False,
+            stop_reason=checker.get_state()["stop_reason"] if checker else None,
+            total_iterations=len(history),
+            total_evaluations=(
+                checker.get_state()["total_evaluations"]
+                if checker
+                else sum(len(h.individual_results) for h in history)
+            ),
         )

     def _mutate_instruction(
diff --git a/src/fi/opt/optimizers/protegi.py b/src/fi/opt/optimizers/protegi.py
index 3a9e8ae..a247728 100644
--- a/src/fi/opt/optimizers/protegi.py
+++ b/src/fi/opt/optimizers/protegi.py
@@ -13,6 +13,7 @@
 from ..base.evaluator import Evaluator
 from ..generators.litellm import LiteLLMGenerator
 from ..types import IterationHistory, OptimizationResult
+from ..utils.early_stopping import EarlyStoppingConfig, EarlyStoppingChecker

 GET_GRADIENTS_PROMPT = """
 You are an expert in prompt engineering. I'm trying to write a zero-shot classifier prompt.
@@ -82,11 +83,18 @@ def optimize(
         self,
         evaluator: Evaluator,
         data_mapper: BasicDataMapper,
         dataset: List[Dict[str, Any]],
         initial_prompts: List[str],
+        early_stopping: Optional[EarlyStoppingConfig] = None,
         **kwargs: Any,
     ) -> OptimizationResult:
         num_rounds = kwargs.get("num_rounds", 3)
         eval_subset_size = kwargs.get("eval_subset_size", 32)

+        # Initialize early stopping checker
+        checker = None
+        if early_stopping and early_stopping.is_enabled():
+            checker = EarlyStoppingChecker(early_stopping)
+            logging.info(f"Early stopping enabled: {early_stopping}")
+
         beam = set(initial_prompts)
         best_overall_score = -1.0
         best_overall_prompt = initial_prompts[0] if initial_prompts else ""
@@ -138,13 +146,32 @@ def optimize(
                 best_overall_score = best_round_score
                 best_overall_prompt = best_round_prompt

+            # Check early stopping
+            if checker:
+                num_evals = len(candidate_pool) * len(eval_subset)
+                if checker.should_stop(best_round_score, num_evals):
+                    logging.info(
+                        f"Early stopping triggered: {checker.get_state()['stop_reason']}"
+                    )
+                    break
+
         final_best_generator = LiteLLMGenerator(
             self.teacher.model_name, best_overall_prompt
         )
+
+        # Build result with early stopping metadata
         return OptimizationResult(
             best_generator=final_best_generator,
             history=history,
             final_score=best_overall_score,
+            early_stopped=checker.get_state()["stopped"] if checker else False,
+            stop_reason=checker.get_state()["stop_reason"] if checker else None,
+            total_iterations=len(history),
+            total_evaluations=(
+                checker.get_state()["total_evaluations"]
+                if checker
+                else sum(len(h.individual_results) for h in history)
+            ),
         )

     def _expand_candidates(
diff --git a/src/fi/opt/optimizers/random_search.py b/src/fi/opt/optimizers/random_search.py
index d609a62..6817e2e 100644
--- a/src/fi/opt/optimizers/random_search.py
+++ b/src/fi/opt/optimizers/random_search.py
@@ -3,7 +3,7 @@
 import time
 import json
 from pydantic import BaseModel, Field, ValidationError
-from typing import List, Dict, Any
+from typing import List, Dict, Any, Optional

 from ..base.evaluator import Evaluator
 from ..base.base_optimizer import BaseOptimizer
@@ -11,6 +11,7 @@
 from ..datamappers import BasicDataMapper
 from ..types import IterationHistory, OptimizationResult
+from ..utils.early_stopping import EarlyStoppingConfig, EarlyStoppingChecker

 logger = logging.getLogger(__name__)

@@ -65,11 +66,18 @@ def optimize(
         self,
         evaluator: Evaluator,
         data_mapper: BasicDataMapper,
         dataset: List[Dict[str, Any]],
+        early_stopping: Optional[EarlyStoppingConfig] = None,
         **kwargs: Any,
     ) -> OptimizationResult:
         logger.info("--- Starting Random Search Optimization ---")
         optimization_start_time = time.time()

+        # Initialize early stopping checker
+        checker = None
+        if early_stopping and early_stopping.is_enabled():
+            checker = EarlyStoppingChecker(early_stopping)
+            logger.info(f"Early stopping enabled: {early_stopping}")
+
         initial_prompt = self.generator.get_prompt_template()
         best_prompt = initial_prompt
         best_score = -1.0
@@ -126,6 +134,15 @@ def optimize(
                 )
             )

+            # Check early stopping
+            if checker:
+                num_evals = len(dataset)
+                if checker.should_stop(avg_score, num_evals):
+                    logger.info(
+                        f"Early stopping triggered: {checker.get_state()['stop_reason']}"
+                    )
+                    break
+
             if avg_score > best_score:
                 best_score = avg_score
                 best_prompt = variation
@@ -143,8 +160,19 @@
             f"--- Random Search Optimization finished in {optimization_end_time - optimization_start_time:.2f} seconds ---"
         )

+        # Build result with early stopping metadata
         return OptimizationResult(
-            best_generator=self.generator, history=history, final_score=best_score
+            best_generator=self.generator,
+            history=history,
+            final_score=best_score,
+            early_stopped=checker.get_state()["stopped"] if checker else False,
+            stop_reason=checker.get_state()["stop_reason"] if checker else None,
+            total_iterations=len(history),
+            total_evaluations=(
+                checker.get_state()["total_evaluations"]
+                if checker
+                else len(history) * len(dataset)
+            ),
         )

     def _generate_variations(self, initial_prompt: str) -> List[str]:
diff --git a/src/fi/opt/types.py b/src/fi/opt/types.py
index 0a7b1fb..d27f645 100644
--- a/src/fi/opt/types.py
+++ b/src/fi/opt/types.py
@@ -40,3 +40,21 @@ class OptimizationResult(BaseModel):
     best_generator: Any
     history: List[IterationHistory]
     final_score: float = 0.0
+
+    # Early stopping metadata
+    early_stopped: bool = Field(
+        default=False,
+        description="Whether optimization was terminated early by a stopping criterion"
+    )
+    stop_reason: Optional[str] = Field(
+        default=None,
+        description="Explanation for early stopping (if applicable)"
+    )
+    total_iterations: int = Field(
+        default=0,
+        description="Total number of iterations completed"
+    )
+    total_evaluations: int = Field(
+        default=0,
+        description="Total number of dataset evaluations performed"
+    )
diff --git a/src/fi/opt/utils/__init__.py b/src/fi/opt/utils/__init__.py
index c6f54b4..c8fba37 100644
--- a/src/fi/opt/utils/__init__.py
+++ b/src/fi/opt/utils/__init__.py
@@ -1,3 +1,4 @@
 from .setup_logging import setup_logging
+from .early_stopping import EarlyStoppingConfig, EarlyStoppingChecker

-__all__ = ["setup_logging"]
+__all__ = ["setup_logging", "EarlyStoppingConfig", "EarlyStoppingChecker"]
diff --git a/src/fi/opt/utils/early_stopping.py b/src/fi/opt/utils/early_stopping.py
new file mode 100644
index 0000000..e540e06
--- /dev/null
+++ b/src/fi/opt/utils/early_stopping.py
@@ -0,0 +1,247 @@
+import logging
+from typing import Optional, Dict, Any
+from pydantic import BaseModel, Field
+
+logger = logging.getLogger(__name__)
+
+
+class EarlyStoppingConfig(BaseModel):
+    """
+    Configuration for early stopping criteria in optimization.
+
+    All fields are optional - if all are None, early stopping is disabled.
+    Multiple criteria can be configured simultaneously; optimization stops
+    when ANY criterion is met.
+    """
+
+    patience: Optional[int] = Field(
+        None,
+        gt=0,
+        description=(
+            "Stop optimization after this many consecutive iterations "
+            "without score improvement. None disables patience-based stopping."
+        ),
+    )
+
+    min_score_threshold: Optional[float] = Field(
+        None,
+        ge=0.0,
+        le=1.0,
+        description=(
+            "Stop optimization when average score reaches or exceeds this "
+            "threshold (0.0-1.0). None disables threshold-based stopping."
+        ),
+    )
+
+    min_delta: Optional[float] = Field(
+        None,
+        ge=0.0,
+        description=(
+            "Minimum score improvement to be considered as progress. "
+            "If current_score > (best_score + min_delta), patience counter resets. "
+            "None defaults to 0.0 (any improvement counts)."
+        ),
+    )
+
+    max_evaluations: Optional[int] = Field(
+        None,
+        gt=0,
+        description=(
+            "Maximum number of dataset evaluations allowed. Counts total "
+            "evaluations across all iterations. None disables budget-based stopping."
+        ),
+    )
+
+    def is_enabled(self) -> bool:
+        """
+        Check if any early stopping criterion is configured.
+
+        Returns:
+            True if at least one stopping criterion is set, False otherwise
+        """
+        return any(
+            [
+                self.patience is not None,
+                self.min_score_threshold is not None,
+                self.min_delta is not None,
+                self.max_evaluations is not None,
+            ]
+        )
+
+
+class EarlyStoppingChecker:
+    """
+    Stateful checker that tracks optimization progress and evaluates
+    stopping conditions across iterations.
+
+    This class maintains internal state about the best score achieved,
+    iterations without improvement, and total evaluations performed.
+    Call should_stop() after each iteration to check if optimization
+    should terminate.
+
+    Example:
+        config = EarlyStoppingConfig(patience=3, min_delta=0.01)
+        checker = EarlyStoppingChecker(config)
+
+        for iteration in range(max_iterations):
+            score = evaluate_current_prompt()
+
+            if checker.should_stop(score, num_evaluations=10):
+                print(f"Stopped: {checker.get_state()['stop_reason']}")
+                break
+    """
+
+    def __init__(self, config: EarlyStoppingConfig):
+        """
+        Initialize early stopping checker.
+
+        Args:
+            config: Early stopping configuration
+        """
+        self.config = config
+
+        # State tracking
+        self._best_score: float = -1.0
+        self._iterations_without_improvement: int = 0
+        self._total_evaluations: int = 0
+        self._stopped: bool = False
+        self._stop_reason: Optional[str] = None
+
+    def should_stop(
+        self,
+        current_score: float,
+        num_evaluations: int = 1,
+    ) -> bool:
+        """
+        Check if optimization should stop based on current iteration.
+
+        This method updates internal state and evaluates all configured
+        stopping criteria. Returns True if any criterion is met.
+
+        Args:
+            current_score: Average score from current iteration (0.0-1.0)
+            num_evaluations: Number of dataset evaluations in this iteration
+
+        Returns:
+            True if any stopping criterion is met, False otherwise
+        """
+        if self._stopped:
+            return True
+
+        if not self.config.is_enabled():
+            return False
+
+        # Update evaluation count
+        self._total_evaluations += num_evaluations
+
+        # Check cost budget first (always check regardless of score)
+        if self._check_cost_budget():
+            return True
+
+        # Check absolute threshold
+        if self._check_score_threshold(current_score):
+            return True
+
+        # Update improvement tracking
+        min_delta = self.config.min_delta if self.config.min_delta is not None else 0.0
+        if current_score > (self._best_score + min_delta):
+            # Improvement detected - reset patience
+            self._best_score = current_score
+            self._iterations_without_improvement = 0
+            logger.debug(
+                f"Early stopping: Improvement detected "
+                f"(score={current_score:.4f}, best={self._best_score:.4f})"
+            )
+        else:
+            # No improvement - increment patience counter
+            self._iterations_without_improvement += 1
+            logger.debug(
+                f"Early stopping: No improvement "
+                f"({self._iterations_without_improvement} iterations)"
+            )
+
+        # Check patience
+        if self._check_patience():
+            return True
+
+        return False
+
+    def _check_patience(self) -> bool:
+        """Check patience criterion."""
+        if self.config.patience is None:
+            return False
+
+        if self._iterations_without_improvement >= self.config.patience:
+            self._stopped = True
+            self._stop_reason = (
+                f"Patience exceeded: no improvement for "
+                f"{self._iterations_without_improvement} iterations "
+                f"(best score: {self._best_score:.4f})"
+            )
+            return True
+
+        return False
+
+    def _check_score_threshold(self, score: float) -> bool:
+        """Check absolute score threshold criterion."""
+        if self.config.min_score_threshold is None:
+            return False
+
+        if score >= self.config.min_score_threshold:
+            self._stopped = True
+            self._stop_reason = (
+                f"Score threshold reached: {score:.4f} >= "
+                f"{self.config.min_score_threshold:.4f}"
+            )
+            return True
+
+        return False
+
+    def _check_cost_budget(self) -> bool:
+        """Check cost budget criterion."""
+        if self.config.max_evaluations is None:
+            return False
+
+        if self._total_evaluations >= self.config.max_evaluations:
+            self._stopped = True
+            self._stop_reason = (
+                f"Evaluation budget exhausted: {self._total_evaluations} "
+                f">= {self.config.max_evaluations} "
+                f"(best score: {self._best_score:.4f})"
+            )
+            return True
+
+        return False
+
+    def get_state(self) -> Dict[str, Any]:
+        """
+        Get current checker state for debugging and logging.
+
+        Returns:
+            Dictionary containing:
+            - best_score: Best score achieved so far
+            - iterations_without_improvement: Current patience counter
+            - total_evaluations: Total evaluations performed
+            - stopped: Whether stopping criterion has been triggered
+            - stop_reason: Reason for stopping (if stopped)
+        """
+        return {
+            "best_score": self._best_score,
+            "iterations_without_improvement": self._iterations_without_improvement,
+            "total_evaluations": self._total_evaluations,
+            "stopped": self._stopped,
+            "stop_reason": self._stop_reason,
+        }
+
+    def reset(self) -> None:
+        """
+        Reset checker state for reuse across multiple optimization runs.
+
+        This allows the same checker instance to be reused without
+        creating a new object.
+ """ + self._best_score = -1.0 + self._iterations_without_improvement = 0 + self._total_evaluations = 0 + self._stopped = False + self._stop_reason = None diff --git a/src/pyproject.toml b/src/pyproject.toml index 274cb6b..975a026 100644 --- a/src/pyproject.toml +++ b/src/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "agent-opt" -version = "0.0.3" +version = "0.1.0" authors = [ { name="Future AGI", email="hello@futureagi.io" }, ] diff --git a/src/uv.lock b/src/uv.lock index 393ea3b..2ddc5ef 100644 --- a/src/uv.lock +++ b/src/uv.lock @@ -18,7 +18,7 @@ wheels = [ [[package]] name = "agent-opt" -version = "0.0.3" +version = "0.1.0" source = { editable = "." } dependencies = [ { name = "ai-evaluation" },