diff --git a/examples/r_robust_regression/README.md b/examples/r_robust_regression/README.md new file mode 100644 index 000000000..47baf9782 --- /dev/null +++ b/examples/r_robust_regression/README.md @@ -0,0 +1,35 @@ +# R Robust Regression Evolution + +This example demonstrates how to use OpenEvolve with the R programming language. The example focuses on evolving robust regression algorithms that can handle outliers in data, showcasing OpenEvolve's ability to work with statistical computing languages beyond Python. + +## Files + +- `initial_program.r`: Starting R implementation with basic least squares regression +- `evaluator.py`: Python evaluator that runs R code and measures performance +- `config.yaml`: Configuration optimized for statistical algorithm evolution +- `requirements.txt`: Dependencies for both R and Python components + +## Prerequisites + +### R Dependencies +Install R (version 3.6 or higher) and the required packages: + +```r +install.packages(c("jsonlite")) +``` + +### Python Dependencies +```bash +pip install -r requirements.txt +``` + +## Usage + +Run the evolution process: + +```bash +cd examples/r_robust_regression +python ../../openevolve-run.py initial_program.r evaluator.py --config config.yaml --iterations 100 +``` + +This example shows how OpenEvolve can evolve algorithms in R by starting with a basic least squares implementation and improving it to handle outliers through various robust regression techniques. \ No newline at end of file diff --git a/examples/r_robust_regression/config.yaml b/examples/r_robust_regression/config.yaml new file mode 100644 index 000000000..4cba3040e --- /dev/null +++ b/examples/r_robust_regression/config.yaml @@ -0,0 +1,57 @@ +# Configuration for R Robust Regression Evolution + +# General settings +max_iterations: 100 +checkpoint_interval: 10 +log_level: "INFO" + +# LLM configuration +llm: + primary_model: "gemini-2.5-flash-lite-preview-06-17" + primary_model_weight: 0.8 + secondary_model: "gemini-2.5-flash" + secondary_model_weight: 0.2 + api_base: "https://generativelanguage.googleapis.com/v1beta/openai/" + + temperature: 0.8 # Higher temperature for more creative statistical approaches + max_tokens: 4096 + + # Custom system message for R statistical programming + system_message: | + You are an expert statistician and R programmer specializing in robust regression methods. + Focus on improving the algorithm's ability to handle outliers while maintaining accuracy. + Consider techniques like Huber regression, RANSAC, M-estimators, and other robust methods. + Ensure the R code is efficient and follows best practices. 
+
+# Prompt configuration
+prompt:
+  num_top_programs: 3
+  num_diverse_programs: 2
+
+  # Include artifacts to show R errors and performance metrics
+  include_artifacts: true
+  max_artifact_bytes: 4096
+
+# Database configuration
+database:
+  population_size: 100
+  num_islands: 3
+
+  # Feature dimensions for robust regression
+  feature_dimensions:
+    - "score"               # Overall performance
+    - "outlier_robustness"  # Robustness to outliers
+    - "execution_time"      # Computational efficiency
+  feature_bins: 5
+
+# Evaluator configuration
+evaluator:
+  timeout: 30  # R scripts can be slow
+  parallel_evaluations: 2  # Limit parallel R processes
+
+  # Use cascade evaluation to quickly filter out broken implementations
+  cascade_evaluation: true
+  cascade_thresholds:
+    - 0.3  # Basic correctness
+    - 0.6  # Good performance
+    - 0.8  # Excellent robustness
\ No newline at end of file
diff --git a/examples/r_robust_regression/evaluator.py b/examples/r_robust_regression/evaluator.py
new file mode 100644
index 000000000..66b9577b1
--- /dev/null
+++ b/examples/r_robust_regression/evaluator.py
@@ -0,0 +1,248 @@
+"""
+Evaluator for R robust regression example
+"""
+
+import asyncio
+import json
+import os
+import subprocess
+import tempfile
+import time
+from pathlib import Path
+from typing import Dict, Any
+
+import numpy as np
+
+from openevolve.evaluation_result import EvaluationResult
+
+
+async def evaluate(program_path: str) -> EvaluationResult:
+    """
+    Evaluate an R program implementing robust regression.
+
+    Tests the program on synthetic data with outliers to measure:
+    - Accuracy (MSE, MAE, R-squared)
+    - Robustness to outliers
+    - Computational efficiency
+    """
+    try:
+        # Generate test datasets with different outlier levels
+        test_cases = [
+            generate_regression_data(n_samples=100, n_features=3, outlier_fraction=0.0, noise=0.1),
+            generate_regression_data(n_samples=100, n_features=3, outlier_fraction=0.1, noise=0.1),
+            generate_regression_data(n_samples=100, n_features=3, outlier_fraction=0.2, noise=0.1),
+            generate_regression_data(n_samples=200, n_features=5, outlier_fraction=0.15, noise=0.2),
+        ]
+
+        total_score = 0
+        total_mse = 0
+        total_mae = 0
+        total_medae = 0
+        total_r_squared = 0
+        total_outlier_robustness = 0
+        total_time = 0
+
+        artifacts = {"test_results": []}
+
+        for i, (X, y, true_coeffs) in enumerate(test_cases):
+            # Save test data to temporary CSV files first (the R script below reads them by path)
+            X_file = tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False)
+            y_file = tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False)
+            np.savetxt(X_file.name, X, delimiter=',', fmt='%.6f')
+            np.savetxt(y_file.name, y, delimiter=',', fmt='%.6f')
+            X_file.close()
+            y_file.close()
+
+            # Create a temporary R script that sources the program and runs it
+            with tempfile.NamedTemporaryFile(mode='w', suffix='.r', delete=False) as f:
+                f.write(f"""
+# Source the program
+source("{program_path}")
+
+# Load test data
+X <- as.matrix(read.csv("{X_file.name}", header=FALSE))
+y <- as.vector(as.matrix(read.csv("{y_file.name}", header=FALSE)))
+
+# Time the execution
+start_time <- Sys.time()
+metrics <- main()
+end_time <- Sys.time()
+exec_time <- as.numeric(end_time - start_time, units="secs")
+
+# Add execution time
+metrics$execution_time <- exec_time
+
+# Save results
+write(jsonlite::toJSON(metrics, auto_unbox=TRUE), "results.json")
+""")
+                test_script = f.name
+
+            # Run the R script
+            try:
+                result = subprocess.run(
+                    ['Rscript', test_script],
+                    capture_output=True,
+                    text=True,
+                    timeout=30,
+                    cwd=os.path.dirname(test_script)
+                )
+
+                if
result.returncode != 0: + artifacts["test_results"].append({ + "test_case": i, + "error": "R execution failed", + "stderr": result.stderr + }) + continue + + # Read results + results_path = os.path.join(os.path.dirname(test_script), 'results.json') + if not os.path.exists(results_path): + artifacts["test_results"].append({ + "test_case": i, + "error": "No results file produced" + }) + continue + + with open(results_path, 'r') as f: + metrics = json.load(f) + + # Calculate case score (emphasize robustness for cases with outliers) + outlier_fraction = [0.0, 0.1, 0.2, 0.15][i] + if outlier_fraction > 0: + # For cases with outliers, prioritize robust metrics + case_score = ( + 0.2 * (1 - min(metrics.get('mse', 1), 1)) + + 0.3 * (1 - min(metrics.get('medae', 1), 1)) + + 0.4 * metrics.get('outlier_robustness', 0) + + 0.1 * max(0, metrics.get('r_squared', 0)) + ) + else: + # For clean data, prioritize accuracy + case_score = ( + 0.4 * (1 - min(metrics.get('mse', 1), 1)) + + 0.3 * (1 - min(metrics.get('mae', 1), 1)) + + 0.2 * max(0, metrics.get('r_squared', 0)) + + 0.1 * metrics.get('outlier_robustness', 0) + ) + + total_score += case_score + total_mse += metrics.get('mse', 1) + total_mae += metrics.get('mae', 1) + total_medae += metrics.get('medae', 1) + total_r_squared += max(0, metrics.get('r_squared', 0)) + total_outlier_robustness += metrics.get('outlier_robustness', 0) + total_time += metrics.get('execution_time', 1) + + artifacts["test_results"].append({ + "test_case": i, + "outlier_fraction": outlier_fraction, + "metrics": metrics, + "case_score": case_score + }) + + except subprocess.TimeoutExpired: + artifacts["test_results"].append({ + "test_case": i, + "error": "Timeout" + }) + except Exception as e: + artifacts["test_results"].append({ + "test_case": i, + "error": str(e) + }) + finally: + # Cleanup + os.unlink(test_script) + os.unlink(X_file.name) + os.unlink(y_file.name) + if os.path.exists(os.path.join(os.path.dirname(test_script), 'results.json')): + os.unlink(os.path.join(os.path.dirname(test_script), 'results.json')) + + # Calculate average metrics + n_successful = len([r for r in artifacts["test_results"] if "error" not in r]) + if n_successful == 0: + return EvaluationResult( + metrics={ + "score": 0.0, + "mse": float('inf'), + "mae": float('inf'), + "medae": float('inf'), + "r_squared": 0.0, + "outlier_robustness": 0.0, + "execution_time": float('inf') + }, + artifacts=artifacts + ) + + avg_score = total_score / n_successful + avg_mse = total_mse / n_successful + avg_mae = total_mae / n_successful + avg_medae = total_medae / n_successful + avg_r_squared = total_r_squared / n_successful + avg_outlier_robustness = total_outlier_robustness / n_successful + avg_time = total_time / n_successful + + # Add efficiency bonus for faster execution + efficiency_bonus = max(0, 1 - avg_time) * 0.1 + final_score = min(1.0, avg_score + efficiency_bonus) + + return EvaluationResult( + metrics={ + "score": final_score, + "mse": avg_mse, + "mae": avg_mae, + "medae": avg_medae, + "r_squared": avg_r_squared, + "outlier_robustness": avg_outlier_robustness, + "execution_time": avg_time + }, + artifacts=artifacts + ) + + except Exception as e: + return EvaluationResult( + metrics={ + "score": 0.0, + "mse": float('inf'), + "mae": float('inf'), + "medae": float('inf'), + "r_squared": 0.0, + "outlier_robustness": 0.0, + "execution_time": float('inf') + }, + artifacts={"error": str(e), "type": "evaluation_error"} + ) + + +def generate_regression_data(n_samples=100, n_features=3, outlier_fraction=0.1, 
noise=0.1): + """Generate synthetic regression data with outliers.""" + np.random.seed(42) + + # Generate features + X = np.random.randn(n_samples, n_features) + + # True coefficients + true_coeffs = np.random.randn(n_features + 1) # +1 for intercept + + # Generate target values + y = true_coeffs[0] + X @ true_coeffs[1:] + noise * np.random.randn(n_samples) + + # Add outliers + n_outliers = int(n_samples * outlier_fraction) + if n_outliers > 0: + outlier_indices = np.random.choice(n_samples, n_outliers, replace=False) + # Make outliers by adding large errors + y[outlier_indices] += np.random.choice([-1, 1], n_outliers) * np.random.uniform(3, 10, n_outliers) + + return X, y, true_coeffs + + +# For testing +if __name__ == "__main__": + import sys + if len(sys.argv) > 1: + result = asyncio.run(evaluate(sys.argv[1])) + print(f"Score: {result.metrics['score']:.4f}") + print(f"MSE: {result.metrics['mse']:.4f}") + print(f"Outlier Robustness: {result.metrics['outlier_robustness']:.4f}") \ No newline at end of file diff --git a/examples/r_robust_regression/initial_program.r b/examples/r_robust_regression/initial_program.r new file mode 100644 index 000000000..db97e653a --- /dev/null +++ b/examples/r_robust_regression/initial_program.r @@ -0,0 +1,83 @@ +# Robust Regression Implementation +# This program implements a regression algorithm that can be evolved to better handle outliers + +robust_regression <- function(X, y) { + # EVOLVE-BLOCK-START + # Simple least squares regression as initial implementation + # This can be evolved to use more robust methods like: + # - Huber regression + # - RANSAC + # - Theil-Sen estimator + # - Iteratively reweighted least squares + + # Add intercept column + X_with_intercept <- cbind(1, X) + + # Calculate coefficients using normal equation + # beta = (X'X)^(-1) X'y + XtX <- t(X_with_intercept) %*% X_with_intercept + Xty <- t(X_with_intercept) %*% y + + # Solve for coefficients + coefficients <- solve(XtX, Xty) + + # Calculate predictions + predictions <- X_with_intercept %*% coefficients + + # Calculate residuals + residuals <- y - predictions + + # Return results + return(list( + coefficients = coefficients, + predictions = predictions, + residuals = residuals + )) + # EVOLVE-BLOCK-END +} + +# Function to calculate model performance metrics +calculate_metrics <- function(y_true, y_pred, residuals) { + n <- length(y_true) + + # Mean Squared Error + mse <- mean(residuals^2) + + # Mean Absolute Error + mae <- mean(abs(residuals)) + + # R-squared + ss_res <- sum(residuals^2) + ss_tot <- sum((y_true - mean(y_true))^2) + r_squared <- 1 - (ss_res / ss_tot) + + # Robust metrics + # Median Absolute Error + medae <- median(abs(residuals)) + + # Percentage of outliers (residuals > 2 standard deviations) + outlier_threshold <- 2 * sd(residuals) + outlier_percentage <- sum(abs(residuals) > outlier_threshold) / n + + return(list( + mse = mse, + mae = mae, + r_squared = r_squared, + medae = medae, + outlier_robustness = 1 - outlier_percentage + )) +} + +# Main execution function +main <- function() { + # This will be called by the evaluator with test data + # The evaluator will provide X and y through the environment + + # Perform robust regression + result <- robust_regression(X, y) + + # Calculate metrics + metrics <- calculate_metrics(y, result$predictions, result$residuals) + + return(metrics) +} \ No newline at end of file diff --git a/examples/r_robust_regression/requirements.txt b/examples/r_robust_regression/requirements.txt new file mode 100644 index 
000000000..03b550283 --- /dev/null +++ b/examples/r_robust_regression/requirements.txt @@ -0,0 +1,13 @@ +# R packages required for robust regression example +# Install these packages in R with: install.packages(c("jsonlite")) + +# Core dependencies (installed via R): +# - jsonlite: for JSON output from R scripts +# - MASS: for robust regression methods (usually pre-installed) + +# System requirements: +# - R (version 3.6 or higher) +# - Rscript command available in PATH + +# Python requirements for the evaluator +numpy>=1.22.0 \ No newline at end of file diff --git a/examples/rust_adaptive_sort/README.md b/examples/rust_adaptive_sort/README.md new file mode 100644 index 000000000..5b95df050 --- /dev/null +++ b/examples/rust_adaptive_sort/README.md @@ -0,0 +1,37 @@ +# Rust Adaptive Sorting Evolution + +This example demonstrates how to use OpenEvolve with the Rust programming language. The example focuses on evolving adaptive sorting algorithms that optimize their behavior based on input data characteristics, showcasing OpenEvolve's ability to work with compiled systems programming languages. + +## Files + +- `initial_program.rs`: Starting Rust implementation with basic quicksort +- `evaluator.py`: Python evaluator that compiles and benchmarks Rust code +- `config.yaml`: Configuration optimized for performance-critical algorithm evolution +- `requirements.txt`: System dependencies and Python requirements + +## Prerequisites + +### System Dependencies +1. **Rust Toolchain**: Install from [rustup.rs](https://rustup.rs/) + ```bash + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh + source ~/.cargo/env + ``` + +2. **Cargo**: Comes with Rust installation + +### Python Dependencies +```bash +pip install -r requirements.txt +``` + +## Usage + +Run the evolution process: + +```bash +cd examples/rust_adaptive_sort +python ../../openevolve-run.py initial_program.rs evaluator.py --config config.yaml --iterations 150 +``` + +This example shows how OpenEvolve can evolve algorithms in Rust by starting with a basic quicksort implementation and improving it to adaptively handle different data patterns and optimize performance across various sorting scenarios. \ No newline at end of file diff --git a/examples/rust_adaptive_sort/config.yaml b/examples/rust_adaptive_sort/config.yaml new file mode 100644 index 000000000..0f5649d5f --- /dev/null +++ b/examples/rust_adaptive_sort/config.yaml @@ -0,0 +1,57 @@ +# Configuration for Rust Adaptive Sorting Evolution + +# General settings +max_iterations: 150 +checkpoint_interval: 15 +log_level: "INFO" + +# LLM configuration +llm: + primary_model: "gemini-2.5-flash-lite-preview-06-17" + primary_model_weight: 0.8 + secondary_model: "gemini-2.5-flash" + secondary_model_weight: 0.2 + api_base: "https://generativelanguage.googleapis.com/v1beta/openai/" + + temperature: 0.7 + max_tokens: 4096 + + # Custom system message for Rust performance programming + system_message: | + You are an expert Rust systems programmer specializing in high-performance algorithms. + Focus on creating adaptive sorting algorithms that can handle different data patterns efficiently. + Consider hybrid approaches like introsort, adaptive pivot selection, and data-aware optimizations. + Ensure memory safety and idiomatic Rust code. Use appropriate data structures and leverage Rust's zero-cost abstractions. 
+ +# Prompt configuration +prompt: + num_top_programs: 4 + num_diverse_programs: 3 + + # Include compilation errors and performance artifacts + include_artifacts: true + max_artifact_bytes: 8192 + +# Database configuration +database: + population_size: 150 + num_islands: 4 + + # Feature dimensions for sorting algorithms + feature_dimensions: + - "score" # Overall performance score + - "performance_score" # Speed performance + - "adaptability_score" # Adaptability to different data patterns + feature_bins: 8 + +# Evaluator configuration +evaluator: + timeout: 60 # Rust compilation can take time + parallel_evaluations: 3 + + # Use cascade evaluation for performance testing + cascade_evaluation: true + cascade_thresholds: + - 0.5 # Compilation success and basic correctness + - 0.7 # Good performance + - 0.85 # Excellent adaptability \ No newline at end of file diff --git a/examples/rust_adaptive_sort/evaluator.py b/examples/rust_adaptive_sort/evaluator.py new file mode 100644 index 000000000..1a2d7005a --- /dev/null +++ b/examples/rust_adaptive_sort/evaluator.py @@ -0,0 +1,317 @@ +""" +Evaluator for Rust adaptive sorting example +""" + +import asyncio +import json +import os +import subprocess +import tempfile +import time +from pathlib import Path +from typing import Dict, Any, List + +import numpy as np + +from openevolve.evaluation_result import EvaluationResult + + +async def evaluate(program_path: str) -> EvaluationResult: + """ + Evaluate a Rust sorting algorithm implementation. + + Tests the algorithm on various data patterns to measure: + - Correctness + - Performance (speed) + - Adaptability to different data patterns + - Memory efficiency + """ + try: + # Create a temporary Rust project + with tempfile.TemporaryDirectory() as temp_dir: + project_dir = Path(temp_dir) / "sort_test" + + # Initialize Cargo project + result = subprocess.run( + ["cargo", "init", "--name", "sort_test", str(project_dir)], + capture_output=True, + text=True + ) + + if result.returncode != 0: + return EvaluationResult( + metrics={"score": 0.0, "compile_success": 0.0}, + artifacts={"error": "Failed to create Cargo project", "stderr": result.stderr} + ) + + # Copy the program to src/lib.rs + lib_path = project_dir / "src" / "lib.rs" + with open(program_path, 'r') as src: + lib_content = src.read() + with open(lib_path, 'w') as dst: + dst.write(lib_content) + + # Create main.rs with benchmark code + main_content = """ +use sort_test::{adaptive_sort, run_benchmark}; +use std::time::Instant; + +fn main() { + // Generate test datasets with different characteristics + let test_data = vec![ + // Random data + generate_random_data(1000), + generate_random_data(10000), + + // Nearly sorted data + generate_nearly_sorted_data(1000, 0.05), + generate_nearly_sorted_data(10000, 0.05), + + // Reverse sorted data + generate_reverse_sorted_data(1000), + generate_reverse_sorted_data(10000), + + // Data with many duplicates + generate_data_with_duplicates(1000, 10), + generate_data_with_duplicates(10000, 100), + + // Partially sorted data + generate_partially_sorted_data(1000, 0.3), + generate_partially_sorted_data(10000, 0.3), + ]; + + let results = run_benchmark(test_data); + + // Calculate metrics + let all_correct = results.correctness.iter().all(|&c| c); + let correctness_score = if all_correct { 1.0 } else { 0.0 }; + + let avg_time: f64 = results.times.iter().sum::() / results.times.len() as f64; + + // Performance score (normalized, assuming baseline of 0.1 seconds for largest dataset) + let performance_score = 1.0 / (1.0 
+ avg_time * 10.0); + + // Output results as JSON + println!("{{"); + println!(" \\"correctness\\": {},", correctness_score); + println!(" \\"avg_time\\": {},", avg_time); + println!(" \\"performance_score\\": {},", performance_score); + println!(" \\"adaptability_score\\": {},", results.adaptability_score); + println!(" \\"times\\": {:?},", results.times); + println!(" \\"all_correct\\": {}", all_correct); + println!("}}"); +} + +fn generate_random_data(size: usize) -> Vec { + (0..size).map(|_| rand::random::() % 10000).collect() +} + +fn generate_nearly_sorted_data(size: usize, disorder_rate: f64) -> Vec { + let mut data: Vec = (0..size as i32).collect(); + let swaps = (size as f64 * disorder_rate) as usize; + + for _ in 0..swaps { + let i = rand::random::() % size; + let j = rand::random::() % size; + data.swap(i, j); + } + + data +} + +fn generate_reverse_sorted_data(size: usize) -> Vec { + (0..size as i32).rev().collect() +} + +fn generate_data_with_duplicates(size: usize, unique_values: usize) -> Vec { + (0..size).map(|_| rand::random::() % unique_values as i32).collect() +} + +fn generate_partially_sorted_data(size: usize, sorted_fraction: f64) -> Vec { + let sorted_size = (size as f64 * sorted_fraction) as usize; + let mut data = Vec::with_capacity(size); + + // Add sorted portion + data.extend((0..sorted_size as i32)); + + // Add random portion + data.extend((0..(size - sorted_size)).map(|_| rand::random::() % 10000)); + + data +} + +// Simple random implementation +mod rand { + use std::cell::Cell; + use std::time::{SystemTime, UNIX_EPOCH}; + + thread_local! { + static SEED: Cell = Cell::new( + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs() + ); + } + + pub fn random() -> T + where + T: From, + { + SEED.with(|seed| { + let mut x = seed.get(); + x ^= x << 13; + x ^= x >> 7; + x ^= x << 17; + seed.set(x); + T::from(x) + }) + } +} +""" + main_path = project_dir / "src" / "main.rs" + with open(main_path, 'w') as f: + f.write(main_content) + + # Build the project + build_result = subprocess.run( + ["cargo", "build", "--release"], + cwd=project_dir, + capture_output=True, + text=True, + timeout=60 + ) + + if build_result.returncode != 0: + # Extract compilation errors + return EvaluationResult( + metrics={ + "score": 0.0, + "compile_success": 0.0, + "correctness": 0.0, + "performance_score": 0.0, + "adaptability_score": 0.0 + }, + artifacts={ + "error": "Compilation failed", + "stderr": build_result.stderr, + "stdout": build_result.stdout + } + ) + + # Run the benchmark + run_result = subprocess.run( + ["cargo", "run", "--release"], + cwd=project_dir, + capture_output=True, + text=True, + timeout=30 + ) + + if run_result.returncode != 0: + return EvaluationResult( + metrics={ + "score": 0.0, + "compile_success": 1.0, + "correctness": 0.0, + "performance_score": 0.0, + "adaptability_score": 0.0 + }, + artifacts={ + "error": "Runtime error", + "stderr": run_result.stderr + } + ) + + # Parse JSON output + try: + # Find JSON in output (between first { and last }) + output = run_result.stdout + start = output.find('{') + end = output.rfind('}') + 1 + json_str = output[start:end] + + results = json.loads(json_str) + + # Calculate overall score + correctness = results['correctness'] + performance = results['performance_score'] + adaptability = results['adaptability_score'] + + # Weighted score (correctness is mandatory) + if correctness < 1.0: + overall_score = 0.0 + else: + overall_score = ( + 0.6 * performance + + 0.4 * adaptability + ) + + # Check for memory 
safety (basic check via valgrind if available) + memory_safe = 1.0 # Rust is memory safe by default + + return EvaluationResult( + metrics={ + "score": overall_score, + "compile_success": 1.0, + "correctness": correctness, + "performance_score": performance, + "adaptability_score": adaptability, + "avg_time": results['avg_time'], + "memory_safe": memory_safe + }, + artifacts={ + "times": results['times'], + "all_correct": results['all_correct'], + "build_output": build_result.stdout + } + ) + + except (json.JSONDecodeError, KeyError) as e: + return EvaluationResult( + metrics={ + "score": 0.0, + "compile_success": 1.0, + "correctness": 0.0, + "performance_score": 0.0, + "adaptability_score": 0.0 + }, + artifacts={ + "error": f"Failed to parse results: {str(e)}", + "stdout": run_result.stdout + } + ) + + except subprocess.TimeoutExpired: + return EvaluationResult( + metrics={ + "score": 0.0, + "compile_success": 0.0, + "correctness": 0.0, + "performance_score": 0.0, + "adaptability_score": 0.0 + }, + artifacts={"error": "Timeout during evaluation"} + ) + except Exception as e: + return EvaluationResult( + metrics={ + "score": 0.0, + "compile_success": 0.0, + "correctness": 0.0, + "performance_score": 0.0, + "adaptability_score": 0.0 + }, + artifacts={"error": str(e), "type": "evaluation_error"} + ) + + +# For testing +if __name__ == "__main__": + import sys + if len(sys.argv) > 1: + result = asyncio.run(evaluate(sys.argv[1])) + print(f"Score: {result.metrics['score']:.4f}") + print(f"Correctness: {result.metrics['correctness']:.4f}") + print(f"Performance: {result.metrics['performance_score']:.4f}") + print(f"Adaptability: {result.metrics['adaptability_score']:.4f}") \ No newline at end of file diff --git a/examples/rust_adaptive_sort/initial_program.rs b/examples/rust_adaptive_sort/initial_program.rs new file mode 100644 index 000000000..a8ee629bf --- /dev/null +++ b/examples/rust_adaptive_sort/initial_program.rs @@ -0,0 +1,153 @@ +// Adaptive Sorting Algorithm Implementation +// This program implements a sorting algorithm that can be evolved to adapt to different data patterns + +use std::cmp::Ordering; + +// EVOLVE-BLOCK-START +// Initial implementation: Simple quicksort +// This can be evolved to: +// - Hybrid algorithms (introsort, timsort-like) +// - Adaptive pivot selection +// - Special handling for nearly sorted data +// - Switching to different algorithms based on data characteristics + +pub fn adaptive_sort(arr: &mut [T]) { + if arr.len() <= 1 { + return; + } + + // Use quicksort as the base implementation + quicksort(arr, 0, arr.len() - 1); +} + +fn quicksort(arr: &mut [T], low: usize, high: usize) { + if low < high { + let pivot_index = partition(arr, low, high); + + // Recursively sort elements before and after partition + if pivot_index > 0 { + quicksort(arr, low, pivot_index - 1); + } + quicksort(arr, pivot_index + 1, high); + } +} + +fn partition(arr: &mut [T], low: usize, high: usize) -> usize { + // Choose the last element as pivot (can be evolved to use better strategies) + let pivot = arr[high].clone(); + let mut i = low; + + for j in low..high { + if arr[j] <= pivot { + arr.swap(i, j); + i += 1; + } + } + + arr.swap(i, high); + i +} + +// Helper function to detect if array is nearly sorted +fn is_nearly_sorted(arr: &[T], threshold: f64) -> bool { + if arr.len() <= 1 { + return true; + } + + let mut inversions = 0; + let max_inversions = ((arr.len() * (arr.len() - 1)) / 2) as f64 * threshold; + + for i in 0..arr.len() - 1 { + for j in i + 1..arr.len() { + if arr[i] > 
arr[j] { + inversions += 1; + if inversions as f64 > max_inversions { + return false; + } + } + } + } + + true +} + +// Helper function for insertion sort (useful for small arrays) +fn insertion_sort(arr: &mut [T]) { + for i in 1..arr.len() { + let mut j = i; + while j > 0 && arr[j - 1] > arr[j] { + arr.swap(j, j - 1); + j -= 1; + } + } +} +// EVOLVE-BLOCK-END + +// Benchmark function to test the sort implementation +pub fn run_benchmark(test_data: Vec>) -> BenchmarkResults { + let mut results = BenchmarkResults { + times: Vec::new(), + correctness: Vec::new(), + adaptability_score: 0.0, + }; + + for data in test_data { + let mut arr = data.clone(); + let start = std::time::Instant::now(); + + adaptive_sort(&mut arr); + + let elapsed = start.elapsed(); + results.times.push(elapsed.as_secs_f64()); + + // Check if correctly sorted + let is_sorted = arr.windows(2).all(|w| w[0] <= w[1]); + results.correctness.push(is_sorted); + } + + // Calculate adaptability score based on performance variance + if results.times.len() > 1 { + let mean_time: f64 = results.times.iter().sum::() / results.times.len() as f64; + let variance: f64 = results.times.iter() + .map(|t| (t - mean_time).powi(2)) + .sum::() / results.times.len() as f64; + + // Lower variance means better adaptability + results.adaptability_score = 1.0 / (1.0 + variance.sqrt()); + } + + results +} + +#[derive(Debug)] +pub struct BenchmarkResults { + pub times: Vec, + pub correctness: Vec, + pub adaptability_score: f64, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_basic_sort() { + let mut arr = vec![3, 1, 4, 1, 5, 9, 2, 6]; + adaptive_sort(&mut arr); + assert_eq!(arr, vec![1, 1, 2, 3, 4, 5, 6, 9]); + } + + #[test] + fn test_empty_array() { + let mut arr: Vec = vec![]; + adaptive_sort(&mut arr); + assert_eq!(arr, vec![]); + } + + #[test] + fn test_single_element() { + let mut arr = vec![42]; + adaptive_sort(&mut arr); + assert_eq!(arr, vec![42]); + } +} \ No newline at end of file diff --git a/examples/rust_adaptive_sort/requirements.txt b/examples/rust_adaptive_sort/requirements.txt new file mode 100644 index 000000000..1de70e215 --- /dev/null +++ b/examples/rust_adaptive_sort/requirements.txt @@ -0,0 +1,11 @@ +# System requirements for Rust adaptive sorting example + +# System dependencies: +# - Rust toolchain (rustc, cargo) - install from https://rustup.rs/ +# - Cargo package manager (comes with Rust) + +# Python requirements for the evaluator +numpy>=1.22.0 + +# The Rust code will be compiled and run using Cargo, +# so no additional Rust dependencies need to be pre-installed. \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index be78af2d8..94f30a35c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "openevolve" -version = "0.0.10" +version = "0.0.11" description = "Open-source implementation of AlphaEvolve" readme = "README.md" requires-python = ">=3.9" diff --git a/setup.py b/setup.py index f646ebc19..e7ea0d5bb 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name="openevolve", - version="0.0.10", + version="0.0.11", packages=find_packages(), include_package_data=True, )
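
For readers who want a feel for where evolution could take the R seed program, below is a minimal sketch (not part of this PR) of Huber-weighted iteratively reweighted least squares, one of the robust techniques named in the config's system message. The function name `huber_irls`, the Huber constant `k = 1.345`, and the MAD-based scale estimate are illustrative assumptions; the sketch returns the same `coefficients`/`predictions`/`residuals` list as `robust_regression()` in `initial_program.r`, so `calculate_metrics()` could consume its output unchanged.

```r
# Sketch only: a Huber IRLS variant that could replace the EVOLVE block in initial_program.r
huber_irls <- function(X, y, k = 1.345, max_iter = 50, tol = 1e-6) {
  X1 <- cbind(1, X)                          # add intercept column, as in robust_regression()
  beta <- solve(t(X1) %*% X1, t(X1) %*% y)   # start from the ordinary least-squares fit
  for (iter in 1:max_iter) {
    r <- as.vector(y - X1 %*% beta)
    s <- median(abs(r - median(r))) / 0.6745 # robust scale estimate (normalized MAD)
    if (s < .Machine$double.eps) break       # residuals already (near) zero
    u <- r / s
    w <- ifelse(abs(u) <= k, 1, k / abs(u))  # Huber weights: down-weight large residuals
    beta_new <- solve(t(X1) %*% (w * X1), t(X1) %*% (w * y))  # weighted normal equations
    if (max(abs(beta_new - beta)) < tol) {
      beta <- beta_new
      break
    }
    beta <- beta_new
  }
  predictions <- X1 %*% beta
  list(
    coefficients = beta,
    predictions = predictions,
    residuals = y - predictions
  )
}
```

Down-weighting residuals larger than `k` scale units is what makes the fit resistant to the injected outliers in the evaluator's test cases, while clean observations keep weight 1 and preserve ordinary least-squares accuracy.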